From 2cc7cf4f3230d23678df52ba004e2907485e1ffd Mon Sep 17 00:00:00 2001 From: Nanako <469449812@qq.com> Date: Wed, 31 Jan 2024 15:14:40 +0800 Subject: [PATCH] =?UTF-8?q?=E6=B7=BB=E5=8A=A0HLSLcc=E4=B8=BA=E9=9D=9E?= =?UTF-8?q?=E5=AD=90=E6=A8=A1=E5=9D=97?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- third_party/HLSLcc/.editorconfig | 73 + third_party/HLSLcc/.gitignore | 29 + third_party/HLSLcc/CMakeLists.txt | 51 + third_party/HLSLcc/README.md | 54 + third_party/HLSLcc/include/ShaderInfo.h | 510 ++ .../UnityInstancingFlexibleArraySize.h | 23 + third_party/HLSLcc/include/growing_array.h | 45 + third_party/HLSLcc/include/hlslcc.h | 816 +++ third_party/HLSLcc/include/hlslcc.hpp | 3 + third_party/HLSLcc/include/pstdint.h | 799 +++ third_party/HLSLcc/license.txt | 53 + third_party/HLSLcc/src/ControlFlowGraph.cpp | 815 +++ .../HLSLcc/src/ControlFlowGraphUtils.cpp | 116 + third_party/HLSLcc/src/DataTypeAnalysis.cpp | 777 +++ third_party/HLSLcc/src/Declaration.cpp | 1 + .../HLSLcc/src/HLSLCrossCompilerContext.cpp | 350 ++ third_party/HLSLcc/src/HLSLcc.cpp | 250 + third_party/HLSLcc/src/HLSLccToolkit.cpp | 574 ++ third_party/HLSLcc/src/HLSLccTypes.natvis | 10 + third_party/HLSLcc/src/Instruction.cpp | 349 ++ third_party/HLSLcc/src/LoopTransform.cpp | 370 ++ third_party/HLSLcc/src/Operand.cpp | 641 +++ third_party/HLSLcc/src/Shader.cpp | 989 ++++ third_party/HLSLcc/src/ShaderInfo.cpp | 520 ++ third_party/HLSLcc/src/UseDefineChains.cpp | 814 +++ third_party/HLSLcc/src/cbstring/bsafe.c | 87 + third_party/HLSLcc/src/cbstring/bsafe.h | 43 + third_party/HLSLcc/src/cbstring/bstraux.c | 1273 +++++ third_party/HLSLcc/src/cbstring/bstraux.h | 112 + third_party/HLSLcc/src/cbstring/bstrlib.c | 3280 +++++++++++ third_party/HLSLcc/src/cbstring/bstrlib.h | 306 ++ third_party/HLSLcc/src/cbstring/bstrlib.txt | 3202 +++++++++++ third_party/HLSLcc/src/cbstring/license.txt | 29 + third_party/HLSLcc/src/cbstring/porting.txt | 172 + third_party/HLSLcc/src/cbstring/security.txt | 221 + third_party/HLSLcc/src/decode.cpp | 1635 ++++++ .../src/internal_includes/ControlFlowGraph.h | 151 + .../internal_includes/ControlFlowGraphUtils.h | 30 + .../src/internal_includes/DataTypeAnalysis.h | 15 + .../src/internal_includes/Declaration.h | 118 + .../HLSLCrossCompilerContext.h | 81 + .../src/internal_includes/HLSLccToolkit.h | 134 + .../src/internal_includes/Instruction.h | 184 + .../src/internal_includes/LoopTransform.h | 8 + .../HLSLcc/src/internal_includes/Operand.h | 150 + .../HLSLcc/src/internal_includes/Shader.h | 255 + .../HLSLcc/src/internal_includes/Translator.h | 32 + .../src/internal_includes/UseDefineChains.h | 138 + .../HLSLcc/src/internal_includes/debug.h | 21 + .../HLSLcc/src/internal_includes/decode.h | 10 + .../HLSLcc/src/internal_includes/languages.h | 328 ++ .../HLSLcc/src/internal_includes/reflect.h | 26 + .../HLSLcc/src/internal_includes/toGLSL.h | 244 + .../src/internal_includes/toGLSLOperand.h | 26 + .../HLSLcc/src/internal_includes/toMetal.h | 182 + .../internal_includes/toMetalDeclaration.h | 3 + .../HLSLcc/src/internal_includes/tokens.h | 789 +++ third_party/HLSLcc/src/reflect.cpp | 620 +++ third_party/HLSLcc/src/toGLSL.cpp | 1190 ++++ third_party/HLSLcc/src/toGLSLDeclaration.cpp | 3933 ++++++++++++++ third_party/HLSLcc/src/toGLSLInstruction.cpp | 4801 +++++++++++++++++ third_party/HLSLcc/src/toGLSLOperand.cpp | 1892 +++++++ third_party/HLSLcc/src/toMetal.cpp | 988 ++++ third_party/HLSLcc/src/toMetalDeclaration.cpp | 2454 +++++++++ 
third_party/HLSLcc/src/toMetalInstruction.cpp | 4096 ++++++++++++++ third_party/HLSLcc/src/toMetalOperand.cpp | 1277 +++++ 66 files changed, 43568 insertions(+) create mode 100644 third_party/HLSLcc/.editorconfig create mode 100644 third_party/HLSLcc/.gitignore create mode 100644 third_party/HLSLcc/CMakeLists.txt create mode 100644 third_party/HLSLcc/README.md create mode 100644 third_party/HLSLcc/include/ShaderInfo.h create mode 100644 third_party/HLSLcc/include/UnityInstancingFlexibleArraySize.h create mode 100644 third_party/HLSLcc/include/growing_array.h create mode 100644 third_party/HLSLcc/include/hlslcc.h create mode 100644 third_party/HLSLcc/include/hlslcc.hpp create mode 100644 third_party/HLSLcc/include/pstdint.h create mode 100644 third_party/HLSLcc/license.txt create mode 100644 third_party/HLSLcc/src/ControlFlowGraph.cpp create mode 100644 third_party/HLSLcc/src/ControlFlowGraphUtils.cpp create mode 100644 third_party/HLSLcc/src/DataTypeAnalysis.cpp create mode 100644 third_party/HLSLcc/src/Declaration.cpp create mode 100644 third_party/HLSLcc/src/HLSLCrossCompilerContext.cpp create mode 100644 third_party/HLSLcc/src/HLSLcc.cpp create mode 100644 third_party/HLSLcc/src/HLSLccToolkit.cpp create mode 100644 third_party/HLSLcc/src/HLSLccTypes.natvis create mode 100644 third_party/HLSLcc/src/Instruction.cpp create mode 100644 third_party/HLSLcc/src/LoopTransform.cpp create mode 100644 third_party/HLSLcc/src/Operand.cpp create mode 100644 third_party/HLSLcc/src/Shader.cpp create mode 100644 third_party/HLSLcc/src/ShaderInfo.cpp create mode 100644 third_party/HLSLcc/src/UseDefineChains.cpp create mode 100644 third_party/HLSLcc/src/cbstring/bsafe.c create mode 100644 third_party/HLSLcc/src/cbstring/bsafe.h create mode 100644 third_party/HLSLcc/src/cbstring/bstraux.c create mode 100644 third_party/HLSLcc/src/cbstring/bstraux.h create mode 100644 third_party/HLSLcc/src/cbstring/bstrlib.c create mode 100644 third_party/HLSLcc/src/cbstring/bstrlib.h create mode 100644 third_party/HLSLcc/src/cbstring/bstrlib.txt create mode 100644 third_party/HLSLcc/src/cbstring/license.txt create mode 100644 third_party/HLSLcc/src/cbstring/porting.txt create mode 100644 third_party/HLSLcc/src/cbstring/security.txt create mode 100644 third_party/HLSLcc/src/decode.cpp create mode 100644 third_party/HLSLcc/src/internal_includes/ControlFlowGraph.h create mode 100644 third_party/HLSLcc/src/internal_includes/ControlFlowGraphUtils.h create mode 100644 third_party/HLSLcc/src/internal_includes/DataTypeAnalysis.h create mode 100644 third_party/HLSLcc/src/internal_includes/Declaration.h create mode 100644 third_party/HLSLcc/src/internal_includes/HLSLCrossCompilerContext.h create mode 100644 third_party/HLSLcc/src/internal_includes/HLSLccToolkit.h create mode 100644 third_party/HLSLcc/src/internal_includes/Instruction.h create mode 100644 third_party/HLSLcc/src/internal_includes/LoopTransform.h create mode 100644 third_party/HLSLcc/src/internal_includes/Operand.h create mode 100644 third_party/HLSLcc/src/internal_includes/Shader.h create mode 100644 third_party/HLSLcc/src/internal_includes/Translator.h create mode 100644 third_party/HLSLcc/src/internal_includes/UseDefineChains.h create mode 100644 third_party/HLSLcc/src/internal_includes/debug.h create mode 100644 third_party/HLSLcc/src/internal_includes/decode.h create mode 100644 third_party/HLSLcc/src/internal_includes/languages.h create mode 100644 third_party/HLSLcc/src/internal_includes/reflect.h create mode 100644 
third_party/HLSLcc/src/internal_includes/toGLSL.h create mode 100644 third_party/HLSLcc/src/internal_includes/toGLSLOperand.h create mode 100644 third_party/HLSLcc/src/internal_includes/toMetal.h create mode 100644 third_party/HLSLcc/src/internal_includes/toMetalDeclaration.h create mode 100644 third_party/HLSLcc/src/internal_includes/tokens.h create mode 100644 third_party/HLSLcc/src/reflect.cpp create mode 100644 third_party/HLSLcc/src/toGLSL.cpp create mode 100644 third_party/HLSLcc/src/toGLSLDeclaration.cpp create mode 100644 third_party/HLSLcc/src/toGLSLInstruction.cpp create mode 100644 third_party/HLSLcc/src/toGLSLOperand.cpp create mode 100644 third_party/HLSLcc/src/toMetal.cpp create mode 100644 third_party/HLSLcc/src/toMetalDeclaration.cpp create mode 100644 third_party/HLSLcc/src/toMetalInstruction.cpp create mode 100644 third_party/HLSLcc/src/toMetalOperand.cpp diff --git a/third_party/HLSLcc/.editorconfig b/third_party/HLSLcc/.editorconfig new file mode 100644 index 0000000..51442de --- /dev/null +++ b/third_party/HLSLcc/.editorconfig @@ -0,0 +1,73 @@ +# see http://editorconfig.org/ for docs on this file + +root = true + +[*] +# help with sharing files across os's (i.e. network share or through local vm) +end_of_line = lf +#charset temporarily disabled due to bug in VS2017 changing to UTF-8 with BOM (https://favro.com/card/c564ede4ed3337f7b17986b6/Uni-17877) +#charset = utf-8 +trim_trailing_whitespace = true +insert_final_newline = true + +# formattable file extensions (keep in sync with format.ini from unity-meta repo) +# +# Note: We need to split the formattable files configs into shorter duplicate entries (logically grouped) +# due to known issue in VS editorconfig extension where there is a limit of 51 characters (empirically determined). +# see: https://github.com/editorconfig/editorconfig-visualstudio/issues/21 +# +## uncrustify +[*.{c,h,cpp,hpp,m,mm,cc,cs}] +indent_style = space +indent_size = 4 + +## generic formatter (shaders) +[*.{cg,cginc,glslinc,hlsl,shader,y,ypp,yy}] +indent_style = space +indent_size = 4 + +## generic formatter (misc) +[*.{asm,s,S,pch,pchmm,java,sh,uss}] +indent_style = space +indent_size = 4 + +## perltidy +[*.{pl,pm,t,it}] +indent_style = space +indent_size = 4 + +## unity special +[*.{bindings,mem.xml}] +indent_style = space +indent_size = 4 + +# other filetypes we want to overwrite default configuration to preserve the standard +[{Makefile,makefile}] +# TAB characters are part of the Makefile format +indent_style = tab + +[*.{md,markdown}] +# trailing whitespace is significant in markdown (bad choice, bad!) +trim_trailing_whitespace = false + +# keep these and the VS stuff below in sync with .hgeol's CRLF extensions +[*.{vcproj,bat,cmd,xaml,tt,t4,ttinclude}] +end_of_line = crlf + +# this VS-specific stuff is based on experiments to see how VS will modify a file after it has been manually edited. +# the settings are meant to closely match what VS does to minimize unnecessary diffs. this duplicates some settings in * +# but let's be explicit here to be safe (in case someone wants to copy-paste this out to another .editorconfig). 
+[*.{vcxproj,vcxproj.filters,csproj,props,targets}] +indent_style = space +indent_size = 2 +end_of_line = crlf +charset = utf-8-bom +trim_trailing_whitespace = true +insert_final_newline = false +[*.{sln,sln.template}] +indent_style = tab +indent_size = 4 +end_of_line = crlf +charset = utf-8 +trim_trailing_whitespace = true +insert_final_newline = false diff --git a/third_party/HLSLcc/.gitignore b/third_party/HLSLcc/.gitignore new file mode 100644 index 0000000..4581ef2 --- /dev/null +++ b/third_party/HLSLcc/.gitignore @@ -0,0 +1,29 @@ +# Compiled Object files +*.slo +*.lo +*.o +*.obj + +# Precompiled Headers +*.gch +*.pch + +# Compiled Dynamic libraries +*.so +*.dylib +*.dll + +# Fortran module files +*.mod +*.smod + +# Compiled Static libraries +*.lai +*.la +*.a +*.lib + +# Executables +*.exe +*.out +*.app diff --git a/third_party/HLSLcc/CMakeLists.txt b/third_party/HLSLcc/CMakeLists.txt new file mode 100644 index 0000000..176dd6e --- /dev/null +++ b/third_party/HLSLcc/CMakeLists.txt @@ -0,0 +1,51 @@ + +cmake_minimum_required(VERSION 3.15) + +project(HLSLcc) +set(CMAKE_CXX_STANDARD 11) + +option(HLSLCC_LIBRARY_SHARED "Build shared library instead of static." ON) + +file(GLOB HLSLCC_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/include/*") +set(HLSLCC_SRC + src/ControlFlowGraph.cpp + src/ControlFlowGraphUtils.cpp + src/DataTypeAnalysis.cpp + src/Declaration.cpp + src/decode.cpp + src/HLSLcc.cpp + src/HLSLccToolkit.cpp + src/HLSLCrossCompilerContext.cpp + src/Instruction.cpp + src/LoopTransform.cpp + src/Operand.cpp + src/reflect.cpp + src/Shader.cpp + src/ShaderInfo.cpp + src/toGLSL.cpp + src/toGLSLDeclaration.cpp + src/toGLSLInstruction.cpp + src/toGLSLOperand.cpp + src/toMetal.cpp + src/toMetalDeclaration.cpp + src/toMetalInstruction.cpp + src/toMetalOperand.cpp + src/UseDefineChains.cpp + src/cbstring/bsafe.c + src/cbstring/bstraux.c + src/cbstring/bstrlib.c) + +if(HLSLCC_LIBRARY_SHARED) + add_library(${PROJECT_NAME} SHARED ${HLSLCC_SRC}) +else() + add_library(${PROJECT_NAME} STATIC ${HLSLCC_SRC}) +endif() + +target_include_directories(${PROJECT_NAME} + PUBLIC + ${CMAKE_CURRENT_SOURCE_DIR} + ${CMAKE_CURRENT_SOURCE_DIR}/include + PRIVATE + ${CMAKE_CURRENT_SOURCE_DIR}/src + ${CMAKE_CURRENT_SOURCE_DIR}/src/cbstring + ${CMAKE_CURRENT_SOURCE_DIR}/src/internal_includes) diff --git a/third_party/HLSLcc/README.md b/third_party/HLSLcc/README.md new file mode 100644 index 0000000..9fe4a5b --- /dev/null +++ b/third_party/HLSLcc/README.md @@ -0,0 +1,54 @@ +# HLSLcc +DirectX shader bytecode cross compiler. + +Originally based on https://github.com/James-Jones/HLSLCrossCompiler. + +This library takes DirectX bytecode as input, and translates it into the following languages: +- GLSL (OpenGL 3.2 and later) +- GLSL ES (OpenGL ES 2.0 and later) +- GLSL for Vulkan consumption (as input for Glslang to generate SPIR-V) +- Metal Shading Language + +This library is used to generate all shaders in Unity for OpenGL, OpenGL ES 3.0+, Metal and Vulkan. + +Changes from original HLSLCrossCompiler: +- Codebase changed to C++11, with major code reorganizations. +- Support for multiple language output backends (currently ToGLSL and ToMetal) +- Metal language output support +- Temp register type analysis: In DX bytecode the registers are typeless 32-bit 4-vectors. We do code analysis to infer the actual data types (to prevent the need for tons of bitcasts). 
+- Loop transformation: Detect constructs that look like for-loops and transform them back to their original form +- Support for partial precision variables in HLSL (min16float etc). Do extra analysis pass to infer the intended precision of samplers. +- Reflection interface to retrieve the shader inputs and their types. +- Lots of workarounds for various driver/shader compiler bugs. +- Lots of minor fixes and improvements for correctness +- Lots of Unity-specific tweaks to allow extending HLSL without having to change the D3D compiler itself. + +## Note + +This project is originally integrated into the Unity build systems. However, building this library should be fairly straightforward: just compile `src/*.cpp` (in C++11 mode!) and `src/cbstring/*.c` with the following include paths: + +- include +- src/internal_includes +- src/cbstrinc +- src + +Alternatively, a CMakeLists.txt is provided to build the project using cmake. + +The main entry point is TranslateHLSLFromMem() function in HLSLcc.cpp (taking DX bytecode as input). + + +## Contributors +- Mikko Strandborg +- Juho Oravainen +- David Rogers +- Marton Ekler +- Antti Tapaninen +- Florian Penzkofer +- Alexey Orlov +- Povilas Kanapickas +- Aleksandr Kirillov +- Kay Chang + +## License + +MIT license for HLSLcc itself, BSD license for the bstring library. See license.txt. diff --git a/third_party/HLSLcc/include/ShaderInfo.h b/third_party/HLSLcc/include/ShaderInfo.h new file mode 100644 index 0000000..dbaf258 --- /dev/null +++ b/third_party/HLSLcc/include/ShaderInfo.h @@ -0,0 +1,510 @@ +#pragma once + +#include +#include +#include +#include +#include "growing_array.h" +#include + +//Reflection +#define MAX_RESOURCE_BINDINGS 256 + +typedef enum _SHADER_VARIABLE_TYPE +{ + SVT_VOID = 0, + SVT_BOOL = 1, + SVT_INT = 2, + SVT_FLOAT = 3, + SVT_STRING = 4, + SVT_TEXTURE = 5, + SVT_TEXTURE1D = 6, + SVT_TEXTURE2D = 7, + SVT_TEXTURE3D = 8, + SVT_TEXTURECUBE = 9, + SVT_SAMPLER = 10, + SVT_PIXELSHADER = 15, + SVT_VERTEXSHADER = 16, + SVT_UINT = 19, + SVT_UINT8 = 20, + SVT_GEOMETRYSHADER = 21, + SVT_RASTERIZER = 22, + SVT_DEPTHSTENCIL = 23, + SVT_BLEND = 24, + SVT_BUFFER = 25, + SVT_CBUFFER = 26, + SVT_TBUFFER = 27, + SVT_TEXTURE1DARRAY = 28, + SVT_TEXTURE2DARRAY = 29, + SVT_RENDERTARGETVIEW = 30, + SVT_DEPTHSTENCILVIEW = 31, + SVT_TEXTURE2DMS = 32, + SVT_TEXTURE2DMSARRAY = 33, + SVT_TEXTURECUBEARRAY = 34, + SVT_HULLSHADER = 35, + SVT_DOMAINSHADER = 36, + SVT_INTERFACE_POINTER = 37, + SVT_COMPUTESHADER = 38, + SVT_DOUBLE = 39, + SVT_RWTEXTURE1D = 40, + SVT_RWTEXTURE1DARRAY = 41, + SVT_RWTEXTURE2D = 42, + SVT_RWTEXTURE2DARRAY = 43, + SVT_RWTEXTURE3D = 44, + SVT_RWBUFFER = 45, + SVT_BYTEADDRESS_BUFFER = 46, + SVT_RWBYTEADDRESS_BUFFER = 47, + SVT_STRUCTURED_BUFFER = 48, + SVT_RWSTRUCTURED_BUFFER = 49, + SVT_APPEND_STRUCTURED_BUFFER = 50, + SVT_CONSUME_STRUCTURED_BUFFER = 51, + + + // Only used as a marker when analyzing register types + SVT_FORCED_INT = 152, + // Integer that can be either signed or unsigned. Only used as an intermediate step when doing data type analysis + SVT_INT_AMBIGUOUS = 153, + + // Partial precision types. 
Used when doing type analysis + SVT_FLOAT10 = 53, // Seems to be used in constant buffers + SVT_FLOAT16 = 54, + SVT_INT16 = 156, + SVT_INT12 = 157, + SVT_UINT16 = 158, + + SVT_FORCE_DWORD = 0x7fffffff +} SHADER_VARIABLE_TYPE; + +typedef enum _SHADER_VARIABLE_CLASS +{ + SVC_SCALAR = 0, + SVC_VECTOR = (SVC_SCALAR + 1), + SVC_MATRIX_ROWS = (SVC_VECTOR + 1), + SVC_MATRIX_COLUMNS = (SVC_MATRIX_ROWS + 1), + SVC_OBJECT = (SVC_MATRIX_COLUMNS + 1), + SVC_STRUCT = (SVC_OBJECT + 1), + SVC_INTERFACE_CLASS = (SVC_STRUCT + 1), + SVC_INTERFACE_POINTER = (SVC_INTERFACE_CLASS + 1), + SVC_FORCE_DWORD = 0x7fffffff +} SHADER_VARIABLE_CLASS; + + +/////////////////////////////////////// +// Types + +enum TESSELLATOR_PARTITIONING +{ + TESSELLATOR_PARTITIONING_UNDEFINED = 0, + TESSELLATOR_PARTITIONING_INTEGER = 1, + TESSELLATOR_PARTITIONING_POW2 = 2, + TESSELLATOR_PARTITIONING_FRACTIONAL_ODD = 3, + TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN = 4 +}; + +enum TESSELLATOR_OUTPUT_PRIMITIVE +{ + TESSELLATOR_OUTPUT_UNDEFINED = 0, + TESSELLATOR_OUTPUT_POINT = 1, + TESSELLATOR_OUTPUT_LINE = 2, + TESSELLATOR_OUTPUT_TRIANGLE_CW = 3, + TESSELLATOR_OUTPUT_TRIANGLE_CCW = 4 +}; + +typedef enum TESSELLATOR_DOMAIN +{ + TESSELLATOR_DOMAIN_UNDEFINED = 0, + TESSELLATOR_DOMAIN_ISOLINE = 1, + TESSELLATOR_DOMAIN_TRI = 2, + TESSELLATOR_DOMAIN_QUAD = 3 +} TESSELLATOR_DOMAIN; + +enum SPECIAL_NAME +{ + NAME_UNDEFINED = 0, + NAME_POSITION = 1, + NAME_CLIP_DISTANCE = 2, + NAME_CULL_DISTANCE = 3, + NAME_RENDER_TARGET_ARRAY_INDEX = 4, + NAME_VIEWPORT_ARRAY_INDEX = 5, + NAME_VERTEX_ID = 6, + NAME_PRIMITIVE_ID = 7, + NAME_INSTANCE_ID = 8, + NAME_IS_FRONT_FACE = 9, + NAME_SAMPLE_INDEX = 10, + // The following are added for D3D11 + NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR = 11, + NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR = 12, + NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR = 13, + NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR = 14, + NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR = 15, + NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR = 16, + NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR = 17, + NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR = 18, + NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR = 19, + NAME_FINAL_TRI_INSIDE_TESSFACTOR = 20, + NAME_FINAL_LINE_DETAIL_TESSFACTOR = 21, + NAME_FINAL_LINE_DENSITY_TESSFACTOR = 22, +}; + + +enum INOUT_COMPONENT_TYPE +{ + INOUT_COMPONENT_UNKNOWN = 0, + INOUT_COMPONENT_UINT32 = 1, + INOUT_COMPONENT_SINT32 = 2, + INOUT_COMPONENT_FLOAT32 = 3 +}; + +enum MIN_PRECISION +{ + MIN_PRECISION_DEFAULT = 0, + MIN_PRECISION_FLOAT_16 = 1, + MIN_PRECISION_FLOAT_2_8 = 2, + MIN_PRECISION_RESERVED = 3, + MIN_PRECISION_SINT_16 = 4, + MIN_PRECISION_UINT_16 = 5, + MIN_PRECISION_ANY_16 = 0xf0, + MIN_PRECISION_ANY_10 = 0xf1 +}; + +enum ResourceType +{ + RTYPE_CBUFFER,//0 + RTYPE_TBUFFER,//1 + RTYPE_TEXTURE,//2 + RTYPE_SAMPLER,//3 + RTYPE_UAV_RWTYPED,//4 + RTYPE_STRUCTURED,//5 + RTYPE_UAV_RWSTRUCTURED,//6 + RTYPE_BYTEADDRESS,//7 + RTYPE_UAV_RWBYTEADDRESS,//8 + RTYPE_UAV_APPEND_STRUCTURED,//9 + RTYPE_UAV_CONSUME_STRUCTURED,//10 + RTYPE_UAV_RWSTRUCTURED_WITH_COUNTER,//11 + RTYPE_COUNT, +}; + +enum ResourceGroup +{ + RGROUP_CBUFFER, + RGROUP_TEXTURE, + RGROUP_SAMPLER, + RGROUP_UAV, + RGROUP_COUNT, +}; + +enum REFLECT_RESOURCE_DIMENSION +{ + REFLECT_RESOURCE_DIMENSION_UNKNOWN = 0, + REFLECT_RESOURCE_DIMENSION_BUFFER = 1, + REFLECT_RESOURCE_DIMENSION_TEXTURE1D = 2, + REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY = 3, + REFLECT_RESOURCE_DIMENSION_TEXTURE2D = 4, + REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY = 5, + REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS = 6, + REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY = 7, + 
REFLECT_RESOURCE_DIMENSION_TEXTURE3D = 8, + REFLECT_RESOURCE_DIMENSION_TEXTURECUBE = 9, + REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY = 10, + REFLECT_RESOURCE_DIMENSION_BUFFEREX = 11, +}; + +enum REFLECT_RESOURCE_PRECISION +{ + REFLECT_RESOURCE_PRECISION_UNKNOWN = 0, + REFLECT_RESOURCE_PRECISION_LOWP = 1, + REFLECT_RESOURCE_PRECISION_MEDIUMP = 2, + REFLECT_RESOURCE_PRECISION_HIGHP = 3, +}; + +enum RESOURCE_RETURN_TYPE +{ + RETURN_TYPE_UNORM = 1, + RETURN_TYPE_SNORM = 2, + RETURN_TYPE_SINT = 3, + RETURN_TYPE_UINT = 4, + RETURN_TYPE_FLOAT = 5, + RETURN_TYPE_MIXED = 6, + RETURN_TYPE_DOUBLE = 7, + RETURN_TYPE_CONTINUED = 8, + RETURN_TYPE_UNUSED = 9, +}; + +typedef std::map HLSLccSamplerPrecisionInfo; + +struct ResourceBinding +{ + std::string name; + ResourceType eType; + uint32_t ui32BindPoint; + uint32_t ui32BindCount; + uint32_t ui32Flags; + uint32_t ui32Space; + uint32_t ui32RangeID; + REFLECT_RESOURCE_DIMENSION eDimension; + RESOURCE_RETURN_TYPE ui32ReturnType; + uint32_t ui32NumSamples; + REFLECT_RESOURCE_PRECISION ePrecision; + int m_SamplerMode; // (SB_SAMPLER_MODE) For samplers, this is the sampler mode this sampler is declared with + + SHADER_VARIABLE_TYPE GetDataType() const + { + switch (ePrecision) + { + case REFLECT_RESOURCE_PRECISION_LOWP: + switch (ui32ReturnType) + { + case RETURN_TYPE_UNORM: + case RETURN_TYPE_SNORM: + case RETURN_TYPE_FLOAT: + return SVT_FLOAT10; + case RETURN_TYPE_SINT: + return SVT_INT16; + case RETURN_TYPE_UINT: + return SVT_UINT16; + default: +// ASSERT(0); + return SVT_FLOAT10; + } + + case REFLECT_RESOURCE_PRECISION_MEDIUMP: + switch (ui32ReturnType) + { + case RETURN_TYPE_UNORM: + case RETURN_TYPE_SNORM: + case RETURN_TYPE_FLOAT: + return SVT_FLOAT16; + case RETURN_TYPE_SINT: + return SVT_INT16; + case RETURN_TYPE_UINT: + return SVT_UINT16; + default: +// ASSERT(0); + return SVT_FLOAT16; + } + + default: + switch (ui32ReturnType) + { + case RETURN_TYPE_UNORM: + case RETURN_TYPE_SNORM: + case RETURN_TYPE_FLOAT: + return SVT_FLOAT; + case RETURN_TYPE_SINT: + return SVT_INT; + case RETURN_TYPE_UINT: + return SVT_UINT; + case RETURN_TYPE_DOUBLE: + return SVT_DOUBLE; + default: +// ASSERT(0); + return SVT_FLOAT; + } + } + } +}; + +struct ShaderVarType +{ + ShaderVarType() : + Class(), + Type(), + Rows(), + Columns(), + Elements(), + MemberCount(), + Offset(), + ParentCount(), + Parent(), + m_IsUsed(false) + {} + + SHADER_VARIABLE_CLASS Class; + SHADER_VARIABLE_TYPE Type; + uint32_t Rows; + uint32_t Columns; + uint32_t Elements; + uint32_t MemberCount; + uint32_t Offset; + std::string name; + + uint32_t ParentCount; + struct ShaderVarType * Parent; + //Includes all parent names. + std::string fullName; + + std::vector Members; + + bool m_IsUsed; // If not set, is not used in the shader code + + uint32_t GetMemberCount() const + { + if (Class == SVC_STRUCT) + { + uint32_t res = 0; + std::vector::const_iterator itr; + for (itr = Members.begin(); itr != Members.end(); itr++) + { + res += itr->GetMemberCount(); + } + return res; + } + else + return 1; + } +}; + +struct ShaderVar +{ + std::string name; + int haveDefaultValue; + std::vector pui32DefaultValues; + //Offset/Size in bytes. 
+ uint32_t ui32StartOffset; + uint32_t ui32Size; + + ShaderVarType sType; +}; + +struct ConstantBuffer +{ + std::string name; + + std::vector asVars; + + uint32_t ui32TotalSizeInBytes; + + uint32_t GetMemberCount(bool stripUnused) const + { + uint32_t res = 0; + std::vector::const_iterator itr; + for (itr = asVars.begin(); itr != asVars.end(); itr++) + { + if (stripUnused && !itr->sType.m_IsUsed) + continue; + res += itr->sType.GetMemberCount(); + } + return res; + } +}; + +struct ClassType +{ + std::string name; + uint16_t ui16ID; + uint16_t ui16ConstBufStride; + uint16_t ui16Texture; + uint16_t ui16Sampler; +}; + +struct ClassInstance +{ + std::string name; + uint16_t ui16ID; + uint16_t ui16ConstBuf; + uint16_t ui16ConstBufOffset; + uint16_t ui16Texture; + uint16_t ui16Sampler; +}; + +class Operand; + +class ShaderInfo +{ +public: + + struct InOutSignature + { + std::string semanticName; + uint32_t ui32SemanticIndex; + SPECIAL_NAME eSystemValueType; + INOUT_COMPONENT_TYPE eComponentType; + uint32_t ui32Register; + uint32_t ui32Mask; + uint32_t ui32ReadWriteMask; + + int iRebase; // If mask does not start from zero, this indicates the offset that needs to be subtracted from each swizzle + + uint32_t ui32Stream; + MIN_PRECISION eMinPrec; + + std::set isIndexed; // Set of phases where this input/output is part of a index range. + std::map indexStart; // If indexed, contains the start index for the range + std::map index; // If indexed, contains the current index relative to the index start. + }; + + ShaderInfo() : + ui32MajorVersion(), + ui32MinorVersion(), + psResourceBindings(), + psConstantBuffers(), + psThisPointerConstBuffer(), + psClassTypes(), + psClassInstances() + {} + + SHADER_VARIABLE_TYPE GetTextureDataType(uint32_t regNo); + + int GetResourceFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ResourceBinding** ppsOutBinding) const; + + void GetConstantBufferFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ConstantBuffer** ppsConstBuf) const; + + int GetInterfaceVarFromOffset(uint32_t ui32Offset, ShaderVar** ppsShaderVar) const; + + int GetInputSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull = false) const; + int GetPatchConstantSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull = false) const; + int GetOutputSignatureFromRegister(const uint32_t ui32Register, + const uint32_t ui32CompMask, + const uint32_t ui32Stream, + const InOutSignature** ppsOut, + bool allowNull = false) const; + + int GetOutputSignatureFromSystemValue(SPECIAL_NAME eSystemValueType, uint32_t ui32SemanticIndex, const InOutSignature** ppsOut) const; + + static ResourceGroup ResourceTypeToResourceGroup(ResourceType); + + static uint32_t GetCBVarSize(const ShaderVarType* psType, bool matrixAsVectors, bool wholeArraySize = false); + + static int GetShaderVarFromOffset(const uint32_t ui32Vec4Offset, + const uint32_t(&pui32Swizzle)[4], + const ConstantBuffer* psCBuf, + const ShaderVarType** ppsShaderVar, + bool* isArray, + std::vector* arrayIndices, + int32_t* pi32Rebase, + uint32_t flags); + + static std::string GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, const std::vector& indices, const std::string& dynamicIndex, bool revertDynamicIndexCalc, bool matrixAsVectors); + + // Apply shader precision information to resource bindings + void AddSamplerPrecisions(HLSLccSamplerPrecisionInfo &info); + + 
uint32_t ui32MajorVersion; + uint32_t ui32MinorVersion; + + std::vector psInputSignatures; + std::vector psOutputSignatures; + std::vector psPatchConstantSignatures; + + std::vector psResourceBindings; + + std::vector psConstantBuffers; + ConstantBuffer* psThisPointerConstBuffer; + + std::vector psClassTypes; + std::vector psClassInstances; + + //Func table ID to class name ID. + HLSLcc::growing_vector aui32TableIDToTypeID; + + HLSLcc::growing_vector aui32ResourceMap[RGROUP_COUNT]; + + HLSLcc::growing_vector sGroupSharedVarType; + + TESSELLATOR_PARTITIONING eTessPartitioning; + TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim; + uint32_t ui32TessInputControlPointCount; + uint32_t ui32TessOutputControlPointCount; + TESSELLATOR_DOMAIN eTessDomain; + bool bEarlyFragmentTests; +}; diff --git a/third_party/HLSLcc/include/UnityInstancingFlexibleArraySize.h b/third_party/HLSLcc/include/UnityInstancingFlexibleArraySize.h new file mode 100644 index 0000000..70fb308 --- /dev/null +++ b/third_party/HLSLcc/include/UnityInstancingFlexibleArraySize.h @@ -0,0 +1,23 @@ +#pragma once + +// In Unity, instancing array sizes should be able to be dynamically patched at runtime by defining the macro. + +#include +#define UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO "UNITY_RUNTIME_INSTANCING_ARRAY_SIZE" +#define UNITY_PRETRANSFORM_CONSTANT_NAME "UnityDisplayOrientationPreTransform" + +const unsigned int kArraySizeConstantID = 0; +const unsigned int kPreTransformConstantID = 1; + +// TODO: share with Runtime/GfxDevice/InstancingUtilities.h +inline bool IsUnityInstancingConstantBufferName(const char* cbName) +{ + static const char kInstancedCbNamePrefix[] = "UnityInstancing"; + return strncmp(cbName, kInstancedCbNamePrefix, sizeof(kInstancedCbNamePrefix) - 1) == 0; +} + +inline bool IsPreTransformConstantBufferName(const char* cbName) +{ + static const char kPreTransformCbNamePrefix[] = "UnityDisplayOrientationPreTransformData"; + return strncmp(cbName, kPreTransformCbNamePrefix, sizeof(kPreTransformCbNamePrefix) - 1) == 0; +} diff --git a/third_party/HLSLcc/include/growing_array.h b/third_party/HLSLcc/include/growing_array.h new file mode 100644 index 0000000..d558665 --- /dev/null +++ b/third_party/HLSLcc/include/growing_array.h @@ -0,0 +1,45 @@ +#pragma once + +namespace HLSLcc +{ + // A vector that automatically grows when written to, fills the intermediate ones with default value. + // Reading from an index returns the default value if attempting to access out of bounds. 
+ template class growing_vector + { + public: + growing_vector() : data() {} + + std::vector data; + + T & operator[](std::size_t idx) + { + if (idx >= data.size()) + data.resize((idx + 1) * 2); + return data[idx]; + } + + const T & operator[](std::size_t idx) const + { + static T defaultValue = T(); + if (idx >= data.size()) + return defaultValue; + return data[idx]; + } + }; + + // Same but with bool specialization + template<> class growing_vector + { + public: + growing_vector() : data() {} + + std::vector data; + + std::vector::reference operator[](std::size_t idx) + { + if (idx >= data.size()) + data.resize((idx + 1) * 2, false); + return data[idx]; + } + }; +} diff --git a/third_party/HLSLcc/include/hlslcc.h b/third_party/HLSLcc/include/hlslcc.h new file mode 100644 index 0000000..dc7853a --- /dev/null +++ b/third_party/HLSLcc/include/hlslcc.h @@ -0,0 +1,816 @@ +#ifndef HLSLCC_H_ +#define HLSLCC_H_ + +#include +#include +#include +#include + +#if defined(_WIN32) && defined(HLSLCC_DYNLIB) + #define HLSLCC_APIENTRY __stdcall + #if defined(libHLSLcc_EXPORTS) + #define HLSLCC_API __declspec(dllexport) + #else + #define HLSLCC_API __declspec(dllimport) + #endif +#else + #define HLSLCC_APIENTRY + #define HLSLCC_API +#endif + +#include +#include + +typedef enum +{ + LANG_DEFAULT,// Depends on the HLSL shader model. + LANG_ES_100, LANG_ES_FIRST = LANG_ES_100, + LANG_ES_300, + LANG_ES_310, LANG_ES_LAST = LANG_ES_310, + LANG_120, LANG_GL_FIRST = LANG_120, + LANG_130, + LANG_140, + LANG_150, + LANG_330, + LANG_400, + LANG_410, + LANG_420, + LANG_430, + LANG_440, LANG_GL_LAST = LANG_440, + LANG_METAL, +} GLLang; + +typedef struct GlExtensions +{ + uint32_t ARB_explicit_attrib_location : 1; + uint32_t ARB_explicit_uniform_location : 1; + uint32_t ARB_shading_language_420pack : 1; + uint32_t OVR_multiview : 1; + uint32_t EXT_shader_framebuffer_fetch : 1; +} GlExtensions; + +#include "ShaderInfo.h" +#include "UnityInstancingFlexibleArraySize.h" + +typedef std::vector TextureSamplerPairs; + +typedef enum INTERPOLATION_MODE +{ + INTERPOLATION_UNDEFINED = 0, + INTERPOLATION_CONSTANT = 1, + INTERPOLATION_LINEAR = 2, + INTERPOLATION_LINEAR_CENTROID = 3, + INTERPOLATION_LINEAR_NOPERSPECTIVE = 4, + INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID = 5, + INTERPOLATION_LINEAR_SAMPLE = 6, + INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE = 7, +} INTERPOLATION_MODE; + +#define PS_FLAG_VERTEX_SHADER 0x1 +#define PS_FLAG_HULL_SHADER 0x2 +#define PS_FLAG_DOMAIN_SHADER 0x4 +#define PS_FLAG_GEOMETRY_SHADER 0x8 +#define PS_FLAG_PIXEL_SHADER 0x10 + +#define TO_FLAG_NONE 0x0 +#define TO_FLAG_INTEGER 0x1 +#define TO_FLAG_NAME_ONLY 0x2 +#define TO_FLAG_DECLARATION_NAME 0x4 +#define TO_FLAG_DESTINATION 0x8 //Operand is being written to by assignment. +#define TO_FLAG_UNSIGNED_INTEGER 0x10 +#define TO_FLAG_DOUBLE 0x20 +// --- TO_AUTO_BITCAST_TO_FLOAT --- +//If the operand is an integer temp variable then this flag +//indicates that the temp has a valid floating point encoding +//and that the current expression expects the operand to be floating point +//and therefore intBitsToFloat must be applied to that variable. +#define TO_AUTO_BITCAST_TO_FLOAT 0x40 +#define TO_AUTO_BITCAST_TO_INT 0x80 +#define TO_AUTO_BITCAST_TO_UINT 0x100 +// AUTO_EXPAND flags automatically expand the operand to at least (i/u)vecX +// to match HLSL functionality. 
+#define TO_AUTO_EXPAND_TO_VEC2 0x200 +#define TO_AUTO_EXPAND_TO_VEC3 0x400 +#define TO_AUTO_EXPAND_TO_VEC4 0x800 +#define TO_FLAG_BOOL 0x1000 +// These flags are only used for Metal: +// Force downscaling of the operand to match +// the other operand (Metal doesn't like mixing halfs with floats) +#define TO_FLAG_FORCE_HALF 0x2000 + +typedef enum +{ + INVALID_SHADER = -1, + PIXEL_SHADER, + VERTEX_SHADER, + GEOMETRY_SHADER, + HULL_SHADER, + DOMAIN_SHADER, + COMPUTE_SHADER, +} SHADER_TYPE; + +// Enum for texture dimension reflection data +typedef enum +{ + TD_FLOAT = 0, + TD_INT, + TD_2D, + TD_3D, + TD_CUBE, + TD_2DSHADOW, + TD_2DARRAY, + TD_CUBEARRAY +} HLSLCC_TEX_DIMENSION; + +// The prefix for all temporary variables used by the generated code. +// Using a texture or uniform name like this will cause conflicts +#define HLSLCC_TEMP_PREFIX "u_xlat" + +typedef std::vector > MemberDefinitions; + +// We store struct definition contents inside a vector of strings +struct StructDefinition +{ + StructDefinition() : m_Members(), m_Dependencies(), m_IsPrinted(false) {} + + MemberDefinitions m_Members; // A vector of strings with the struct members + std::vector m_Dependencies; // A vector of struct names this struct depends on. + bool m_IsPrinted; // Has this struct been printed out yet? +}; + +typedef std::map StructDefinitions; + +// Map of extra function definitions we need to add before the shader body but after the declarations. +typedef std::map FunctionDefinitions; + +// A helper class for allocating binding slots +// (because both UAVs and textures use the same slots in Metal, also constant buffers and other buffers etc) +class BindingSlotAllocator +{ + typedef std::map SlotMap; + SlotMap m_Allocations; + uint32_t m_ShaderStageAllocations; +public: + BindingSlotAllocator() : m_Allocations(), m_ShaderStageAllocations(0) + { + for (int i = MAX_RESOURCE_BINDINGS - 1; i >= 0; i--) + m_FreeSlots.push_back(i); + } + + enum BindType + { + ConstantBuffer = 0, + RWBuffer, + Texture, + UAV + }; + + uint32_t GetBindingSlot(uint32_t regNo, BindType type) + { + // The key is regNumber with the bindtype stored to highest 16 bits + uint32_t key = (m_ShaderStageAllocations + regNo) | (uint32_t(type) << 16); + SlotMap::iterator itr = m_Allocations.find(key); + if (itr == m_Allocations.end()) + { + uint32_t slot = m_FreeSlots.back(); + m_FreeSlots.pop_back(); + m_Allocations.insert(std::make_pair(key, slot)); + return slot; + } + return itr->second; + } + + // Func for reserving binding slots with the original reg number. + // Used for fragment shader UAVs (SetRandomWriteTarget etc). + void ReserveBindingSlot(uint32_t regNo, BindType type) + { + uint32_t key = regNo | (uint32_t(type) << 16); + m_Allocations.insert(std::make_pair(key, regNo)); + + // Remove regNo from free slots + for (int i = m_FreeSlots.size() - 1; i >= 0; i--) + { + if (m_FreeSlots[i] == regNo) + { + m_FreeSlots.erase(m_FreeSlots.begin() + i); + return; + } + } + } + + uint32_t PeekFirstFreeSlot() const + { + return m_FreeSlots.back(); + } + + uint32_t SaveTotalShaderStageAllocationsCount() + { + m_ShaderStageAllocations = m_Allocations.size(); + return m_ShaderStageAllocations; + } + +private: + std::vector m_FreeSlots; +}; + +//The shader stages (Vertex, Pixel et al) do not depend on each other +//in HLSL. GLSL is a different story. HLSLCrossCompiler requires +//that hull shaders must be compiled before domain shaders, and +//the pixel shader must be compiled before all of the others. 
+//During compilation the GLSLCrossDependencyData struct will +//carry over any information needed about a different shader stage +//in order to construct valid GLSL shader combinations. + + +//Using GLSLCrossDependencyData is optional. However some shader +//combinations may show link failures, or runtime errors. +class GLSLCrossDependencyData +{ +public: + + struct GLSLBufferBindPointInfo + { + uint32_t slot; + bool known; + }; + + // A container for a single Vulkan resource binding ( pair) + struct VulkanResourceBinding + { + uint32_t set; + uint32_t binding; + }; + + enum GLSLBufferType + { + BufferType_ReadWrite, + BufferType_Constant, + BufferType_SSBO, + BufferType_Texture, + BufferType_UBO, + + BufferType_Count, + BufferType_Generic = BufferType_ReadWrite + }; + +private: + //Required if PixelInterpDependency is true + std::vector pixelInterpolation; + + // Map of varying locations, indexed by varying names. + typedef std::map VaryingLocations; + + static const int MAX_NAMESPACES = 6; // Max namespaces: vert input, hull input, domain input, geom input, ps input, (ps output) + + VaryingLocations varyingLocationsMap[MAX_NAMESPACES]; + uint32_t nextAvailableVaryingLocation[MAX_NAMESPACES]; + + typedef std::map VulkanResourceBindings; + VulkanResourceBindings m_VulkanResourceBindings; + uint32_t m_NextAvailableVulkanResourceBinding[8]; // one per set. + + typedef std::map GLSLResouceBindings; + +public: + GLSLResouceBindings m_GLSLResourceBindings; + uint32_t m_NextAvailableGLSLResourceBinding[BufferType_Count]; // UAV, Constant and Buffers have seperate binding ranges + uint32_t m_StructuredBufferBindPoints[MAX_RESOURCE_BINDINGS]; // for the old style bindings + + inline int GetVaryingNamespace(SHADER_TYPE eShaderType, bool isInput) + { + switch (eShaderType) + { + case VERTEX_SHADER: + return isInput ? 0 : 1; + + case HULL_SHADER: + return isInput ? 1 : 2; + + case DOMAIN_SHADER: + return isInput ? 2 : 3; + + case GEOMETRY_SHADER: + // The input depends on whether there's a tessellation shader before us + if (isInput) + { + return ui32ProgramStages & PS_FLAG_DOMAIN_SHADER ? 3 : 1; + } + return 4; + + case PIXEL_SHADER: + // The inputs can come from geom shader, domain shader or directly from vertex shader + if (isInput) + { + if (ui32ProgramStages & PS_FLAG_GEOMETRY_SHADER) + { + return 4; + } + else if (ui32ProgramStages & PS_FLAG_DOMAIN_SHADER) + { + return 3; + } + else + { + return 1; + } + } + return 5; // This value never really used + default: + return 0; + } + } + +public: + GLSLCrossDependencyData() + : eTessPartitioning(), + eTessOutPrim(), + fMaxTessFactor(64.0), + numPatchesInThreadGroup(0), + hasControlPoint(false), + hasPatchConstant(false), + ui32ProgramStages(0), + m_ExtBlendModes() + { + memset(nextAvailableVaryingLocation, 0, sizeof(nextAvailableVaryingLocation)); + memset(m_NextAvailableVulkanResourceBinding, 0, sizeof(m_NextAvailableVulkanResourceBinding)); + memset(m_NextAvailableGLSLResourceBinding, 0, sizeof(m_NextAvailableGLSLResourceBinding)); + } + + // Retrieve the location for a varying with a given name. + // If the name doesn't already have an allocated location, allocate one + // and store it into the map. 
+ inline uint32_t GetVaryingLocation(const std::string &name, SHADER_TYPE eShaderType, bool isInput, bool keepLocation, uint32_t maxSemanticIndex) + { + int nspace = GetVaryingNamespace(eShaderType, isInput); + VaryingLocations::iterator itr = varyingLocationsMap[nspace].find(name); + if (itr != varyingLocationsMap[nspace].end()) + return itr->second; + + if (keepLocation) + { + // Try to generate consistent varying locations based on the semantic indices in the hlsl source, i.e "TEXCOORD11" gets assigned to layout(location = 11) + + // Inspect last 2 characters in name + size_t len = name.length(); + + if (len > 1) + { + if (isdigit(name[len - 1])) + { + uint32_t index = 0; + if (isdigit(name[len - 2])) + index = atoi(&name[len - 2]); // 2-digits index + else + index = atoi(&name[len - 1]); // 1-digit index + + if (index < 32) // Some platforms only allow 32 varying locations + { + // Check that index is not already used + bool canUseIndex = true; + for (VaryingLocations::iterator it = varyingLocationsMap[nspace].begin(); it != varyingLocationsMap[nspace].end(); ++it) + { + if (it->second == index) + { + canUseIndex = false; + break; + } + } + + if (canUseIndex) + { + varyingLocationsMap[nspace].insert(std::make_pair(name, index)); + return index; + } + } + } + } + + // fallback: pick an unused index (max of already allocated AND of semanticIndices found by SignatureAnalysis + uint32_t maxIndexAlreadyAssigned = 0; + for (VaryingLocations::iterator it = varyingLocationsMap[nspace].begin(); it != varyingLocationsMap[nspace].end(); ++it) + maxIndexAlreadyAssigned = std::max(maxIndexAlreadyAssigned, it->second); + + uint32_t fallbackIndex = std::max(maxIndexAlreadyAssigned + 1, maxSemanticIndex + 1); + varyingLocationsMap[nspace].insert(std::make_pair(name, fallbackIndex)); + return fallbackIndex; + } + else + { + uint32_t newKey = nextAvailableVaryingLocation[nspace]; + nextAvailableVaryingLocation[nspace]++; + varyingLocationsMap[nspace].insert(std::make_pair(name, newKey)); + return newKey; + } + } + + // Retrieve the binding for a resource (texture, constant buffer, image) with a given name + // If not found, allocate a new one (in set 0) and return that + // The returned value is a pair of + // If the name contains "hlslcc_set_X_bind_Y", those values (from the first found occurence in the name) + // will be used instead, and all occurences of that string will be removed from name, so name parameter can be modified + // if allocRoomForCounter is true, the following binding number in the same set will be allocated with name + '_counter' + inline VulkanResourceBinding GetVulkanResourceBinding(std::string &name, bool allocRoomForCounter = false, uint32_t preferredSet = 0) + { + // scan for the special marker + const char *marker = "Xhlslcc_set_%d_bind_%dX"; + uint32_t Set = 0, Binding = 0; + size_t startLoc = name.find("Xhlslcc"); + if ((startLoc != std::string::npos) && (sscanf(name.c_str() + startLoc, marker, &Set, &Binding) == 2)) + { + // Get rid of all markers + while ((startLoc = name.find("Xhlslcc")) != std::string::npos) + { + size_t endLoc = name.find('X', startLoc + 1); + if (endLoc == std::string::npos) + break; + name.erase(startLoc, endLoc - startLoc + 1); + } + // Add to map + VulkanResourceBinding newBind = { Set, Binding }; + m_VulkanResourceBindings.insert(std::make_pair(name, newBind)); + if (allocRoomForCounter) + { + VulkanResourceBinding counterBind = { Set, Binding + 1 }; + m_VulkanResourceBindings.insert(std::make_pair(name + "_counter", counterBind)); + } + + return 
newBind; + } + + VulkanResourceBindings::iterator itr = m_VulkanResourceBindings.find(name); + if (itr != m_VulkanResourceBindings.end()) + return itr->second; + + // Allocate a new one + VulkanResourceBinding newBind = { preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet] }; + m_NextAvailableVulkanResourceBinding[preferredSet]++; + m_VulkanResourceBindings.insert(std::make_pair(name, newBind)); + if (allocRoomForCounter) + { + VulkanResourceBinding counterBind = { preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet] }; + m_NextAvailableVulkanResourceBinding[preferredSet]++; + m_VulkanResourceBindings.insert(std::make_pair(name + "_counter", counterBind)); + } + return newBind; + } + + // GLSL Bind point handling logic + // Handles both 'old style' fill around fixed UAV and new style partitioned offsets with fixed UAV locations + + // HLSL has separate register spaces for UAV and structured buffers. GLSL has shared register space for all buffers. + // The aim here is to preserve the UAV buffer bindings as they are and use remaining binding points for structured buffers. + // In this step make m_structuredBufferBindPoints contain increasingly ordered uints starting from zero. + // This is only used when we are doing old style binding setup + void SetupGLSLResourceBindingSlotsIndices() + { + for (uint32_t i = 0; i < MAX_RESOURCE_BINDINGS; i++) + { + m_StructuredBufferBindPoints[i] = i; + } + } + + void RemoveBindPointFromAvailableList(uint32_t bindPoint) + { + for (uint32_t i = 0; i < MAX_RESOURCE_BINDINGS - 1 && m_StructuredBufferBindPoints[i] <= bindPoint; i++) + { + if (m_StructuredBufferBindPoints[i] == bindPoint) // Remove uav binding point from the list by copying array remainder here + { + memcpy(&m_StructuredBufferBindPoints[i], &m_StructuredBufferBindPoints[i + 1], (MAX_RESOURCE_BINDINGS - 1 - i) * sizeof(uint32_t)); + break; + } + } + } + + void ReserveNamedBindPoint(const std::string &name, uint32_t bindPoint, GLSLBufferType type) + { + m_GLSLResourceBindings.insert(std::make_pair(name, bindPoint)); + RemoveBindPointFromAvailableList(bindPoint); + } + + bool ShouldUseBufferSpecificBinding(GLSLBufferType bufferType) + { + return bufferType == BufferType_Constant || bufferType == BufferType_Texture || bufferType == BufferType_UBO; + } + + uint32_t GetGLSLBufferBindPointIndex(GLSLBufferType bufferType) + { + uint32_t binding = -1; + + if (ShouldUseBufferSpecificBinding(bufferType)) + { + binding = m_NextAvailableGLSLResourceBinding[bufferType]; + } + else + { + binding = m_StructuredBufferBindPoints[m_NextAvailableGLSLResourceBinding[BufferType_Generic]]; + } + + return binding; + } + + void UpdateResourceBindingIndex(GLSLBufferType bufferType) + { + if (ShouldUseBufferSpecificBinding(bufferType)) + { + m_NextAvailableGLSLResourceBinding[bufferType]++; + } + else + { + m_NextAvailableGLSLResourceBinding[BufferType_Generic]++; + } + } + + inline GLSLBufferBindPointInfo GetGLSLResourceBinding(const std::string &name, GLSLBufferType bufferType) + { + GLSLResouceBindings::iterator itr = m_GLSLResourceBindings.find(name); + if (itr != m_GLSLResourceBindings.end()) + { + return GLSLBufferBindPointInfo{ itr->second, true }; + } + + uint32_t binding = GetGLSLBufferBindPointIndex(bufferType); + UpdateResourceBindingIndex(bufferType); + + m_GLSLResourceBindings.insert(std::make_pair(name, binding)); + + return GLSLBufferBindPointInfo{ binding, false }; + } + + //dcl_tessellator_partitioning and dcl_tessellator_output_primitive appear in hull shader for D3D, + //but 
they appear on inputs inside domain shaders for GL. + //Hull shader must be compiled before domain so the + //ensure correct partitioning and primitive type information + //can be saved when compiling hull and passed to domain compilation. + TESSELLATOR_PARTITIONING eTessPartitioning; + TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim; + float fMaxTessFactor; + int numPatchesInThreadGroup; + bool hasControlPoint; + bool hasPatchConstant; + + // Bitfield for the shader stages this program is going to include (see PS_FLAG_*). + // Needed so we can construct proper shader input and output names + uint32_t ui32ProgramStages; + + std::vector m_ExtBlendModes; // The blend modes (from KHR_blend_equation_advanced) requested for this shader. See ext spec for list. + + inline INTERPOLATION_MODE GetInterpolationMode(uint32_t regNo) + { + if (regNo >= pixelInterpolation.size()) + return INTERPOLATION_UNDEFINED; + else + return pixelInterpolation[regNo]; + } + + inline void SetInterpolationMode(uint32_t regNo, INTERPOLATION_MODE mode) + { + if (regNo >= pixelInterpolation.size()) + pixelInterpolation.resize((regNo + 1) * 2, INTERPOLATION_UNDEFINED); + + pixelInterpolation[regNo] = mode; + } + + struct CompareFirst + { + CompareFirst(std::string val) : m_Val(val) {} + bool operator()(const std::pair& elem) const + { + return m_Val == elem.first; + } + + private: + std::string m_Val; + }; + + inline bool IsMemberDeclared(const std::string &name) + { + if (std::find_if(m_SharedFunctionMembers.begin(), m_SharedFunctionMembers.end(), CompareFirst(name)) != m_SharedFunctionMembers.end()) + return true; + return false; + } + + MemberDefinitions m_SharedFunctionMembers; + std::vector m_SharedDependencies; + BindingSlotAllocator m_SharedTextureSlots, m_SharedSamplerSlots; + BindingSlotAllocator m_SharedBufferSlots; + + inline void ClearCrossDependencyData() + { + pixelInterpolation.clear(); + for (int i = 0; i < MAX_NAMESPACES; i++) + { + varyingLocationsMap[i].clear(); + nextAvailableVaryingLocation[i] = 0; + } + m_SharedFunctionMembers.clear(); + m_SharedDependencies.clear(); + } + + bool IsHullShaderInputAlreadyDeclared(const std::string& name) + { + bool isKnown = false; + + for (size_t idx = 0, end = m_hullShaderInputs.size(); idx < end; ++idx) + { + if (m_hullShaderInputs[idx] == name) + { + isKnown = true; + break; + } + } + + return isKnown; + } + + void RecordHullShaderInput(const std::string& name) + { + m_hullShaderInputs.push_back(name); + } + + std::vector m_hullShaderInputs; +}; + +struct GLSLShader +{ + int shaderType; //One of the GL enums. + std::string sourceCode; + ShaderInfo reflection; + GLLang GLSLLanguage; + TextureSamplerPairs textureSamplers; // HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS fills this out +}; + +// Interface for retrieving reflection and diagnostics data +class HLSLccReflection +{ +public: + HLSLccReflection() {} + virtual ~HLSLccReflection() {} + + // Called on errors or diagnostic messages + virtual void OnDiagnostics(const std::string &error, int line, bool isError) {} + + virtual void OnInputBinding(const std::string &name, int bindIndex) {} + + // Returns false if this constant buffer is not needed for this shader. This info can be used for pruning unused + // constant buffers and vars from compute shaders where we need broader context than a single kernel to know + // if something can be dropped, as the constant buffers are shared between all kernels in a .compute file. 
+ virtual bool OnConstantBuffer(const std::string &name, size_t bufferSize, size_t memberCount) { return true; } + + // Returns false if this constant var is not needed for this shader. See above. + virtual bool OnConstant(const std::string &name, int bindIndex, SHADER_VARIABLE_TYPE cType, int rows, int cols, bool isMatrix, int arraySize, bool isUsed) { return true; } + + virtual void OnConstantBufferBinding(const std::string &name, int bindIndex) {} + virtual void OnTextureBinding(const std::string &name, int bindIndex, int samplerIndex, bool multisampled, HLSLCC_TEX_DIMENSION dim, bool isUAV) {} + virtual void OnBufferBinding(const std::string &name, int bindIndex, bool isUAV) {} + virtual void OnThreadGroupSize(unsigned int xSize, unsigned int ySize, unsigned int zSize) {} + virtual void OnTessellationInfo(uint32_t tessPartitionMode, uint32_t tessOutputWindingOrder, uint32_t tessMaxFactor, uint32_t tessNumPatchesInThreadGroup) {} + virtual void OnTessellationKernelInfo(uint32_t patchKernelBufferCount) {} + + // these are for now metal only (but can be trivially added for other backends if needed) + // they are useful mostly for diagnostics as interim values are actually hidden from user + virtual void OnVertexProgramOutput(const std::string& name, const std::string& semantic, int semanticIndex) {} + virtual void OnBuiltinOutput(SPECIAL_NAME name) {} + virtual void OnFragmentOutputDeclaration(int numComponents, int outputIndex) {} + + + enum AccessType + { + ReadAccess = 1 << 0, + WriteAccess = 1 << 1 + }; + + virtual void OnStorageImage(int bindIndex, unsigned int access) {} +}; + + +/*HLSL constant buffers are treated as default-block unform arrays by default. This is done + to support versions of GLSL which lack ARB_uniform_buffer_object functionality. + Setting this flag causes each one to have its own uniform block. + Note: Currently the nth const buffer will be named UnformBufferN. This is likey to change to the original HLSL name in the future.*/ +static const unsigned int HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT = 0x1; + +static const unsigned int HLSLCC_FLAG_ORIGIN_UPPER_LEFT = 0x2; + +static const unsigned int HLSLCC_FLAG_PIXEL_CENTER_INTEGER = 0x4; + +static const unsigned int HLSLCC_FLAG_GLOBAL_CONSTS_NEVER_IN_UBO = 0x8; + +//GS enabled? +//Affects vertex shader (i.e. need to compile vertex shader again to use with/without GS). +//This flag is needed in order for the interfaces between stages to match when GS is in use. +//PS inputs VtxGeoOutput +//GS outputs VtxGeoOutput +//Vs outputs VtxOutput if GS enabled. VtxGeoOutput otherwise. +static const unsigned int HLSLCC_FLAG_GS_ENABLED = 0x10; + +static const unsigned int HLSLCC_FLAG_TESS_ENABLED = 0x20; + +//Either use this flag or glBindFragDataLocationIndexed. +//When set the first pixel shader output is the first input to blend +//equation, the others go to the second input. +static const unsigned int HLSLCC_FLAG_DUAL_SOURCE_BLENDING = 0x40; + +//If set, shader inputs and outputs are declared with their semantic name. +static const unsigned int HLSLCC_FLAG_INOUT_SEMANTIC_NAMES = 0x80; +//If set, shader inputs and outputs are declared with their semantic name appended. +static const unsigned int HLSLCC_FLAG_INOUT_APPEND_SEMANTIC_NAMES = 0x100; + +//If set, combines texture/sampler pairs used together into samplers named "texturename_X_samplername". 
+static const unsigned int HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS = 0x200; + +//If set, attribute and uniform explicit location qualifiers are disabled (even if the language version supports that) +static const unsigned int HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS = 0x400; + +//If set, global uniforms are not stored in a struct. +static const unsigned int HLSLCC_FLAG_DISABLE_GLOBALS_STRUCT = 0x800; + +//If set, image declarations will always have binding and format qualifiers. +static const unsigned int HLSLCC_FLAG_GLES31_IMAGE_QUALIFIERS = 0x1000; + +// If set, treats sampler names ending with _highp, _mediump, and _lowp as sampler precision qualifiers +// Also removes that prefix from generated output +static const unsigned int HLSLCC_FLAG_SAMPLER_PRECISION_ENCODED_IN_NAME = 0x2000; + +// If set, adds location qualifiers to intra-shader varyings. +static const unsigned int HLSLCC_FLAG_SEPARABLE_SHADER_OBJECTS = 0x4000; // NOTE: obsolete flag (behavior enabled by this flag began default in 83a16a1829cf) + +// If set, wraps all uniform buffer declarations in a preprocessor macro #ifdef HLSLCC_ENABLE_UNIFORM_BUFFERS +// so that if that macro is undefined, all UBO declarations will become normal uniforms +static const unsigned int HLSLCC_FLAG_WRAP_UBO = 0x8000; + +// If set, skips all members of the $Globals constant buffer struct that are not referenced in the shader code +static const unsigned int HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS = 0x10000; + +#define HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING "hlslcc_mtx%dx%d" + +// If set, translates all matrix declarations into vec4 arrays (as the DX bytecode treats them), and prefixes the name with 'hlslcc_mtxx' +static const unsigned int HLSLCC_FLAG_TRANSLATE_MATRICES = 0x20000; + +// If set, emits Vulkan-style (set, binding) bindings, also captures that info from any declaration named "_hlslcc_set_X_bind_Y" +// Unless bindings are given explicitly, they are allocated into set 0 (map stored in GLSLCrossDependencyData) +static const unsigned int HLSLCC_FLAG_VULKAN_BINDINGS = 0x40000; + +// If set, metal output will use linear sampler for shadow compares, otherwise point sampler. +static const unsigned int HLSLCC_FLAG_METAL_SHADOW_SAMPLER_LINEAR = 0x80000; + +// If set, avoid emit atomic counter (ARB_shader_atomic_counters) and use atomic functions provided by ARB_shader_storage_buffer_object instead. +static const unsigned int HLSLCC_FLAG_AVOID_SHADER_ATOMIC_COUNTERS = 0x100000; + +// Unused 0x200000; + +// If set, this shader uses the GLSL extension EXT_shader_framebuffer_fetch +static const unsigned int HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH = 0x400000; + +// Build for Switch. +static const unsigned int HLSLCC_FLAG_NVN_TARGET = 0x800000; + +// If set, generate an instance name for constant buffers. GLSL specs 4.5 disallows uniform variables from different constant buffers sharing the same name +// as long as they are part of the same final linked program. Uniform buffer instance names solve this cross-shader symbol conflict issue. 
+static const unsigned int HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME = 0x1000000; + +// Massage shader steps into Metal compute kernel from vertex/hull shaders + post-tessellation vertex shader from domain shader +static const unsigned int HLSLCC_FLAG_METAL_TESSELLATION = 0x2000000; + +// Disable fastmath +static const unsigned int HLSLCC_FLAG_DISABLE_FASTMATH = 0x4000000; + +//If set, uniform explicit location qualifiers are enabled (even if the language version doesn't support that) +static const unsigned int HLSLCC_FLAG_FORCE_EXPLICIT_LOCATIONS = 0x8000000; + +// If set, each line of the generated source will be preceded by a comment specifying which DirectX bytecode instruction it maps to +static const unsigned int HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS = 0x10000000; + +// If set, try to generate consistent varying locations based on the semantic indices in the hlsl source, i.e "TEXCOORD11" gets assigned to layout(location = 11) +static const unsigned int HLSLCC_FLAG_KEEP_VARYING_LOCATIONS = 0x20000000; + +// Code generation might vary for mobile targets, or using lower sampler precision than full by default +static const unsigned int HLSLCC_FLAG_MOBILE_TARGET = 0x40000000; + +#ifdef __cplusplus +extern "C" { +#endif + +HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromFile(const char* filename, + unsigned int flags, + GLLang language, + const GlExtensions *extensions, + GLSLCrossDependencyData* dependencies, + HLSLccSamplerPrecisionInfo& samplerPrecisions, + HLSLccReflection& reflectionCallbacks, + GLSLShader* result +); + +HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader, + unsigned int flags, + GLLang language, + const GlExtensions *extensions, + GLSLCrossDependencyData* dependencies, + HLSLccSamplerPrecisionInfo& samplerPrecisions, + HLSLccReflection& reflectionCallbacks, + GLSLShader* result); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/third_party/HLSLcc/include/hlslcc.hpp b/third_party/HLSLcc/include/hlslcc.hpp new file mode 100644 index 0000000..67a792a --- /dev/null +++ b/third_party/HLSLcc/include/hlslcc.hpp @@ -0,0 +1,3 @@ +extern "C" { +#include "hlslcc.h" +} diff --git a/third_party/HLSLcc/include/pstdint.h b/third_party/HLSLcc/include/pstdint.h new file mode 100644 index 0000000..5a53278 --- /dev/null +++ b/third_party/HLSLcc/include/pstdint.h @@ -0,0 +1,799 @@ +/* A portable stdint.h + **************************************************************************** + * BSD License: + **************************************************************************** + * + * Copyright (c) 2005-2011 Paul Hsieh + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + **************************************************************************** + * + * Version 0.1.12 + * + * The ANSI C standard committee, for the C99 standard, specified the + * inclusion of a new standard include file called stdint.h. This is + * a very useful and long desired include file which contains several + * very precise definitions for integer scalar types that is + * critically important for making portable several classes of + * applications including cryptography, hashing, variable length + * integer libraries and so on. But for most developers its likely + * useful just for programming sanity. + * + * The problem is that most compiler vendors have decided not to + * implement the C99 standard, and the next C++ language standard + * (which has a lot more mindshare these days) will be a long time in + * coming and its unknown whether or not it will include stdint.h or + * how much adoption it will have. Either way, it will be a long time + * before all compilers come with a stdint.h and it also does nothing + * for the extremely large number of compilers available today which + * do not include this file, or anything comparable to it. + * + * So that's what this file is all about. Its an attempt to build a + * single universal include file that works on as many platforms as + * possible to deliver what stdint.h is supposed to. A few things + * that should be noted about this file: + * + * 1) It is not guaranteed to be portable and/or present an identical + * interface on all platforms. The extreme variability of the + * ANSI C standard makes this an impossibility right from the + * very get go. Its really only meant to be useful for the vast + * majority of platforms that possess the capability of + * implementing usefully and precisely defined, standard sized + * integer scalars. Systems which are not intrinsically 2s + * complement may produce invalid constants. + * + * 2) There is an unavoidable use of non-reserved symbols. + * + * 3) Other standard include files are invoked. + * + * 4) This file may come in conflict with future platforms that do + * include stdint.h. The hope is that one or the other can be + * used with no real difference. + * + * 5) In the current verison, if your platform can't represent + * int32_t, int16_t and int8_t, it just dumps out with a compiler + * error. + * + * 6) 64 bit integers may or may not be defined. Test for their + * presence with the test: #ifdef INT64_MAX or #ifdef UINT64_MAX. + * Note that this is different from the C99 specification which + * requires the existence of 64 bit support in the compiler. If + * this is not defined for your platform, yet it is capable of + * dealing with 64 bits then it is because this file has not yet + * been extended to cover all of your system's capabilities. + * + * 7) (u)intptr_t may or may not be defined. Test for its presence + * with the test: #ifdef PTRDIFF_MAX. 
If this is not defined + * for your platform, then it is because this file has not yet + * been extended to cover all of your system's capabilities, not + * because its optional. + * + * 8) The following might not been defined even if your platform is + * capable of defining it: + * + * WCHAR_MIN + * WCHAR_MAX + * (u)int64_t + * PTRDIFF_MIN + * PTRDIFF_MAX + * (u)intptr_t + * + * 9) The following have not been defined: + * + * WINT_MIN + * WINT_MAX + * + * 10) The criteria for defining (u)int_least(*)_t isn't clear, + * except for systems which don't have a type that precisely + * defined 8, 16, or 32 bit types (which this include file does + * not support anyways). Default definitions have been given. + * + * 11) The criteria for defining (u)int_fast(*)_t isn't something I + * would trust to any particular compiler vendor or the ANSI C + * committee. It is well known that "compatible systems" are + * commonly created that have very different performance + * characteristics from the systems they are compatible with, + * especially those whose vendors make both the compiler and the + * system. Default definitions have been given, but its strongly + * recommended that users never use these definitions for any + * reason (they do *NOT* deliver any serious guarantee of + * improved performance -- not in this file, nor any vendor's + * stdint.h). + * + * 12) The following macros: + * + * PRINTF_INTMAX_MODIFIER + * PRINTF_INT64_MODIFIER + * PRINTF_INT32_MODIFIER + * PRINTF_INT16_MODIFIER + * PRINTF_LEAST64_MODIFIER + * PRINTF_LEAST32_MODIFIER + * PRINTF_LEAST16_MODIFIER + * PRINTF_INTPTR_MODIFIER + * + * are strings which have been defined as the modifiers required + * for the "d", "u" and "x" printf formats to correctly output + * (u)intmax_t, (u)int64_t, (u)int32_t, (u)int16_t, (u)least64_t, + * (u)least32_t, (u)least16_t and (u)intptr_t types respectively. + * PRINTF_INTPTR_MODIFIER is not defined for some systems which + * provide their own stdint.h. PRINTF_INT64_MODIFIER is not + * defined if INT64_MAX is not defined. These are an extension + * beyond what C99 specifies must be in stdint.h. + * + * In addition, the following macros are defined: + * + * PRINTF_INTMAX_HEX_WIDTH + * PRINTF_INT64_HEX_WIDTH + * PRINTF_INT32_HEX_WIDTH + * PRINTF_INT16_HEX_WIDTH + * PRINTF_INT8_HEX_WIDTH + * PRINTF_INTMAX_DEC_WIDTH + * PRINTF_INT64_DEC_WIDTH + * PRINTF_INT32_DEC_WIDTH + * PRINTF_INT16_DEC_WIDTH + * PRINTF_INT8_DEC_WIDTH + * + * Which specifies the maximum number of characters required to + * print the number of that type in either hexadecimal or decimal. + * These are an extension beyond what C99 specifies must be in + * stdint.h. + * + * Compilers tested (all with 0 warnings at their highest respective + * settings): Borland Turbo C 2.0, WATCOM C/C++ 11.0 (16 bits and 32 + * bits), Microsoft Visual C++ 6.0 (32 bit), Microsoft Visual Studio + * .net (VC7), Intel C++ 4.0, GNU gcc v3.3.3 + * + * This file should be considered a work in progress. Suggestions for + * improvements, especially those which increase coverage are strongly + * encouraged. + * + * Acknowledgements + * + * The following people have made significant contributions to the + * development and testing of this file: + * + * Chris Howie + * John Steele Scott + * Dave Thorup + * John Dill + * + */ + +#include +#include +#include + +/* + * For gcc with _STDINT_H, fill in the PRINTF_INT*_MODIFIER macros, and + * do nothing else. On the Mac OS X version of gcc this is _STDINT_H_. 
+ */ + +#if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined(__WATCOMC__) && (defined(_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_) || defined(__UINT_FAST64_TYPE__)))) && !defined(_PSTDINT_H_INCLUDED) +#include +#define _PSTDINT_H_INCLUDED +# ifndef PRINTF_INT64_MODIFIER +# define PRINTF_INT64_MODIFIER "ll" +# endif +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "l" +# endif +# ifndef PRINTF_INT16_MODIFIER +# define PRINTF_INT16_MODIFIER "h" +# endif +# ifndef PRINTF_INTMAX_MODIFIER +# define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER +# endif +# ifndef PRINTF_INT64_HEX_WIDTH +# define PRINTF_INT64_HEX_WIDTH "16" +# endif +# ifndef PRINTF_INT32_HEX_WIDTH +# define PRINTF_INT32_HEX_WIDTH "8" +# endif +# ifndef PRINTF_INT16_HEX_WIDTH +# define PRINTF_INT16_HEX_WIDTH "4" +# endif +# ifndef PRINTF_INT8_HEX_WIDTH +# define PRINTF_INT8_HEX_WIDTH "2" +# endif +# ifndef PRINTF_INT64_DEC_WIDTH +# define PRINTF_INT64_DEC_WIDTH "20" +# endif +# ifndef PRINTF_INT32_DEC_WIDTH +# define PRINTF_INT32_DEC_WIDTH "10" +# endif +# ifndef PRINTF_INT16_DEC_WIDTH +# define PRINTF_INT16_DEC_WIDTH "5" +# endif +# ifndef PRINTF_INT8_DEC_WIDTH +# define PRINTF_INT8_DEC_WIDTH "3" +# endif +# ifndef PRINTF_INTMAX_HEX_WIDTH +# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH +# endif +# ifndef PRINTF_INTMAX_DEC_WIDTH +# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH +# endif + +/* + * Something really weird is going on with Open Watcom. Just pull some of + * these duplicated definitions from Open Watcom's stdint.h file for now. + */ + +# if defined(__WATCOMC__) && __WATCOMC__ >= 1250 +# if !defined(INT64_C) +# define INT64_C(x) (x + (INT64_MAX - INT64_MAX)) +# endif +# if !defined(UINT64_C) +# define UINT64_C(x) (x + (UINT64_MAX - UINT64_MAX)) +# endif +# if !defined(INT32_C) +# define INT32_C(x) (x + (INT32_MAX - INT32_MAX)) +# endif +# if !defined(UINT32_C) +# define UINT32_C(x) (x + (UINT32_MAX - UINT32_MAX)) +# endif +# if !defined(INT16_C) +# define INT16_C(x) (x) +# endif +# if !defined(UINT16_C) +# define UINT16_C(x) (x) +# endif +# if !defined(INT8_C) +# define INT8_C(x) (x) +# endif +# if !defined(UINT8_C) +# define UINT8_C(x) (x) +# endif +# if !defined(UINT64_MAX) +# define UINT64_MAX 18446744073709551615ULL +# endif +# if !defined(INT64_MAX) +# define INT64_MAX 9223372036854775807LL +# endif +# if !defined(UINT32_MAX) +# define UINT32_MAX 4294967295UL +# endif +# if !defined(INT32_MAX) +# define INT32_MAX 2147483647L +# endif +# if !defined(INTMAX_MAX) +# define INTMAX_MAX INT64_MAX +# endif +# if !defined(INTMAX_MIN) +# define INTMAX_MIN INT64_MIN +# endif +# endif +#endif + +#ifndef _PSTDINT_H_INCLUDED +#define _PSTDINT_H_INCLUDED + +#ifndef SIZE_MAX +# define SIZE_MAX (~(size_t)0) +#endif + +/* + * Deduce the type assignments from limits.h under the assumption that + * integer sizes in bits are powers of 2, and follow the ANSI + * definitions. 
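+ * (For example, uint8_t is picked as unsigned char when UCHAR_MAX matches
+ * UINT8_MAX, and uint16_t as unsigned int or unsigned short depending on
+ * which limit matches below.)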
+ */ + +#ifndef UINT8_MAX +# define UINT8_MAX 0xff +#endif +#ifndef uint8_t +# if (UCHAR_MAX == UINT8_MAX) || defined(S_SPLINT_S) +typedef unsigned char uint8_t; +# define UINT8_C(v) ((uint8_t) v) +# else +# error "Platform not supported" +# endif +#endif + +#ifndef INT8_MAX +# define INT8_MAX 0x7f +#endif +#ifndef INT8_MIN +# define INT8_MIN INT8_C(0x80) +#endif +#ifndef int8_t +# if (SCHAR_MAX == INT8_MAX) || defined(S_SPLINT_S) +typedef signed char int8_t; +# define INT8_C(v) ((int8_t) v) +# else +# error "Platform not supported" +# endif +#endif + +#ifndef UINT16_MAX +# define UINT16_MAX 0xffff +#endif +#ifndef uint16_t +#if (UINT_MAX == UINT16_MAX) || defined(S_SPLINT_S) +typedef unsigned int uint16_t; +# ifndef PRINTF_INT16_MODIFIER +# define PRINTF_INT16_MODIFIER "" +# endif +# define UINT16_C(v) ((uint16_t) (v)) +#elif (USHRT_MAX == UINT16_MAX) +typedef unsigned short uint16_t; +# define UINT16_C(v) ((uint16_t) (v)) +# ifndef PRINTF_INT16_MODIFIER +# define PRINTF_INT16_MODIFIER "h" +# endif +#else +#error "Platform not supported" +#endif +#endif + +#ifndef INT16_MAX +# define INT16_MAX 0x7fff +#endif +#ifndef INT16_MIN +# define INT16_MIN INT16_C(0x8000) +#endif +#ifndef int16_t +#if (INT_MAX == INT16_MAX) || defined(S_SPLINT_S) +typedef signed int int16_t; +# define INT16_C(v) ((int16_t) (v)) +# ifndef PRINTF_INT16_MODIFIER +# define PRINTF_INT16_MODIFIER "" +# endif +#elif (SHRT_MAX == INT16_MAX) +typedef signed short int16_t; +# define INT16_C(v) ((int16_t) (v)) +# ifndef PRINTF_INT16_MODIFIER +# define PRINTF_INT16_MODIFIER "h" +# endif +#else +#error "Platform not supported" +#endif +#endif + +#ifndef UINT32_MAX +# define UINT32_MAX (0xffffffffUL) +#endif +#ifndef uint32_t +#if (ULONG_MAX == UINT32_MAX) || defined(S_SPLINT_S) +typedef unsigned long uint32_t; +# define UINT32_C(v) v ## UL +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "l" +# endif +#elif (UINT_MAX == UINT32_MAX) +typedef unsigned int uint32_t; +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "" +# endif +# define UINT32_C(v) v ## U +#elif (USHRT_MAX == UINT32_MAX) +typedef unsigned short uint32_t; +# define UINT32_C(v) ((unsigned short) (v)) +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "" +# endif +#else +#error "Platform not supported" +#endif +#endif + +#ifndef INT32_MAX +# define INT32_MAX (0x7fffffffL) +#endif +#ifndef INT32_MIN +# define INT32_MIN INT32_C(0x80000000) +#endif +#ifndef int32_t +#if (LONG_MAX == INT32_MAX) || defined(S_SPLINT_S) +typedef signed long int32_t; +# define INT32_C(v) v ## L +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "l" +# endif +#elif (INT_MAX == INT32_MAX) +typedef signed int int32_t; +# define INT32_C(v) v +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "" +# endif +#elif (SHRT_MAX == INT32_MAX) +typedef signed short int32_t; +# define INT32_C(v) ((short) (v)) +# ifndef PRINTF_INT32_MODIFIER +# define PRINTF_INT32_MODIFIER "" +# endif +#else +#error "Platform not supported" +#endif +#endif + +/* + * The macro stdint_int64_defined is temporarily used to record + * whether or not 64 integer support is available. It must be + * defined for any 64 integer extensions for new platforms that are + * added. 
+ */ + +#undef stdint_int64_defined +#if (defined(__STDC__) && defined(__STDC_VERSION__)) || defined(S_SPLINT_S) +# if (__STDC__ && __STDC_VERSION__ >= 199901L) || defined(S_SPLINT_S) +# define stdint_int64_defined +typedef long long int64_t; +typedef unsigned long long uint64_t; +# define UINT64_C(v) v ## ULL +# define INT64_C(v) v ## LL +# ifndef PRINTF_INT64_MODIFIER +# define PRINTF_INT64_MODIFIER "ll" +# endif +# endif +#endif + +#if !defined(stdint_int64_defined) +# if defined(__GNUC__) +# define stdint_int64_defined +__extension__ typedef long long int64_t; +__extension__ typedef unsigned long long uint64_t; +# define UINT64_C(v) v ## ULL +# define INT64_C(v) v ## LL +# ifndef PRINTF_INT64_MODIFIER +# define PRINTF_INT64_MODIFIER "ll" +# endif +# elif defined(__MWERKS__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) || defined(__APPLE_CC__) || defined(_LONG_LONG) || defined(_CRAYC) || defined(S_SPLINT_S) +# define stdint_int64_defined +typedef long long int64_t; +typedef unsigned long long uint64_t; +# define UINT64_C(v) v ## ULL +# define INT64_C(v) v ## LL +# ifndef PRINTF_INT64_MODIFIER +# define PRINTF_INT64_MODIFIER "ll" +# endif +# elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined(__BORLANDC__) && __BORLANDC__ > 0x460) || defined(__alpha) || defined(__DECC) +# define stdint_int64_defined +typedef __int64 int64_t; +typedef unsigned __int64 uint64_t; +# define UINT64_C(v) v ## UI64 +# define INT64_C(v) v ## I64 +# ifndef PRINTF_INT64_MODIFIER +# define PRINTF_INT64_MODIFIER "I64" +# endif +# endif +#endif + +#if !defined(LONG_LONG_MAX) && defined(INT64_C) +# define LONG_LONG_MAX INT64_C (9223372036854775807) +#endif +#ifndef ULONG_LONG_MAX +# define ULONG_LONG_MAX UINT64_C (18446744073709551615) +#endif + +#if !defined(INT64_MAX) && defined(INT64_C) +# define INT64_MAX INT64_C (9223372036854775807) +#endif +#if !defined(INT64_MIN) && defined(INT64_C) +# define INT64_MIN INT64_C (-9223372036854775808) +#endif +#if !defined(UINT64_MAX) && defined(INT64_C) +# define UINT64_MAX UINT64_C (18446744073709551615) +#endif + +/* + * Width of hexadecimal for number field. + */ + +#ifndef PRINTF_INT64_HEX_WIDTH +# define PRINTF_INT64_HEX_WIDTH "16" +#endif +#ifndef PRINTF_INT32_HEX_WIDTH +# define PRINTF_INT32_HEX_WIDTH "8" +#endif +#ifndef PRINTF_INT16_HEX_WIDTH +# define PRINTF_INT16_HEX_WIDTH "4" +#endif +#ifndef PRINTF_INT8_HEX_WIDTH +# define PRINTF_INT8_HEX_WIDTH "2" +#endif + +#ifndef PRINTF_INT64_DEC_WIDTH +# define PRINTF_INT64_DEC_WIDTH "20" +#endif +#ifndef PRINTF_INT32_DEC_WIDTH +# define PRINTF_INT32_DEC_WIDTH "10" +#endif +#ifndef PRINTF_INT16_DEC_WIDTH +# define PRINTF_INT16_DEC_WIDTH "5" +#endif +#ifndef PRINTF_INT8_DEC_WIDTH +# define PRINTF_INT8_DEC_WIDTH "3" +#endif + +/* + * Ok, lets not worry about 128 bit integers for now. Moore's law says + * we don't need to worry about that until about 2040 at which point + * we'll have bigger things to worry about. 
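+ * (So intmax_t/uintmax_t below simply alias the 64-bit types when those were
+ * defined above, and fall back to the 32-bit types otherwise.)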
+ */ + +#ifdef stdint_int64_defined +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; +# define INTMAX_MAX INT64_MAX +# define INTMAX_MIN INT64_MIN +# define UINTMAX_MAX UINT64_MAX +# define UINTMAX_C(v) UINT64_C(v) +# define INTMAX_C(v) INT64_C(v) +# ifndef PRINTF_INTMAX_MODIFIER +# define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER +# endif +# ifndef PRINTF_INTMAX_HEX_WIDTH +# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH +# endif +# ifndef PRINTF_INTMAX_DEC_WIDTH +# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH +# endif +#else +typedef int32_t intmax_t; +typedef uint32_t uintmax_t; +# define INTMAX_MAX INT32_MAX +# define UINTMAX_MAX UINT32_MAX +# define UINTMAX_C(v) UINT32_C(v) +# define INTMAX_C(v) INT32_C(v) +# ifndef PRINTF_INTMAX_MODIFIER +# define PRINTF_INTMAX_MODIFIER PRINTF_INT32_MODIFIER +# endif +# ifndef PRINTF_INTMAX_HEX_WIDTH +# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT32_HEX_WIDTH +# endif +# ifndef PRINTF_INTMAX_DEC_WIDTH +# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT32_DEC_WIDTH +# endif +#endif + +/* + * Because this file currently only supports platforms which have + * precise powers of 2 as bit sizes for the default integers, the + * least definitions are all trivial. Its possible that a future + * version of this file could have different definitions. + */ + +#ifndef stdint_least_defined +typedef int8_t int_least8_t; +typedef uint8_t uint_least8_t; +typedef int16_t int_least16_t; +typedef uint16_t uint_least16_t; +typedef int32_t int_least32_t; +typedef uint32_t uint_least32_t; +# define PRINTF_LEAST32_MODIFIER PRINTF_INT32_MODIFIER +# define PRINTF_LEAST16_MODIFIER PRINTF_INT16_MODIFIER +# define UINT_LEAST8_MAX UINT8_MAX +# define INT_LEAST8_MAX INT8_MAX +# define UINT_LEAST16_MAX UINT16_MAX +# define INT_LEAST16_MAX INT16_MAX +# define UINT_LEAST32_MAX UINT32_MAX +# define INT_LEAST32_MAX INT32_MAX +# define INT_LEAST8_MIN INT8_MIN +# define INT_LEAST16_MIN INT16_MIN +# define INT_LEAST32_MIN INT32_MIN +# ifdef stdint_int64_defined +typedef int64_t int_least64_t; +typedef uint64_t uint_least64_t; +# define PRINTF_LEAST64_MODIFIER PRINTF_INT64_MODIFIER +# define UINT_LEAST64_MAX UINT64_MAX +# define INT_LEAST64_MAX INT64_MAX +# define INT_LEAST64_MIN INT64_MIN +# endif +#endif +#undef stdint_least_defined + +/* + * The ANSI C committee pretending to know or specify anything about + * performance is the epitome of misguided arrogance. The mandate of + * this file is to *ONLY* ever support that absolute minimum + * definition of the fast integer types, for compatibility purposes. + * No extensions, and no attempt to suggest what may or may not be a + * faster integer type will ever be made in this file. Developers are + * warned to stay away from these types when using this or any other + * stdint.h. 
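+ * (Accordingly, the fast types below are plain aliases of the corresponding
+ * least types.)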
+ */ + +typedef int_least8_t int_fast8_t; +typedef uint_least8_t uint_fast8_t; +typedef int_least16_t int_fast16_t; +typedef uint_least16_t uint_fast16_t; +typedef int_least32_t int_fast32_t; +typedef uint_least32_t uint_fast32_t; +#define UINT_FAST8_MAX UINT_LEAST8_MAX +#define INT_FAST8_MAX INT_LEAST8_MAX +#define UINT_FAST16_MAX UINT_LEAST16_MAX +#define INT_FAST16_MAX INT_LEAST16_MAX +#define UINT_FAST32_MAX UINT_LEAST32_MAX +#define INT_FAST32_MAX INT_LEAST32_MAX +#define INT_FAST8_MIN INT_LEAST8_MIN +#define INT_FAST16_MIN INT_LEAST16_MIN +#define INT_FAST32_MIN INT_LEAST32_MIN +#ifdef stdint_int64_defined +typedef int_least64_t int_fast64_t; +typedef uint_least64_t uint_fast64_t; +# define UINT_FAST64_MAX UINT_LEAST64_MAX +# define INT_FAST64_MAX INT_LEAST64_MAX +# define INT_FAST64_MIN INT_LEAST64_MIN +#endif + +#undef stdint_int64_defined + +/* + * Whatever piecemeal, per compiler thing we can do about the wchar_t + * type limits. + */ + +#if defined(__WATCOMC__) || defined(_MSC_VER) || defined(__GNUC__) +# include +# ifndef WCHAR_MIN +# define WCHAR_MIN 0 +# endif +# ifndef WCHAR_MAX +# define WCHAR_MAX ((wchar_t)-1) +# endif +#endif + +/* + * Whatever piecemeal, per compiler/platform thing we can do about the + * (u)intptr_t types and limits. + */ + +#if defined(_MSC_VER) && defined(_UINTPTR_T_DEFINED) +# define STDINT_H_UINTPTR_T_DEFINED +#endif + +#ifndef STDINT_H_UINTPTR_T_DEFINED +# if defined(__alpha__) || defined(__ia64__) || defined(__x86_64__) || defined(_WIN64) +# define stdint_intptr_bits 64 +# elif defined(__WATCOMC__) || defined(__TURBOC__) +# if defined(__TINY__) || defined(__SMALL__) || defined(__MEDIUM__) +# define stdint_intptr_bits 16 +# else +# define stdint_intptr_bits 32 +# endif +# elif defined(__i386__) || defined(_WIN32) || defined(WIN32) +# define stdint_intptr_bits 32 +# elif defined(__INTEL_COMPILER) +#error Unknown compiler +# endif + +# ifdef stdint_intptr_bits +# define stdint_intptr_glue3_i(a, b, c) a##b##c +# define stdint_intptr_glue3(a, b, c) stdint_intptr_glue3_i(a,b,c) +# ifndef PRINTF_INTPTR_MODIFIER +# define PRINTF_INTPTR_MODIFIER stdint_intptr_glue3(PRINTF_INT,stdint_intptr_bits,_MODIFIER) +# endif +# ifndef PTRDIFF_MAX +# define PTRDIFF_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX) +# endif +# ifndef PTRDIFF_MIN +# define PTRDIFF_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN) +# endif +# ifndef UINTPTR_MAX +# define UINTPTR_MAX stdint_intptr_glue3(UINT,stdint_intptr_bits,_MAX) +# endif +# ifndef INTPTR_MAX +# define INTPTR_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX) +# endif +# ifndef INTPTR_MIN +# define INTPTR_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN) +# endif +# ifndef INTPTR_C +# define INTPTR_C(x) stdint_intptr_glue3(INT,stdint_intptr_bits,_C)(x) +# endif +# ifndef UINTPTR_C +# define UINTPTR_C(x) stdint_intptr_glue3(UINT,stdint_intptr_bits,_C)(x) +# endif +typedef stdint_intptr_glue3 (uint, stdint_intptr_bits, _t) uintptr_t; +typedef stdint_intptr_glue3 (int, stdint_intptr_bits, _t) intptr_t; +# else +#error Unknown compiler +# endif +# define STDINT_H_UINTPTR_T_DEFINED +#endif + +/* + * Assumes sig_atomic_t is signed and we have a 2s complement machine. + */ + +#ifndef SIG_ATOMIC_MAX +# define SIG_ATOMIC_MAX ((((sig_atomic_t) 1) << (sizeof (sig_atomic_t)*CHAR_BIT-1)) - 1) +#endif + +#endif + +#if defined(__TEST_PSTDINT_FOR_CORRECTNESS) + +/* + * Please compile with the maximum warning settings to make sure macros are not + * defined more than once. 
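+ * (The self-test below, enabled by __TEST_PSTDINT_FOR_CORRECTNESS, exercises
+ * the printf modifiers and the *_MAX constants for every width defined above.)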
+ */ + +#include +#include +#include + +#define glue3_aux(x, y, z) x ## y ## z +#define glue3(x, y, z) glue3_aux(x,y,z) + +#define DECLU(bits) glue3(uint,bits,_t) glue3(u,bits,=) glue3(UINT,bits,_C) (0); +#define DECLI(bits) glue3(int,bits,_t) glue3(i,bits,=) glue3(INT,bits,_C) (0); + +#define DECL(us, bits) glue3(DECL,us,) (bits) + +#define TESTUMAX(bits) glue3(u,bits,=) glue3(~,u,bits); if (glue3(UINT,bits,_MAX) glue3(!=,u,bits)) printf ("Something wrong with UINT%d_MAX\n", bits) + +int main() +{ + DECL(I, 8) + DECL(U, 8) + DECL(I, 16) + DECL(U, 16) + DECL(I, 32) + DECL(U, 32) +#ifdef INT64_MAX + DECL(I, 64) + DECL(U, 64) +#endif + intmax_t imax = INTMAX_C(0); + uintmax_t umax = UINTMAX_C(0); + char str0[256], str1[256]; + + sprintf(str0, "%d %x\n", 0, ~0); + + sprintf(str1, "%d %x\n", i8, ~0); + if (0 != strcmp(str0, str1)) printf("Something wrong with i8 : %s\n", str1); + sprintf(str1, "%u %x\n", u8, ~0); + if (0 != strcmp(str0, str1)) printf("Something wrong with u8 : %s\n", str1); + sprintf(str1, "%d %x\n", i16, ~0); + if (0 != strcmp(str0, str1)) printf("Something wrong with i16 : %s\n", str1); + sprintf(str1, "%u %x\n", u16, ~0); + if (0 != strcmp(str0, str1)) printf("Something wrong with u16 : %s\n", str1); + sprintf(str1, "%" PRINTF_INT32_MODIFIER "d %x\n", i32, ~0); + if (0 != strcmp(str0, str1)) printf("Something wrong with i32 : %s\n", str1); + sprintf(str1, "%" PRINTF_INT32_MODIFIER "u %x\n", u32, ~0); + if (0 != strcmp(str0, str1)) printf("Something wrong with u32 : %s\n", str1); +#ifdef INT64_MAX + sprintf(str1, "%" PRINTF_INT64_MODIFIER "d %x\n", i64, ~0); + if (0 != strcmp(str0, str1)) printf("Something wrong with i64 : %s\n", str1); +#endif + sprintf(str1, "%" PRINTF_INTMAX_MODIFIER "d %x\n", imax, ~0); + if (0 != strcmp(str0, str1)) printf("Something wrong with imax : %s\n", str1); + sprintf(str1, "%" PRINTF_INTMAX_MODIFIER "u %x\n", umax, ~0); + if (0 != strcmp(str0, str1)) printf("Something wrong with umax : %s\n", str1); + + TESTUMAX(8); + TESTUMAX(16); + TESTUMAX(32); +#ifdef INT64_MAX + TESTUMAX(64); +#endif + + return EXIT_SUCCESS; +} + +#endif diff --git a/third_party/HLSLcc/license.txt b/third_party/HLSLcc/license.txt new file mode 100644 index 0000000..6e2d4bb --- /dev/null +++ b/third_party/HLSLcc/license.txt @@ -0,0 +1,53 @@ + +Original HLSLcc source code Copyright (c) 2012 James Jones +Further improvements Copyright (c) 2014-2016 Unity Technologies +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +the rights to use, copy, modify, merge, publish, distribute, sublicense, +and/or sell copies of the Software, and to permit persons to whom the +Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN +THE SOFTWARE. 
+ +This software makes use of the bstring library which is provided under the following license: + +Copyright (c) 2002-2008 Paul Hsieh +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + Neither the name of bstrlib nor the names of its contributors may be used + to endorse or promote products derived from this software without + specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. \ No newline at end of file diff --git a/third_party/HLSLcc/src/ControlFlowGraph.cpp b/third_party/HLSLcc/src/ControlFlowGraph.cpp new file mode 100644 index 0000000..bf45aae --- /dev/null +++ b/third_party/HLSLcc/src/ControlFlowGraph.cpp @@ -0,0 +1,815 @@ +#include "internal_includes/debug.h" +#include "internal_includes/ControlFlowGraph.h" +#include "internal_includes/ControlFlowGraphUtils.h" +#include "internal_includes/Instruction.h" +#include "internal_includes/Operand.h" +#include "internal_includes/HLSLccToolkit.h" +#include + +using namespace HLSLcc::ControlFlow; +using HLSLcc::ForEachOperand; + +const BasicBlock &ControlFlowGraph::Build(const Instruction* firstInstruction, const Instruction* endInstruction) +{ + using std::for_each; + + m_BlockMap.clear(); + m_BlockStorage.clear(); + + // Self-registering into m_BlockStorage so it goes out of the scope when ControlFlowGraph does + BasicBlock *root = new BasicBlock(Utils::GetNextNonLabelInstruction(firstInstruction), *this, NULL, endInstruction); + + // Build the reachable set for each block + bool hadChanges; + do + { + hadChanges = false; + for_each(m_BlockStorage.begin(), m_BlockStorage.end(), [&](const shared_ptr &bb) + { + BasicBlock &b = *bb.get(); + if (b.RebuildReachable()) + { + hadChanges = true; + } + }); + } + while (hadChanges == true); + + return *root; +} + +const BasicBlock *ControlFlowGraph::GetBasicBlockForInstruction(const Instruction *instruction) const +{ + BasicBlockMap::const_iterator itr = m_BlockMap.find(Utils::GetNextNonLabelInstruction(instruction)); + if (itr == m_BlockMap.end()) + return NULL; + + return itr->second; +} + +BasicBlock *ControlFlowGraph::GetBasicBlockForInstruction(const Instruction *instruction) +{ + BasicBlockMap::iterator itr = m_BlockMap.find(Utils::GetNextNonLabelInstruction(instruction)); + if (itr == m_BlockMap.end()) + return NULL; + + return itr->second; +} + +// Generate a basic block. 
Private constructor, can only be constructed from ControlFlowGraph::Build(). +// Auto-registers itself into ControlFlowGraph +BasicBlock::BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, const Instruction *psPrecedingBlockHead, const Instruction* endInstruction) + : m_Graph(graph) + , m_First(psFirst) + , m_Last(NULL) + , m_End(endInstruction) +{ + m_UEVar.clear(); + m_VarKill.clear(); + m_Preceding.clear(); + m_Succeeding.clear(); + m_DEDef.clear(); + m_Reachable.clear(); + + // Check that we've pruned the labels + ASSERT(psFirst == Utils::GetNextNonLabelInstruction(psFirst)); + + // Insert to block storage, block map and connect to previous block + m_Graph.m_BlockStorage.push_back(shared_ptr(this)); + + bool didInsert = m_Graph.m_BlockMap.insert(std::make_pair(psFirst, this)).second; + ASSERT(didInsert); + + if (psPrecedingBlockHead != NULL) + { + m_Preceding.insert(psPrecedingBlockHead); + BasicBlock *prec = m_Graph.GetBasicBlockForInstruction(psPrecedingBlockHead); + ASSERT(prec != 0); + didInsert = prec->m_Succeeding.insert(psFirst).second; + ASSERT(didInsert); + } + + Build(); +} + +void BasicBlock::Build() +{ + const Instruction *inst = m_First; + while (inst != m_End) + { + // Process sources first + ForEachOperand(inst, inst + 1, FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND, + [this](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType) + { + if (psOperand->eType != OPERAND_TYPE_TEMP) + return; + + uint32_t tempReg = psOperand->ui32RegisterNumber; + uint32_t accessMask = psOperand->GetAccessMask(); + + // Go through each component + for (int k = 0; k < 4; k++) + { + if (!(accessMask & (1 << k))) + continue; + + uint32_t regIdx = tempReg * 4 + k; + // Is this idx already in the kill set, meaning that it's already been re-defined in this basic block? Ignore + if (m_VarKill.find(regIdx) != m_VarKill.end()) + continue; + + // Add to UEVars set. Doesn't matter if it's already there. + m_UEVar.insert(regIdx); + } + return; + }); + + // Then the destination operands + ForEachOperand(inst, inst + 1, FEO_FLAG_DEST_OPERAND, + [this](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType) + { + if (psOperand->eType != OPERAND_TYPE_TEMP) + return; + + uint32_t tempReg = psOperand->ui32RegisterNumber; + uint32_t accessMask = psOperand->GetAccessMask(); + + // Go through each component + for (int k = 0; k < 4; k++) + { + if (!(accessMask & (1 << k))) + continue; + + uint32_t regIdx = tempReg * 4 + k; + + // Add to kill set. Dupes are fine, this is a set. + m_VarKill.insert(regIdx); + // Also into the downward definitions. Overwrite the previous definition in this basic block, if any + Definition d(psInst, psOperand); + m_DEDef[regIdx].clear(); + m_DEDef[regIdx].insert(d); + } + return; + }); + + // Check for flow control instructions + bool blockDone = false; + switch (inst->eOpcode) + { + default: + break; + case OPCODE_RET: + // Continue processing, in the case of unreachable code we still need to translate it properly (case 1160309) + // blockDone = true; + break; + case OPCODE_RETC: + // Basic block is done, start a next one. + // There REALLY should be no existing blocks for this one + ASSERT(m_Graph.GetBasicBlockForInstruction(Utils::GetNextNonLabelInstruction(inst + 1)) == NULL); + AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst + 1)); + blockDone = true; + break; + case OPCODE_LOOP: + case OPCODE_CASE: + case OPCODE_ENDIF: + case OPCODE_ENDSWITCH: + // Not a flow control branch, but need to start a new block anyway. 
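+                // (LOOP, CASE, ENDIF and ENDSWITCH behave like labels here: execution falls
+                // through them, but the successor block must begin at the next non-label instruction.)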
+ AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst + 1)); + blockDone = true; + break; + + // Branches + case OPCODE_IF: + case OPCODE_BREAKC: + case OPCODE_CONTINUEC: + { + const Instruction *jumpPoint = Utils::GetJumpPoint(inst); + ASSERT(jumpPoint != NULL); + + // The control branches to the next instruction or jumps to jumpPoint + AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst + 1)); + AddChildBasicBlock(jumpPoint); + + blockDone = true; + break; + } + case OPCODE_SWITCH: + { + bool sawEndSwitch = false; + bool needConnectToParent = false; + const Instruction *jumpPoint = Utils::GetJumpPoint(inst, &sawEndSwitch, &needConnectToParent); + ASSERT(jumpPoint != NULL); + + while (1) + { + if (!sawEndSwitch || needConnectToParent) + AddChildBasicBlock(jumpPoint); + + if (sawEndSwitch) + break; + + // The -1 is a bit of a hack: we always scroll past all labels so rewind to the last one so we'll know to search for the next label + ASSERT((jumpPoint - 1)->eOpcode == OPCODE_CASE || (jumpPoint - 1)->eOpcode == OPCODE_DEFAULT); + jumpPoint = Utils::GetJumpPoint(jumpPoint - 1, &sawEndSwitch, &needConnectToParent); + ASSERT(jumpPoint != NULL); + } + blockDone = true; + break; + } + + // Non-conditional jumps + case OPCODE_BREAK: + case OPCODE_ELSE: + case OPCODE_CONTINUE: + case OPCODE_ENDLOOP: + { + const Instruction *jumpPoint = Utils::GetJumpPoint(inst); + ASSERT(jumpPoint != NULL); + + AddChildBasicBlock(jumpPoint); + + blockDone = true; + break; + } + } + + if (blockDone) + break; + + inst++; + } + // In initial building phase, just make m_Reachable equal to m_DEDef + m_Reachable = m_DEDef; + + // Tag the end of the basic block + m_Last = std::max(m_First, std::min(inst, m_End - 1)); +// printf("Basic Block %d -> %d\n", (int)m_First->id, (int)m_Last->id); +} + +BasicBlock * BasicBlock::AddChildBasicBlock(const Instruction *psFirst) +{ + // First see if this already exists + BasicBlock *b = m_Graph.GetBasicBlockForInstruction(psFirst); + if (b) + { + // Just add dependency and we're done + b->m_Preceding.insert(m_First); + m_Succeeding.insert(psFirst); + return b; + } + // Otherwise create one. Self-registering and self-connecting + return new BasicBlock(psFirst, m_Graph, m_First, m_End); +} + +bool BasicBlock::RebuildReachable() +{ + // Building the Reachable set is an iterative process, where each block gets rebuilt until nothing changes. + // Formula: reachable = this.DEDef union ( each preceding.Reachable() minus this.VarKill()) + + ReachableVariables newReachable = m_DEDef; + bool hasChanges = false; + + // Loop each predecessor + std::for_each(Preceding().begin(), Preceding().end(), [&](const Instruction *instr) + { + const BasicBlock *prec = m_Graph.GetBasicBlockForInstruction(instr); + const ReachableVariables &precReachable = prec->Reachable(); + + // Loop each variable*component + std::for_each(precReachable.begin(), precReachable.end(), [&](const std::pair &itr2) + { + uint32_t regIdx = itr2.first; + const BasicBlock::ReachableDefinitionsPerVariable &defs = itr2.second; + + // Already killed in this block? 
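+            // (A redefinition inside this block supersedes the predecessor's definition,
+            // so that definition cannot reach the end of this block.)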
+ if (VarKill().find(regIdx) != VarKill().end()) + return; + + // Only do comparisons against current definitions if we've yet to find any changes + BasicBlock::ReachableDefinitionsPerVariable *currReachablePerVar = 0; + if (!hasChanges) + currReachablePerVar = &m_Reachable[regIdx]; + + BasicBlock::ReachableDefinitionsPerVariable &newReachablePerVar = newReachable[regIdx]; + + // Loop each definition + std::for_each(defs.begin(), defs.end(), [&](const BasicBlock::Definition &d) + { + if (!hasChanges) + { + // Check if already there + if (currReachablePerVar->find(d) == currReachablePerVar->end()) + hasChanges = true; + } + newReachablePerVar.insert(d); + }); // definition + }); // variable*component + }); // predecessor + + if (hasChanges) + { + std::swap(m_Reachable, newReachable); + } + + return hasChanges; +} + +void BasicBlock::RVarUnion(ReachableVariables &a, const ReachableVariables &b) +{ + std::for_each(b.begin(), b.end(), [&a](const std::pair &rpvPair) + { + uint32_t regIdx = rpvPair.first; + const ReachableDefinitionsPerVariable &rpv = rpvPair.second; + // No previous definitions for this variable? + auto aRPVItr = a.find(regIdx); + if (aRPVItr == a.end()) + { + // Just set the definitions and continue + a[regIdx] = rpv; + return; + } + ReachableDefinitionsPerVariable &aRPV = aRPVItr->second; + aRPV.insert(rpv.begin(), rpv.end()); + }); +} + +#if ENABLE_UNIT_TESTS + +#define UNITY_EXTERNAL_TOOL 1 +#include "Projects/PrecompiledHeaders/UnityPrefix.h" // Needed for defines such as ENABLE_CPP_EXCEPTIONS +#include "Testing.h" // From Runtime/Testing + +UNIT_TEST_SUITE(HLSLcc) +{ + TEST(ControlFlowGraph_Build_Simple_Works) + { + Instruction inst[] = + { + // MOV t0.xyzw, I0.xyzw + Instruction(0, OPCODE_MOV, 0, 0xf, 0xffffffff, 0xf), + Instruction(1, OPCODE_RET) + }; + + ControlFlowGraph cfg; + const BasicBlock &root = cfg.Build(inst, inst + ARRAY_SIZE(inst)); + + CHECK_EQUAL(&inst[0], root.First()); + CHECK_EQUAL(&inst[1], root.Last()); + + CHECK(root.Preceding().empty()); + CHECK(root.Succeeding().empty()); + + CHECK_EQUAL(4, root.VarKill().size()); + + // Check that all components from t0 are killed + CHECK_EQUAL(1, root.VarKill().count(0)); + CHECK_EQUAL(1, root.VarKill().count(1)); + CHECK_EQUAL(1, root.VarKill().count(2)); + CHECK_EQUAL(1, root.VarKill().count(3)); + + CHECK_EQUAL(&inst[0], root.DEDef().find(0)->second.begin()->m_Instruction); + CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(0)->second.begin()->m_Operand); + CHECK_EQUAL(&inst[0], root.DEDef().find(1)->second.begin()->m_Instruction); + CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(1)->second.begin()->m_Operand); + CHECK_EQUAL(&inst[0], root.DEDef().find(2)->second.begin()->m_Instruction); + CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(2)->second.begin()->m_Operand); + CHECK_EQUAL(&inst[0], root.DEDef().find(3)->second.begin()->m_Instruction); + CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(3)->second.begin()->m_Operand); + } + + TEST(ControlFlowGraph_Build_If_Works) + { + Instruction inst[] = + { + // B0 + // 0: MOV t1.xyzw, i0.xyzw + Instruction(0, OPCODE_MOV, 1, 0xf, 0xffffffff, 0xf), + // 1: MUL t0, t1, t1 + Instruction(1, OPCODE_MUL, 0, 0xf, 1, 0xf, 1, 0xf), + // 2: IF t1.y + Instruction(2, OPCODE_IF, 1, 2), + // B1 + // 3: MOV o0, t0 + Instruction(3, OPCODE_MOV, 0xffffffff, 0xf, 0, 0xf), + // 4: + Instruction(4, OPCODE_ELSE), + // B2 + // 5: MOV o0, t1 + Instruction(5, OPCODE_MOV, 0xffffffff, 0xf, 1, 0xf), + // 6: + Instruction(6, OPCODE_ENDIF), + // B3 + // 7: + Instruction(7, 
OPCODE_NOP), + // 8: + Instruction(8, OPCODE_RET) + }; + + ControlFlowGraph cfg; + const BasicBlock &root = cfg.Build(inst, inst + ARRAY_SIZE(inst)); + + CHECK_EQUAL(root.First(), &inst[0]); + CHECK_EQUAL(root.Last(), &inst[2]); + + CHECK(root.Preceding().empty()); + + const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[3]); + const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[5]); + const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[7]); + + CHECK(b1 != NULL); + CHECK(b2 != NULL); + CHECK(b3 != NULL); + + CHECK_EQUAL(&inst[3], b1->First()); + CHECK_EQUAL(&inst[5], b2->First()); + CHECK_EQUAL(&inst[7], b3->First()); + + CHECK_EQUAL(&inst[4], b1->Last()); + CHECK_EQUAL(&inst[6], b2->Last()); + CHECK_EQUAL(&inst[8], b3->Last()); + + CHECK_EQUAL(1, root.Succeeding().count(&inst[3])); + CHECK_EQUAL(1, root.Succeeding().count(&inst[5])); + CHECK_EQUAL(2, root.Succeeding().size()); + + CHECK_EQUAL(1, b1->Preceding().size()); + CHECK_EQUAL(1, b1->Preceding().count(&inst[0])); + + CHECK_EQUAL(1, b2->Preceding().size()); + CHECK_EQUAL(1, b2->Preceding().count(&inst[0])); + + CHECK_EQUAL(2, b3->Preceding().size()); + CHECK_EQUAL(0, b3->Preceding().count(&inst[0])); + CHECK_EQUAL(1, b3->Preceding().count(&inst[3])); + CHECK_EQUAL(1, b3->Preceding().count(&inst[5])); + + // The if block must have upwards-exposed t0 + CHECK_EQUAL(1, b1->UEVar().count(0)); + CHECK_EQUAL(1, b1->UEVar().count(1)); + CHECK_EQUAL(1, b1->UEVar().count(2)); + CHECK_EQUAL(1, b1->UEVar().count(3)); + + // The else block must have upwards-exposed t1 + CHECK_EQUAL(1, b2->UEVar().count(4)); + CHECK_EQUAL(1, b2->UEVar().count(5)); + CHECK_EQUAL(1, b2->UEVar().count(6)); + CHECK_EQUAL(1, b2->UEVar().count(7)); + + CHECK_EQUAL(8, root.VarKill().size()); + + // Check that all components from t0 and t1 are killed + CHECK_EQUAL(1, root.VarKill().count(0)); + CHECK_EQUAL(1, root.VarKill().count(1)); + CHECK_EQUAL(1, root.VarKill().count(2)); + CHECK_EQUAL(1, root.VarKill().count(3)); + + CHECK_EQUAL(1, root.VarKill().count(4)); + CHECK_EQUAL(1, root.VarKill().count(5)); + CHECK_EQUAL(1, root.VarKill().count(6)); + CHECK_EQUAL(1, root.VarKill().count(7)); + + // The expected downwards-exposed definitions: + // B0: t0, t1 + // B1-B3: none + + CHECK_EQUAL(8, root.DEDef().size()); + CHECK_EQUAL(0, b1->DEDef().size()); + CHECK_EQUAL(0, b2->DEDef().size()); + CHECK_EQUAL(0, b3->DEDef().size()); + + CHECK(root.DEDef() == root.Reachable()); + + CHECK(root.Reachable() == b1->Reachable()); + CHECK(root.Reachable() == b2->Reachable()); + CHECK(root.Reachable() == b3->Reachable()); + } + + TEST(ControlFlowGraph_Build_SwitchCase_Works) + { + Instruction inst[] = + { + // Start B0 + // i0: MOV t0.x, I0.x + Instruction(0, OPCODE_MOV, 0, 1, 0xffffffff, 1), + // i1: MOVE t1.xyz, I0.yzw + Instruction(1, OPCODE_MOV, 1, 7, 0xffffffff, 0xe), + // i2: MOVE t1.w, t0.x + Instruction(2, OPCODE_MOV, 1, 8, 0xffffffff, 0x1), + // i3: MOVE t2, I0 + Instruction(3, OPCODE_MOV, 2, 0xf, 0xffffffff, 0xf), + // i4: SWITCH t0.y + Instruction(4, OPCODE_SWITCH, 1, 2), + // End B0 + // i5: CASE + Instruction(5, OPCODE_CASE), + // i6: DEFAULT + Instruction(6, OPCODE_DEFAULT), + // Start B1 + // i7: MOC t1.z, t0.x + Instruction(7, OPCODE_MOV, 1, 4, 0, 1), + // i8: CASE + Instruction(8, OPCODE_CASE), + // End B1 + // Start B2 + // i9: MOV t1.z, t2.x + Instruction(9, OPCODE_MOV, 1, 4, 2, 1), + // i10: BREAK + Instruction(10, OPCODE_BREAK), + // End B2 + // i11: CASE + Instruction(11, OPCODE_CASE), + // Start B3 + // i12: MOV t1.z, t2.y + 
Instruction(12, OPCODE_MOV, 1, 4, 2, 2), + // i13: BREAKC t0.x + Instruction(13, OPCODE_BREAKC, 0, 1), + // End B3 + // i14: CASE + Instruction(14, OPCODE_CASE), + // Start B4 + // i15: MOV t1.z, t2.z + Instruction(15, OPCODE_MOV, 1, 4, 2, 4), + // i16: ENDSWITCH + Instruction(16, OPCODE_ENDSWITCH), + // End B4 + // Start B5 + // i17: MOV o0, t1 + Instruction(17, OPCODE_MOV, 0xffffffff, 0xf, 1, 0xf), + // i18: RET + Instruction(18, OPCODE_RET) + // End B5 + }; + + ControlFlowGraph cfg; + const BasicBlock &root = cfg.Build(inst, inst + ARRAY_SIZE(inst)); + + CHECK_EQUAL(&inst[0], root.First()); + CHECK_EQUAL(&inst[4], root.Last()); + + const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[7]); + const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[9]); + const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[12]); + const BasicBlock *b4 = cfg.GetBasicBlockForInstruction(&inst[15]); + const BasicBlock *b5 = cfg.GetBasicBlockForInstruction(&inst[17]); + + CHECK(b1 != NULL); + CHECK(b2 != NULL); + CHECK(b3 != NULL); + CHECK(b4 != NULL); + CHECK(b5 != NULL); + + // Check instruction ranges + CHECK_EQUAL(&inst[8], b1->Last()); + CHECK_EQUAL(&inst[10], b2->Last()); + CHECK_EQUAL(&inst[13], b3->Last()); + CHECK_EQUAL(&inst[16], b4->Last()); + CHECK_EQUAL(&inst[18], b5->Last()); + + // Nothing before the root, nothing after b5 + CHECK(root.Preceding().empty()); + CHECK(b5->Succeeding().empty()); + + // Check that all connections are there and no others. + + // B0->B1 + // B0->B2 + // B0->B3 + // B0->B4 + CHECK_EQUAL(1, root.Succeeding().count(&inst[7])); + CHECK_EQUAL(1, root.Succeeding().count(&inst[9])); + CHECK_EQUAL(1, root.Succeeding().count(&inst[12])); + CHECK_EQUAL(1, root.Succeeding().count(&inst[15])); + + CHECK_EQUAL(4, root.Succeeding().size()); + + // B1 + + // B1->B2 + CHECK_EQUAL(1, b1->Succeeding().count(&inst[9])); + CHECK_EQUAL(1, b1->Succeeding().size()); + + // B0->B1, reverse + CHECK_EQUAL(1, b1->Preceding().count(&inst[0])); + CHECK_EQUAL(1, b1->Preceding().size()); + + // B2 + + // B2->B5 + CHECK_EQUAL(1, b2->Succeeding().count(&inst[17])); + CHECK_EQUAL(1, b2->Succeeding().size()); + CHECK_EQUAL(1, b2->Preceding().count(&inst[7])); + CHECK_EQUAL(1, b2->Preceding().count(&inst[0])); + CHECK_EQUAL(2, b2->Preceding().size()); + + // B3 + // B3->B4 + // B3->B5 + CHECK_EQUAL(1, b3->Succeeding().count(&inst[15])); + CHECK_EQUAL(1, b3->Succeeding().count(&inst[17])); + CHECK_EQUAL(2, b3->Succeeding().size()); + CHECK_EQUAL(1, b3->Preceding().count(&inst[0])); + CHECK_EQUAL(1, b3->Preceding().size()); + + // B4 + CHECK_EQUAL(1, b4->Succeeding().count(&inst[17])); + CHECK_EQUAL(1, b4->Succeeding().size()); + CHECK_EQUAL(1, b4->Preceding().count(&inst[0])); + CHECK_EQUAL(2, b4->Preceding().size()); + + // B5 + CHECK_EQUAL(0, b5->Succeeding().size()); + CHECK_EQUAL(3, b5->Preceding().size()); //b2, b3, b4 + CHECK_EQUAL(1, b5->Preceding().count(&inst[9])); + CHECK_EQUAL(1, b5->Preceding().count(&inst[12])); + CHECK_EQUAL(1, b5->Preceding().count(&inst[15])); + + + // Verify reachable sets + + CHECK(root.Reachable() == root.DEDef()); + CHECK_EQUAL(9, root.Reachable().size()); + + // B5 should have these reachables: + // t0.x only from b0 + // t1.xy from b0, i1 + // t1.z from b2,i9 + b3,i12 + b4,i15 (the defs from b0 and b1 are killed by b2) + // t1.w from b0, i2 + // t2.xyzw from b0, i3 + + // Cast away const so [] works. 
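+        // (The cast is only needed to get non-const operator[]; components that were
+        // never defined simply come back as empty definition sets.)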
+ BasicBlock::ReachableVariables &r = (BasicBlock::ReachableVariables &)b5->Reachable(); + + CHECK_EQUAL(9, r.size()); + + CHECK_EQUAL(1, r[0].size()); + CHECK_EQUAL(0, r[1].size()); + CHECK_EQUAL(0, r[2].size()); + CHECK_EQUAL(0, r[3].size()); + CHECK_EQUAL(&inst[0], r[0].begin()->m_Instruction); + + CHECK_EQUAL(1, r[4].size()); + CHECK_EQUAL(1, r[5].size()); + CHECK_EQUAL(3, r[6].size()); + CHECK_EQUAL(1, r[7].size()); + + const BasicBlock::ReachableDefinitionsPerVariable &d = r[6]; + BasicBlock::ReachableDefinitionsPerVariable t; + t.insert(BasicBlock::Definition(&inst[9], &inst[9].asOperands[0])); + t.insert(BasicBlock::Definition(&inst[12], &inst[12].asOperands[0])); + t.insert(BasicBlock::Definition(&inst[15], &inst[15].asOperands[0])); + + CHECK(t == d); + + CHECK_EQUAL(1, r[8].size()); + CHECK_EQUAL(1, r[9].size()); + CHECK_EQUAL(1, r[10].size()); + CHECK_EQUAL(1, r[11].size()); + } + + TEST(ControlFlowGraph_Build_Loop_Works) + { + Instruction inst[] = + { + // Start B0 + // i0: MOV t0.x, I0.x + Instruction(0, OPCODE_MOV, 0, 1, 0xffffffff, 1), + // i1: MOVE t1.xy, I0.zw // The .x definition should not make it past the loop, .y should. + Instruction(1, OPCODE_MOV, 1, 3, 0xffffffff, 0xc), + // i2: LOOP + Instruction(2, OPCODE_LOOP, 1, 2), + // End B0 -> B1 + // Begin B1 + // i3: MOV t1.x, t0.x + Instruction(3, OPCODE_MOV, 1, 1, 0, 1), + // i4: BREAKC t0.x + Instruction(4, OPCODE_BREAKC, 0, 1), + // End B1 -> B2, B3 + // Begin B2 + // i5: ADD t0.x, t0.y + Instruction(5, OPCODE_ADD, 0, 1, 0, 2), + // i6: MOV t1.x, t0.x // This should never show up as definition + Instruction(6, OPCODE_MOV, 1, 1, 0, 1), + // i7: ENDLOOP + Instruction(7, OPCODE_ENDLOOP), + // End B2 -> B1 + // Start B3 + // i8: MOV O0.x, t1.x + Instruction(8, OPCODE_MOV, 0xffffffff, 1, 1, 1), + // i9: RET + Instruction(9, OPCODE_RET), + // End B3 + }; + + ControlFlowGraph cfg; + const BasicBlock &root = cfg.Build(inst, inst + ARRAY_SIZE(inst)); + + CHECK_EQUAL(&inst[0], root.First()); + CHECK_EQUAL(&inst[2], root.Last()); + + const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[3]); + const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[5]); + const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[8]); + + CHECK(b1 != NULL); + CHECK(b2 != NULL); + CHECK(b3 != NULL); + + // Check instruction ranges + CHECK_EQUAL(&inst[4], b1->Last()); + CHECK_EQUAL(&inst[7], b2->Last()); + CHECK_EQUAL(&inst[9], b3->Last()); + + // Nothing before the root, nothing after b3 + CHECK(root.Preceding().empty()); + CHECK(b3->Succeeding().empty()); + + // Check that all connections are there and no others. 
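+        // Expected edges: B0->B1 (loop entry), B1->B2 and B1->B3 (BREAKC), B2->B1 (ENDLOOP back edge).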
+ + // B0->B1 + CHECK_EQUAL(1, root.Succeeding().count(&inst[3])); + CHECK_EQUAL(1, root.Succeeding().size()); + + // B1 + + // B1->B2 + // B1->B3 + CHECK_EQUAL(1, b1->Succeeding().count(&inst[5])); + CHECK_EQUAL(1, b1->Succeeding().count(&inst[8])); + CHECK_EQUAL(2, b1->Succeeding().size()); + + // B0->B1, reverse + CHECK_EQUAL(1, b1->Preceding().count(&inst[0])); + // We may also come from B2 + CHECK_EQUAL(1, b1->Preceding().count(&inst[5])); + CHECK_EQUAL(2, b1->Preceding().size()); + + // B2 + + // B2->B1 + CHECK_EQUAL(1, b2->Succeeding().count(&inst[3])); + CHECK_EQUAL(1, b2->Succeeding().size()); + CHECK_EQUAL(1, b2->Preceding().count(&inst[3])); + CHECK_EQUAL(1, b2->Preceding().size()); + + // B3 + CHECK_EQUAL(1, b3->Preceding().count(&inst[3])); + CHECK_EQUAL(1, b3->Preceding().size()); + + // Verify reachable sets + + + BasicBlock::ReachableVariables t; + + // B0 DEDef and Reachable + t.clear(); + t[0].insert(BasicBlock::Definition(&inst[0], &inst[0].asOperands[0])); + t[4].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0])); + t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0])); + + CHECK(root.DEDef() == t); + CHECK(root.Reachable() == root.DEDef()); + + // B1 DEDef and Reachable + t.clear(); + t[4].insert(BasicBlock::Definition(&inst[3], &inst[3].asOperands[0])); + CHECK(b1->DEDef() == t); + + t = b1->DEDef(); + // t0.x from i0, t1.y (but not .x) from i1 + t[0].insert(BasicBlock::Definition(&inst[0], &inst[0].asOperands[0])); + t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0])); + + // t0.x from i5, but nothing from i6 + t[0].insert(BasicBlock::Definition(&inst[5], &inst[5].asOperands[0])); + CHECK(b1->Reachable() == t); + + // B2 + t.clear(); + t[0].insert(BasicBlock::Definition(&inst[5], &inst[5].asOperands[0])); + t[4].insert(BasicBlock::Definition(&inst[6], &inst[6].asOperands[0])); + CHECK(b2->DEDef() == t); + + t = b2->DEDef(); + t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0])); + + CHECK(b2->Reachable() == t); + + // B3 + t.clear(); + CHECK(b3->DEDef() == t); + // t0.x from i0, t1.y from i1 + t[0].insert(BasicBlock::Definition(&inst[0], &inst[0].asOperands[0])); + t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0])); + + // t1.x from i3 + t[4].insert(BasicBlock::Definition(&inst[3], &inst[3].asOperands[0])); + + // t0.x from i5 + t[0].insert(BasicBlock::Definition(&inst[5], &inst[5].asOperands[0])); + + CHECK(b3->Reachable() == t); + } +} + +#endif diff --git a/third_party/HLSLcc/src/ControlFlowGraphUtils.cpp b/third_party/HLSLcc/src/ControlFlowGraphUtils.cpp new file mode 100644 index 0000000..b74fbed --- /dev/null +++ b/third_party/HLSLcc/src/ControlFlowGraphUtils.cpp @@ -0,0 +1,116 @@ +#include "ControlFlowGraphUtils.h" + +#include "internal_includes/debug.h" +#include "internal_includes/Instruction.h" +#include "internal_includes/Operand.h" + + +// Get the next instruction that's not one of CASE, DEFAULT, LOOP, ENDSWITCH +const Instruction *HLSLcc::ControlFlow::Utils::GetNextNonLabelInstruction(const Instruction *psStart, bool *sawEndSwitch /*= 0*/) +{ + const Instruction *inst = psStart; + // Skip CASE/DEFAULT/ENDSWITCH/LOOP labels + while (inst->eOpcode == OPCODE_CASE || inst->eOpcode == OPCODE_DEFAULT || inst->eOpcode == OPCODE_ENDSWITCH || inst->eOpcode == OPCODE_LOOP) + { + // We really shouldn't be seeing ENDSWITCH without sawEndSwitch being set (as in, we're expecting it) + ASSERT(inst->eOpcode != OPCODE_ENDSWITCH || sawEndSwitch != NULL); + if (inst->eOpcode == 
OPCODE_ENDSWITCH && sawEndSwitch != NULL) + *sawEndSwitch = true; + inst++; + } + return inst; +} + +// For a given flow-control instruction, find the corresponding jump location: +// If the input is OPCODE_IF, then find the next same-level ELSE or ENDIF +1 +// For ELSE, find same level ENDIF + 1 +// For BREAK/BREAKC, find next ENDLOOP or ENDSWITCH + 1 +// For SWITCH, find next same-level CASE/DEFAULT (skip multiple consecutive case/default labels) or ENDSWITCH + 1 +// For ENDLOOP, find previous same-level LOOP + 1 +// For CASE/DEFAULT, find next same-level CASE/DEFAULT or ENDSWITCH + 1, skip multiple consecutive case/default labels +// For CONTINUE/C the previous LOOP + 1 +// Note that LOOP/ENDSWITCH itself is nothing but a label but it still starts a new basic block. +// Note that CASE labels fall through. +// Always returns the beginning of the next block, so skip multiple CASE/DEFAULT labels etc. +const Instruction * HLSLcc::ControlFlow::Utils::GetJumpPoint(const Instruction *psStart, bool *sawEndSwitch /*= 0*/, bool *needConnectToParent /* = 0*/) +{ + const Instruction *inst = psStart; + int depth = 0; + OPCODE_TYPE op = psStart->eOpcode; + ASSERT(op == OPCODE_IF || op == OPCODE_ELSE || op == OPCODE_BREAK || op == OPCODE_BREAKC + || op == OPCODE_SWITCH || op == OPCODE_CASE || op == OPCODE_DEFAULT + || op == OPCODE_ENDLOOP || op == OPCODE_CONTINUE || op == OPCODE_CONTINUEC); + + switch (op) + { + default: + ASSERT(0); + break; + case OPCODE_IF: + case OPCODE_ELSE: + while (1) + { + inst++; + if ((inst->eOpcode == OPCODE_ELSE || inst->eOpcode == OPCODE_ENDIF) && (depth == 0)) + { + return GetNextNonLabelInstruction(inst + 1, sawEndSwitch); + } + if (inst->eOpcode == OPCODE_IF) + depth++; + if (inst->eOpcode == OPCODE_ENDIF) + depth--; + } + case OPCODE_BREAK: + case OPCODE_BREAKC: + while (1) + { + inst++; + if ((inst->eOpcode == OPCODE_ENDLOOP || inst->eOpcode == OPCODE_ENDSWITCH) && (depth == 0)) + { + return GetNextNonLabelInstruction(inst + 1, sawEndSwitch); + } + if (inst->eOpcode == OPCODE_SWITCH || inst->eOpcode == OPCODE_LOOP) + depth++; + if (inst->eOpcode == OPCODE_ENDSWITCH || inst->eOpcode == OPCODE_ENDLOOP) + depth--; + } + case OPCODE_CONTINUE: + case OPCODE_CONTINUEC: + case OPCODE_ENDLOOP: + while (1) + { + inst--; + if ((inst->eOpcode == OPCODE_LOOP) && (depth == 0)) + { + return GetNextNonLabelInstruction(inst + 1, sawEndSwitch); + } + if (inst->eOpcode == OPCODE_LOOP) + depth--; + if (inst->eOpcode == OPCODE_ENDLOOP) + depth++; + } + case OPCODE_SWITCH: + case OPCODE_CASE: + case OPCODE_DEFAULT: + while (1) + { + inst++; + if ((inst->eOpcode == OPCODE_CASE || inst->eOpcode == OPCODE_DEFAULT || inst->eOpcode == OPCODE_ENDSWITCH) && (depth == 0)) + { + // Note that we'll skip setting sawEndSwitch if inst->eOpcode = OPCODE_ENDSWITCH + // so that BasicBlock::Build can distinguish between there being a direct route + // from SWITCH->ENDSWITCH (CASE followed directly by ENDSWITCH) and not. 
+ + if (inst->eOpcode == OPCODE_ENDSWITCH && sawEndSwitch != 0) + *sawEndSwitch = true; + + return GetNextNonLabelInstruction(inst + 1, needConnectToParent); + } + if (inst->eOpcode == OPCODE_SWITCH) + depth++; + if (inst->eOpcode == OPCODE_ENDSWITCH) + depth--; + } + } + return 0; +} diff --git a/third_party/HLSLcc/src/DataTypeAnalysis.cpp b/third_party/HLSLcc/src/DataTypeAnalysis.cpp new file mode 100644 index 0000000..a53fc5a --- /dev/null +++ b/third_party/HLSLcc/src/DataTypeAnalysis.cpp @@ -0,0 +1,777 @@ +#include "internal_includes/debug.h" +#include "internal_includes/tokens.h" +#include "internal_includes/HLSLccToolkit.h" +#include "internal_includes/DataTypeAnalysis.h" +#include "internal_includes/Shader.h" +#include "internal_includes/HLSLCrossCompilerContext.h" +#include "internal_includes/Instruction.h" +#include + + +// Helper function to set the vector type of 1 or more components in a vector +// If the existing values (in vector we're writing to) are all SVT_VOID, just upgrade the value and we're done +// Otherwise, set all the components in the vector that currently are set to that same value OR are now being written to +// to the "highest" type value (ordering int->uint->float) +static void SetVectorType(std::vector &aeTempVecType, uint32_t regBaseIndex, uint32_t componentMask, SHADER_VARIABLE_TYPE eType, int *psMadeProgress) +{ + int i = 0; + + // Expand the mask to include all components that are used, also upgrade type + for (i = 0; i < 4; i++) + { + if (aeTempVecType[regBaseIndex + i] != SVT_VOID) + { + componentMask |= (1 << i); + eType = HLSLcc::SelectHigherType(eType, aeTempVecType[regBaseIndex + i]); + } + } + + // Now componentMask contains the components we actually need to update and eType may have been changed to something else. + // Write the results + for (i = 0; i < 4; i++) + { + if (componentMask & (1 << i)) + { + if (aeTempVecType[regBaseIndex + i] != eType) + { + aeTempVecType[regBaseIndex + i] = eType; + if (psMadeProgress) + *psMadeProgress = 1; + } + } + } +} + +static SHADER_VARIABLE_TYPE OperandPrecisionToShaderVariableType(OPERAND_MIN_PRECISION prec, SHADER_VARIABLE_TYPE eDefault) +{ + SHADER_VARIABLE_TYPE eType = eDefault; + switch (prec) + { + case OPERAND_MIN_PRECISION_DEFAULT: + break; + case OPERAND_MIN_PRECISION_SINT_16: + eType = SVT_INT16; + break; + case OPERAND_MIN_PRECISION_UINT_16: + eType = SVT_UINT16; + break; + case OPERAND_MIN_PRECISION_FLOAT_2_8: + eType = SVT_FLOAT10; + break; + case OPERAND_MIN_PRECISION_FLOAT_16: + eType = SVT_FLOAT16; + break; + default: + ASSERT(0); // Catch this to see what's going on. + break; + } + return eType; +} + +static void MarkOperandAs(Operand *psOperand, SHADER_VARIABLE_TYPE eType, std::vector &aeTempVecType) +{ + if (psOperand->eType == OPERAND_TYPE_TEMP) + { + const uint32_t ui32RegIndex = psOperand->ui32RegisterNumber * 4; + uint32_t mask = psOperand->GetAccessMask(); + // Adjust type based on operand precision + eType = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, eType); + + SetVectorType(aeTempVecType, ui32RegIndex, mask, eType, NULL); + } +} + +static void MarkAllOperandsAs(Instruction* psInst, SHADER_VARIABLE_TYPE eType, std::vector &aeTempVecType) +{ + uint32_t i = 0; + for (i = 0; i < psInst->ui32NumOperands; i++) + { + MarkOperandAs(&psInst->asOperands[i], eType, aeTempVecType); + } +} + +// Mark scalars from CBs. TODO: Do we need to do the same for vec2/3's as well? There may be swizzles involved which make it vec4 or something else again. 
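+// (Only single-component accesses are considered; when the constant-buffer variable
+// resolves to SVC_SCALAR, the operand is narrowed to one component.)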
+static void SetCBOperandComponents(HLSLCrossCompilerContext *psContext, Operand *psOperand) +{ + const ConstantBuffer* psCBuf = NULL; + const ShaderVarType* psVarType = NULL; + int32_t rebase = 0; + bool isArray; + + if (psOperand->eType != OPERAND_TYPE_CONSTANT_BUFFER) + return; + + // Ignore selection modes that access more than one component + switch (psOperand->eSelMode) + { + case OPERAND_4_COMPONENT_SELECT_1_MODE: + break; + case OPERAND_4_COMPONENT_SWIZZLE_MODE: + if (!psOperand->IsSwizzleReplicated()) + return; + break; + case OPERAND_4_COMPONENT_MASK_MODE: + return; + } + + psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf); + ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], psOperand->aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags); + + if (psVarType->Class == SVC_SCALAR) + psOperand->iNumComponents = 1; +} + +struct SetPartialDataTypes +{ + SetPartialDataTypes(SHADER_VARIABLE_TYPE *_aeTempVec) + : m_TempVec(_aeTempVec) + {} + SHADER_VARIABLE_TYPE *m_TempVec; + + template void operator()(ItrType inst, Operand *psOperand, uint32_t ui32OperandType) const + { + uint32_t mask = 0; + SHADER_VARIABLE_TYPE *aeTempVecType = m_TempVec; + SHADER_VARIABLE_TYPE newType; + uint32_t i, reg; + if (psOperand->eType != OPERAND_TYPE_TEMP) + return; + + if (ui32OperandType == FEO_FLAG_SUBOPERAND) + { + // We really shouldn't ever be getting minprecision float indices here + ASSERT(psOperand->eMinPrecision != OPERAND_MIN_PRECISION_FLOAT_16 && psOperand->eMinPrecision != OPERAND_MIN_PRECISION_FLOAT_2_8); + + mask = psOperand->GetAccessMask(); + reg = psOperand->ui32RegisterNumber; + newType = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, SVT_INT_AMBIGUOUS); + for (i = 0; i < 4; i++) + { + if (!(mask & (1 << i))) + continue; + if (aeTempVecType[reg * 4 + i] == SVT_VOID) + aeTempVecType[reg * 4 + i] = newType; + } + return; + } + + if (psOperand->eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT) + return; + + mask = psOperand->GetAccessMask(); + reg = psOperand->ui32RegisterNumber; + newType = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, SVT_VOID); + ASSERT(newType != SVT_VOID); + for (i = 0; i < 4; i++) + { + if (!(mask & (1 << i))) + continue; + aeTempVecType[reg * 4 + i] = newType; + } + } +}; + +// Write back the temp datatypes into operands. 
Also mark scalars in constant buffers + +struct WritebackDataTypes +{ + WritebackDataTypes(HLSLCrossCompilerContext *_ctx, SHADER_VARIABLE_TYPE *_aeTempVec) + : m_Context(_ctx) + , m_TempVec(_aeTempVec) + {} + HLSLCrossCompilerContext *m_Context; + SHADER_VARIABLE_TYPE *m_TempVec; + + template void operator()(ItrType inst, Operand *psOperand, uint32_t ui32OperandType) const + { + SHADER_VARIABLE_TYPE *aeTempVecType = m_TempVec; + uint32_t reg, mask, i; + SHADER_VARIABLE_TYPE dtype; + + if (psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER) + SetCBOperandComponents(m_Context, psOperand); + + if (psOperand->eType != OPERAND_TYPE_TEMP) + return; + + reg = psOperand->ui32RegisterNumber; + mask = psOperand->GetAccessMask(); + dtype = SVT_VOID; + + for (i = 0; i < 4; i++) + { + if (!(mask & (1 << i))) + continue; + + // Check that all components have the same type + ASSERT(dtype == SVT_VOID || dtype == aeTempVecType[reg * 4 + i]); + + dtype = aeTempVecType[reg * 4 + i]; + + ASSERT(dtype != SVT_VOID); + ASSERT(dtype == OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, dtype)); + + psOperand->aeDataType[i] = dtype; + } + } +}; + + +void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext, std::vector & instructions, uint32_t ui32TempCount, std::vector &results) +{ + uint32_t i; + Instruction *psFirstInst = &instructions[0]; + Instruction *psInst = psFirstInst; + // Start with void, then move up the chain void->ambiguous int->minprec int/uint->int/uint->minprec float->float + std::vector &aeTempVecType = results; + + aeTempVecType.clear(); + aeTempVecType.resize(ui32TempCount * 4, SVT_VOID); + + if (ui32TempCount == 0) + return; + + // Go through the instructions, pick up partial datatypes, because we at least know those for a fact. 
+ // Also set all suboperands to be integers (they're always used as indices) + ForEachOperand(instructions.begin(), instructions.end(), FEO_FLAG_ALL, SetPartialDataTypes(&aeTempVecType[0])); + + // if (psContext->psShader->ui32MajorVersion <= 3) + { + // First pass, do analysis: deduce the data type based on opcodes, fill out aeTempVecType table + // Only ever to int->float promotion (or int->uint), never the other way around + for (i = 0; i < (uint32_t)instructions.size(); ++i, psInst++) + { + if (psInst->ui32NumOperands == 0) + continue; +#ifdef _DEBUG + for (int k = 0; k < (int)psInst->ui32NumOperands; k++) + { + if (psInst->asOperands[k].eType == OPERAND_TYPE_TEMP) + { + ASSERT(psInst->asOperands[k].ui32RegisterNumber < ui32TempCount); + } + } +#endif + + switch (psInst->eOpcode) + { + // All float-only ops + case OPCODE_ADD: + case OPCODE_DERIV_RTX: + case OPCODE_DERIV_RTY: + case OPCODE_DIV: + case OPCODE_DP2: + case OPCODE_DP3: + case OPCODE_DP4: + case OPCODE_EXP: + case OPCODE_FRC: + case OPCODE_LOG: + case OPCODE_MAD: + case OPCODE_MIN: + case OPCODE_MAX: + case OPCODE_MUL: + case OPCODE_ROUND_NE: + case OPCODE_ROUND_NI: + case OPCODE_ROUND_PI: + case OPCODE_ROUND_Z: + case OPCODE_RSQ: + case OPCODE_SAMPLE: + case OPCODE_SAMPLE_C: + case OPCODE_SAMPLE_C_LZ: + case OPCODE_SAMPLE_L: + case OPCODE_SAMPLE_D: + case OPCODE_SAMPLE_B: + case OPCODE_SQRT: + case OPCODE_SINCOS: + case OPCODE_LOD: + case OPCODE_GATHER4: + + case OPCODE_DERIV_RTX_COARSE: + case OPCODE_DERIV_RTX_FINE: + case OPCODE_DERIV_RTY_COARSE: + case OPCODE_DERIV_RTY_FINE: + case OPCODE_GATHER4_C: + case OPCODE_GATHER4_PO: + case OPCODE_GATHER4_PO_C: + case OPCODE_RCP: + + MarkAllOperandsAs(psInst, SVT_FLOAT, aeTempVecType); + break; + + // Comparison ops, need to enable possibility for going boolean + case OPCODE_IEQ: + case OPCODE_INE: + MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_INT_AMBIGUOUS, aeTempVecType); + MarkOperandAs(&psInst->asOperands[2], SVT_INT_AMBIGUOUS, aeTempVecType); + break; + + case OPCODE_IF: + case OPCODE_BREAKC: + case OPCODE_CALLC: + case OPCODE_CONTINUEC: + case OPCODE_RETC: + MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType); + break; + + case OPCODE_ILT: + case OPCODE_IGE: + MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); + break; + + case OPCODE_ULT: + case OPCODE_UGE: + MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[2], SVT_UINT, aeTempVecType); + break; + + case OPCODE_AND: + case OPCODE_OR: + MarkOperandAs(&psInst->asOperands[0], SVT_INT_AMBIGUOUS, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType); + MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType); + break; + + // Integer ops that don't care of signedness + case OPCODE_IADD: + case OPCODE_INEG: + case OPCODE_ISHL: + case OPCODE_NOT: + case OPCODE_XOR: + case OPCODE_BUFINFO: + case OPCODE_COUNTBITS: + case OPCODE_FIRSTBIT_HI: + case OPCODE_FIRSTBIT_LO: + case OPCODE_FIRSTBIT_SHI: + case OPCODE_BFI: + case OPCODE_BFREV: + case OPCODE_ATOMIC_AND: + case OPCODE_ATOMIC_OR: + case OPCODE_ATOMIC_XOR: + case OPCODE_ATOMIC_CMP_STORE: + case OPCODE_ATOMIC_IADD: + case OPCODE_IMM_ATOMIC_IADD: + case OPCODE_IMM_ATOMIC_AND: + case OPCODE_IMM_ATOMIC_OR: + case 
OPCODE_IMM_ATOMIC_XOR: + case OPCODE_IMM_ATOMIC_EXCH: + case OPCODE_IMM_ATOMIC_CMP_EXCH: + + + MarkAllOperandsAs(psInst, SVT_INT_AMBIGUOUS, aeTempVecType); + break; + + + // Integer ops + case OPCODE_IMAD: + case OPCODE_IMAX: + case OPCODE_IMIN: + case OPCODE_IMUL: + case OPCODE_ISHR: + case OPCODE_IBFE: + + case OPCODE_ATOMIC_IMAX: + case OPCODE_ATOMIC_IMIN: + case OPCODE_IMM_ATOMIC_IMAX: + case OPCODE_IMM_ATOMIC_IMIN: + MarkAllOperandsAs(psInst, SVT_INT, aeTempVecType); + break; + + + // uint ops + case OPCODE_UDIV: + case OPCODE_UMUL: + case OPCODE_UMAD: + case OPCODE_UMAX: + case OPCODE_UMIN: + case OPCODE_USHR: + case OPCODE_UADDC: + case OPCODE_USUBB: + case OPCODE_ATOMIC_UMAX: + case OPCODE_ATOMIC_UMIN: + case OPCODE_IMM_ATOMIC_UMAX: + case OPCODE_IMM_ATOMIC_UMIN: + case OPCODE_IMM_ATOMIC_ALLOC: + case OPCODE_IMM_ATOMIC_CONSUME: + MarkAllOperandsAs(psInst, SVT_UINT, aeTempVecType); + break; + case OPCODE_UBFE: + MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[3], SVT_UINT, aeTempVecType); + break; + + // Need special handling + case OPCODE_FTOI: + case OPCODE_FTOU: + MarkOperandAs(&psInst->asOperands[0], psInst->eOpcode == OPCODE_FTOI ? SVT_INT : SVT_UINT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType); + break; + + case OPCODE_GE: + case OPCODE_LT: + case OPCODE_EQ: + case OPCODE_NE: + + MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[2], SVT_FLOAT, aeTempVecType); + break; + + case OPCODE_ITOF: + case OPCODE_UTOF: + MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], psInst->eOpcode == OPCODE_ITOF ? SVT_INT : SVT_UINT, aeTempVecType); + break; + + case OPCODE_LD: + case OPCODE_LD_MS: + { + SHADER_VARIABLE_TYPE samplerReturnType = psInst->asOperands[2].aeDataType[0]; + MarkOperandAs(&psInst->asOperands[0], samplerReturnType, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType); + break; + } + + case OPCODE_MOVC: + MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType); + break; + + case OPCODE_SWAPC: + MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType); + break; + + case OPCODE_RESINFO: + // Operand 0 depends on the return type declaration, op 1 is always uint + MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType); + switch (psInst->eResInfoReturnType) + { + default: + case RESINFO_INSTRUCTION_RETURN_FLOAT: + case RESINFO_INSTRUCTION_RETURN_RCPFLOAT: + MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); + break; + case RESINFO_INSTRUCTION_RETURN_UINT: + MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType); + break; + } + break; + + case OPCODE_SAMPLE_INFO: + // Sample_info uses the same RESINFO_RETURN_TYPE for storage. 0 = float, 1 = uint. + MarkOperandAs(&psInst->asOperands[0], psInst->eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_FLOAT ? SVT_FLOAT : SVT_UINT, aeTempVecType); + break; + + case OPCODE_SAMPLE_POS: + MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); + break; + + + case OPCODE_LD_UAV_TYPED: + // translates to gvec4 loadImage(gimage i, ivec p). 
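+ // In GLSL, imageLoad() takes signed integer (ivec) coordinates, hence the int marking for the coordinate operand below.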
+ MarkOperandAs(&psInst->asOperands[0], SVT_INT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); // ivec p + break; + + case OPCODE_STORE_UAV_TYPED: + // translates to storeImage(gimage i, ivec p, gvec4 data) + MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); // ivec p + MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); // gvec4 data + break; + + case OPCODE_LD_RAW: + if (psInst->asOperands[2].eType == OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) + MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType); + else + MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); + break; + + case OPCODE_STORE_RAW: + if (psInst->asOperands[0].eType == OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) + MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType); + else + MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); + break; + + case OPCODE_LD_STRUCTURED: + MarkOperandAs(&psInst->asOperands[0], SVT_INT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); + break; + + case OPCODE_STORE_STRUCTURED: + MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[3], SVT_INT, aeTempVecType); + break; + + case OPCODE_F32TOF16: + MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType); + break; + + case OPCODE_F16TOF32: + MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType); + MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType); + break; + + + // No-operands, should never get here anyway + /* case OPCODE_BREAK: + case OPCODE_CALL: + case OPCODE_CASE: + case OPCODE_CONTINUE: + case OPCODE_CUT: + case OPCODE_DEFAULT: + case OPCODE_DISCARD: + case OPCODE_ELSE: + case OPCODE_EMIT: + case OPCODE_EMITTHENCUT: + case OPCODE_ENDIF: + case OPCODE_ENDLOOP: + case OPCODE_ENDSWITCH: + + case OPCODE_LABEL: + case OPCODE_LOOP: + case OPCODE_CUSTOMDATA: + case OPCODE_NOP: + case OPCODE_RET: + case OPCODE_SWITCH: + case OPCODE_DCL_RESOURCE: // DCL* opcodes have + case OPCODE_DCL_CONSTANT_BUFFER: // custom operand formats. 
+ case OPCODE_DCL_SAMPLER: + case OPCODE_DCL_INDEX_RANGE: + case OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: + case OPCODE_DCL_GS_INPUT_PRIMITIVE: + case OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: + case OPCODE_DCL_INPUT: + case OPCODE_DCL_INPUT_SGV: + case OPCODE_DCL_INPUT_SIV: + case OPCODE_DCL_INPUT_PS: + case OPCODE_DCL_INPUT_PS_SGV: + case OPCODE_DCL_INPUT_PS_SIV: + case OPCODE_DCL_OUTPUT: + case OPCODE_DCL_OUTPUT_SGV: + case OPCODE_DCL_OUTPUT_SIV: + case OPCODE_DCL_TEMPS: + case OPCODE_DCL_INDEXABLE_TEMP: + case OPCODE_DCL_GLOBAL_FLAGS: + + + case OPCODE_HS_DECLS: // token marks beginning of HS sub-shader + case OPCODE_HS_CONTROL_POINT_PHASE: // token marks beginning of HS sub-shader + case OPCODE_HS_FORK_PHASE: // token marks beginning of HS sub-shader + case OPCODE_HS_JOIN_PHASE: // token marks beginning of HS sub-shader + + case OPCODE_EMIT_STREAM: + case OPCODE_CUT_STREAM: + case OPCODE_EMITTHENCUT_STREAM: + case OPCODE_INTERFACE_CALL: + + + case OPCODE_DCL_STREAM: + case OPCODE_DCL_FUNCTION_BODY: + case OPCODE_DCL_FUNCTION_TABLE: + case OPCODE_DCL_INTERFACE: + + case OPCODE_DCL_INPUT_CONTROL_POINT_COUNT: + case OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT: + case OPCODE_DCL_TESS_DOMAIN: + case OPCODE_DCL_TESS_PARTITIONING: + case OPCODE_DCL_TESS_OUTPUT_PRIMITIVE: + case OPCODE_DCL_HS_MAX_TESSFACTOR: + case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: + case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: + + case OPCODE_DCL_THREAD_GROUP: + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED: + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: + case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW: + case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED: + case OPCODE_DCL_RESOURCE_RAW: + case OPCODE_DCL_RESOURCE_STRUCTURED: + case OPCODE_SYNC: + + case OPCODE_EVAL_SNAPPED: + case OPCODE_EVAL_SAMPLE_INDEX: + case OPCODE_EVAL_CENTROID: + + case OPCODE_DCL_GS_INSTANCE_COUNT: + + case OPCODE_ABORT: + case OPCODE_DEBUG_BREAK: + + // Double not supported + case OPCODE_DADD: + case OPCODE_DMAX: + case OPCODE_DMIN: + case OPCODE_DMUL: + case OPCODE_DEQ: + case OPCODE_DGE: + case OPCODE_DLT: + case OPCODE_DNE: + case OPCODE_DMOV: + case OPCODE_DMOVC: + case OPCODE_DTOF: + case OPCODE_FTOD: + */ + + default: + break; + } + } + } + + { + int madeProgress = 0; + // Next go through MOV and MOVC and propagate the data type of whichever parameter we happen to have + do + { + madeProgress = 0; + psInst = psFirstInst; + for (i = 0; i < (uint32_t)instructions.size(); ++i, psInst++) + { + if (psInst->eOpcode == OPCODE_MOV || psInst->eOpcode == OPCODE_MOVC) + { + // Figure out the data type + uint32_t k; + SHADER_VARIABLE_TYPE dataType = SVT_VOID; + int foundImmediate = 0; + for (k = 0; k < psInst->ui32NumOperands; k++) + { + uint32_t mask, j; + if (psInst->eOpcode == OPCODE_MOVC && k == 1) + continue; // Ignore the condition operand, it's always int + + if (psInst->asOperands[k].eType == OPERAND_TYPE_IMMEDIATE32) + { + foundImmediate = 1; + continue; // We don't know the data type of immediates yet, but if this is the only one found, mark as int, it'll get promoted later if needed + } + + if (psInst->asOperands[k].eType != OPERAND_TYPE_TEMP) + { + dataType = psInst->asOperands[k].GetDataType(psContext); + break; + } + + if (psInst->asOperands[k].eModifier != OPERAND_MODIFIER_NONE) + { + // If any modifiers are used in MOV or MOVC, that automatically is treated as float. 
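+ // e.g. mov r0.x, -r1.x or mov r0.x, |r1.x|: once a neg/abs modifier is involved we stop trying to track an integer type and treat the value as float.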
+ dataType = SVT_FLOAT; + break; + } + + mask = psInst->asOperands[k].GetAccessMask(); + for (j = 0; j < 4; j++) + { + if (!(mask & (1 << j))) + continue; + if (aeTempVecType[psInst->asOperands[k].ui32RegisterNumber * 4 + j] != SVT_VOID) + { + dataType = HLSLcc::SelectHigherType(dataType, aeTempVecType[psInst->asOperands[k].ui32RegisterNumber * 4 + j]); + } + } + } + + // Use at minimum int type when any operand is immediate. + // Allowing bool could lead into bugs like case 883080 + if (foundImmediate && (dataType == SVT_VOID || dataType == SVT_BOOL)) + dataType = SVT_INT; + + if (dataType != SVT_VOID) + { + // Found data type, write to all operands + // First adjust it to not have precision qualifiers in it + switch (dataType) + { + case SVT_FLOAT10: + case SVT_FLOAT16: + dataType = SVT_FLOAT; + break; + case SVT_INT12: + case SVT_INT16: + dataType = SVT_INT; + break; + case SVT_UINT16: + case SVT_UINT8: + dataType = SVT_UINT; + break; + default: + break; + } + for (k = 0; k < psInst->ui32NumOperands; k++) + { + uint32_t mask; + if (psInst->eOpcode == OPCODE_MOVC && k == 1) + continue; // Ignore the condition operand, it's always int + + if (psInst->asOperands[k].eType != OPERAND_TYPE_TEMP) + continue; + if (psInst->asOperands[k].eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT) + continue; + + mask = psInst->asOperands[k].GetAccessMask(); + SetVectorType(aeTempVecType, psInst->asOperands[k].ui32RegisterNumber * 4, mask, dataType, &madeProgress); + } + } + } + } + } + while (madeProgress != 0); + } + + + // translate forced_int and int_ambiguous back to int + for (i = 0; i < ui32TempCount * 4; i++) + { + if (aeTempVecType[i] == SVT_FORCED_INT || aeTempVecType[i] == SVT_INT_AMBIGUOUS) + aeTempVecType[i] = SVT_INT; + } + + ForEachOperand(instructions.begin(), instructions.end(), FEO_FLAG_ALL, WritebackDataTypes(psContext, &aeTempVecType[0])); + + // Propagate boolean data types over logical operators + bool didProgress = false; + do + { + didProgress = false; + std::for_each(instructions.begin(), instructions.end(), [&didProgress, &psContext, &aeTempVecType](Instruction &i) + { + if ((i.eOpcode == OPCODE_AND || i.eOpcode == OPCODE_OR) + && (i.asOperands[1].GetDataType(psContext) == SVT_BOOL && i.asOperands[2].GetDataType(psContext) == SVT_BOOL) + && (i.asOperands[0].eType == OPERAND_TYPE_TEMP && i.asOperands[0].GetDataType(psContext) != SVT_BOOL)) + { + // Check if all uses see only this define + bool isStandalone = true; + std::for_each(i.m_Uses.begin(), i.m_Uses.end(), [&isStandalone](Instruction::Use &u) + { + if (u.m_Op->m_Defines.size() > 1) + isStandalone = false; + }); + + if (isStandalone) + { + didProgress = true; + // Change data type of this and all uses + i.asOperands[0].aeDataType[0] = i.asOperands[0].aeDataType[1] = i.asOperands[0].aeDataType[2] = i.asOperands[0].aeDataType[3] = SVT_BOOL; + uint32_t reg = i.asOperands[0].ui32RegisterNumber; + aeTempVecType[reg * 4 + 0] = aeTempVecType[reg * 4 + 1] = aeTempVecType[reg * 4 + 2] = aeTempVecType[reg * 4 + 3] = SVT_BOOL; + + std::for_each(i.m_Uses.begin(), i.m_Uses.end(), [](Instruction::Use &u) + { + u.m_Op->aeDataType[0] = u.m_Op->aeDataType[1] = u.m_Op->aeDataType[2] = u.m_Op->aeDataType[3] = SVT_BOOL; + }); + } + } + }); + } + while (didProgress); +} diff --git a/third_party/HLSLcc/src/Declaration.cpp b/third_party/HLSLcc/src/Declaration.cpp new file mode 100644 index 0000000..4171fb3 --- /dev/null +++ b/third_party/HLSLcc/src/Declaration.cpp @@ -0,0 +1 @@ +#include "internal_includes/Declaration.h" diff --git 
a/third_party/HLSLcc/src/HLSLCrossCompilerContext.cpp b/third_party/HLSLcc/src/HLSLCrossCompilerContext.cpp new file mode 100644 index 0000000..7117d81 --- /dev/null +++ b/third_party/HLSLcc/src/HLSLCrossCompilerContext.cpp @@ -0,0 +1,350 @@ +#include "internal_includes/HLSLCrossCompilerContext.h" +#include "internal_includes/HLSLccToolkit.h" +#include "internal_includes/Shader.h" +#include "internal_includes/DataTypeAnalysis.h" +#include "internal_includes/UseDefineChains.h" +#include "internal_includes/Declaration.h" +#include "internal_includes/debug.h" +#include "internal_includes/Translator.h" +#include "internal_includes/ControlFlowGraph.h" +#include "internal_includes/languages.h" +#include "include/hlslcc.h" +#include + +void HLSLCrossCompilerContext::DoDataTypeAnalysis(ShaderPhase *psPhase) +{ + size_t ui32DeclCount = psPhase->psDecl.size(); + uint32_t i; + + psPhase->psTempDeclaration = NULL; + psPhase->ui32OrigTemps = 0; + psPhase->ui32TotalTemps = 0; + + // Retrieve the temp decl count + for (i = 0; i < ui32DeclCount; ++i) + { + if (psPhase->psDecl[i].eOpcode == OPCODE_DCL_TEMPS) + { + psPhase->ui32TotalTemps = psPhase->psDecl[i].value.ui32NumTemps; + psPhase->psTempDeclaration = &psPhase->psDecl[i]; + break; + } + } + + if (psPhase->ui32TotalTemps == 0) + return; + + psPhase->ui32OrigTemps = psPhase->ui32TotalTemps; + + // The split table is a table containing the index of the original register this register was split out from, or 0xffffffff + // Format: lowest 16 bits: original register. bits 16-23: rebase (eg value of 1 means .yzw was changed to .xyz): bits 24-31: component count + psPhase->pui32SplitInfo.clear(); + psPhase->pui32SplitInfo.resize(psPhase->ui32TotalTemps * 2, 0xffffffff); + + // Build use-define chains and split temps based on those. 
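+ // For reference, a pui32SplitInfo entry as described above unpacks as (names illustrative only):
+ // uint32_t origReg = info & 0xffffu; // lowest 16 bits: original register
+ // uint32_t rebase = (info >> 16) & 0xffu; // bits 16-23: rebase amount
+ // uint32_t compCount = (info >> 24) & 0xffu; // bits 24-31: component count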
+ { + DefineUseChains duChains; + UseDefineChains udChains; + + BuildUseDefineChains(psPhase->psInst, psPhase->ui32TotalTemps, duChains, udChains, psPhase->GetCFG()); + + CalculateStandaloneDefinitions(duChains, psPhase->ui32TotalTemps); + + // Only do sampler precision downgrade with pixel shaders on mobile targets / Switch + if (psShader->eShaderType == PIXEL_SHADER && (IsMobileTarget(this) || IsSwitch())) + UpdateSamplerPrecisions(psShader->sInfo, duChains, psPhase->ui32TotalTemps); + + UDSplitTemps(&psPhase->ui32TotalTemps, duChains, udChains, psPhase->pui32SplitInfo); + + WriteBackUsesAndDefines(duChains); + } + + HLSLcc::DataTypeAnalysis::SetDataTypes(this, psPhase->psInst, psPhase->ui32TotalTemps, psPhase->peTempTypes); + + if (psPhase->psTempDeclaration && (psPhase->ui32OrigTemps != psPhase->ui32TotalTemps)) + psPhase->psTempDeclaration->value.ui32NumTemps = psPhase->ui32TotalTemps; +} + +void HLSLCrossCompilerContext::ReserveFramebufferFetchInputs() +{ + if (psShader->eShaderType != PIXEL_SHADER) + return; + + if (!psShader->extensions->EXT_shader_framebuffer_fetch) + return; + + if ((flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH) == 0) + return; + + if (!(psShader->eTargetLanguage >= LANG_ES_300 && psShader->eTargetLanguage <= LANG_ES_LAST)) + return; + + if (!psDependencies) + return; + + if (!HaveUniformBindingsAndLocations(psShader->eTargetLanguage, psShader->extensions, flags) && + ((flags & HLSLCC_FLAG_FORCE_EXPLICIT_LOCATIONS) == 0 || (flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) != 0)) + return; + + // The Adreno GLSL compiler fails to compile shaders that use the same location for textures and inout attachments + // So here we figure out the maximum index of any inout render target and then make sure that we never use those for textures. 
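+ // e.g. if the highest inout render target index found below is 2, the first texture binding handed out afterwards will be 3.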
+ int maxInOutRenderTargetIndex = -1; + for (const Declaration& decl : psShader->asPhases[0].psDecl) + { + if (decl.eOpcode != OPCODE_DCL_INPUT_PS) + continue; + + const Operand& operand = decl.asOperands[0]; + if (!operand.iPSInOut) + continue; + + const ShaderInfo::InOutSignature* signature = NULL; + if (!psShader->sInfo.GetInputSignatureFromRegister(operand.ui32RegisterNumber, operand.ui32CompMask, &signature, true)) + continue; + + const int index = signature->ui32SemanticIndex; + if (index > maxInOutRenderTargetIndex) + maxInOutRenderTargetIndex = index; + } + + if (maxInOutRenderTargetIndex >= 0) + { + if (maxInOutRenderTargetIndex >= psDependencies->m_NextAvailableGLSLResourceBinding[GLSLCrossDependencyData::BufferType_Texture]) + psDependencies->m_NextAvailableGLSLResourceBinding[GLSLCrossDependencyData::BufferType_Texture] = maxInOutRenderTargetIndex + 1; + } +} + +void HLSLCrossCompilerContext::ClearDependencyData() +{ + switch (psShader->eShaderType) + { + case PIXEL_SHADER: + { + psDependencies->ClearCrossDependencyData(); + break; + } + case HULL_SHADER: + { + psDependencies->eTessPartitioning = TESSELLATOR_PARTITIONING_UNDEFINED; + psDependencies->eTessOutPrim = TESSELLATOR_OUTPUT_UNDEFINED; + break; + } + default: + break; + } +} + +void HLSLCrossCompilerContext::AddIndentation() +{ + int i; + bstring glsl = *currentGLSLString; + for (i = 0; i < indent; ++i) + { + bcatcstr(glsl, " "); + } +} + +bool HLSLCrossCompilerContext::RequireExtension(const std::string &extName) +{ + if (m_EnabledExtensions.find(extName) != m_EnabledExtensions.end()) + return true; + + m_EnabledExtensions.insert(extName); + bformata(extensions, "#extension %s : require\n", extName.c_str()); + return false; +} + +bool HLSLCrossCompilerContext::EnableExtension(const std::string &extName) +{ + if (m_EnabledExtensions.find(extName) != m_EnabledExtensions.end()) + return true; + + m_EnabledExtensions.insert(extName); + bformata(extensions, "#ifdef %s\n", extName.c_str()); + bformata(extensions, "#extension %s : enable\n", extName.c_str()); + bcatcstr(extensions, "#endif\n"); + return false; +} + +std::string HLSLCrossCompilerContext::GetDeclaredInputName(const Operand* psOperand, int *piRebase, int iIgnoreRedirect, uint32_t *puiIgnoreSwizzle) const +{ + std::ostringstream oss; + const ShaderInfo::InOutSignature* psIn = NULL; + int regSpace = psOperand->GetRegisterSpace(this); + + if (iIgnoreRedirect == 0) + { + if ((regSpace == 0 && psShader->asPhases[currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe) + || + (regSpace == 1 && psShader->asPhases[currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe)) + { + oss << "phase" << currentPhase << "_Input" << regSpace << "_" << psOperand->ui32RegisterNumber; + if (piRebase) + *piRebase = 0; + return oss.str(); + } + } + + if (regSpace == 0) + psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn, true); + else + psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn, true); + + if (psIn && piRebase) + *piRebase = psIn->iRebase; + + const std::string patchPrefix = psShader->eTargetLanguage == LANG_METAL ? "patch." 
: "patch"; + std::string res = ""; + + bool skipPrefix = false; + if (psTranslator->TranslateSystemValue(psOperand, psIn, res, puiIgnoreSwizzle, psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0, true, &skipPrefix, &iIgnoreRedirect)) + { + if (psShader->eTargetLanguage == LANG_METAL && (iIgnoreRedirect == 0) && !skipPrefix) + return inputPrefix + res; + else + return res; + } + + ASSERT(psIn != NULL); + oss << inputPrefix << (regSpace == 1 ? patchPrefix : "") << psIn->semanticName << psIn->ui32SemanticIndex; + return oss.str(); +} + +std::string HLSLCrossCompilerContext::GetDeclaredOutputName(const Operand* psOperand, + int* piStream, + uint32_t *puiIgnoreSwizzle, + int *piRebase, + int iIgnoreRedirect) const +{ + std::ostringstream oss; + const ShaderInfo::InOutSignature* psOut = NULL; + int regSpace = psOperand->GetRegisterSpace(this); + + if (iIgnoreRedirect == 0) + { + if ((regSpace == 0 && psShader->asPhases[currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe) + || (regSpace == 1 && psShader->asPhases[currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe)) + { + oss << "phase" << currentPhase << "_Output" << regSpace << "_" << psOperand->ui32RegisterNumber; + if (piRebase) + *piRebase = 0; + return oss.str(); + } + } + + if (regSpace == 0) + psShader->sInfo.GetOutputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), psShader->ui32CurrentVertexOutputStream, &psOut, true); + else + psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psOut, true); + + + if (psOut && piRebase) + *piRebase = psOut->iRebase; + + if (psOut && (psOut->isIndexed.find(currentPhase) != psOut->isIndexed.end())) + { + // Need to route through temp output variable + oss << "phase" << currentPhase << "_Output" << regSpace << "_" << psOut->indexStart.find(currentPhase)->second; + if (!psOperand->m_SubOperands[0].get()) + { + oss << "[" << psOperand->ui32RegisterNumber << "]"; + } + if (piRebase) + *piRebase = 0; + return oss.str(); + } + + const std::string patchPrefix = psShader->eTargetLanguage == LANG_METAL ? "patch." : "patch"; + std::string res = ""; + + if (psTranslator->TranslateSystemValue(psOperand, psOut, res, puiIgnoreSwizzle, psShader->aIndexedOutput[regSpace][psOperand->ui32RegisterNumber], false, NULL, &iIgnoreRedirect)) + { + // clip/cull planes will always have interim variable, as HLSL operates on float4 but we need to size output accordingly with actual planes count + // with tessellation factor buffers, a separate buffer from output is used. for some reason TranslateSystemValue return *outSkipPrefix = true + // for ALL system vars and then we simply ignore it here, so opt to modify iIgnoreRedirect for these special cases + + if (psShader->eTargetLanguage == LANG_METAL && regSpace == 0 && (iIgnoreRedirect == 0)) + return outputPrefix + res; + else if (psShader->eTargetLanguage == LANG_METAL && (iIgnoreRedirect == 0)) + return patchPrefix + res; + else + return res; + } + ASSERT(psOut != NULL); + + oss << outputPrefix << (regSpace == 1 ? 
patchPrefix : "") << psOut->semanticName << psOut->ui32SemanticIndex; + return oss.str(); +} + +bool HLSLCrossCompilerContext::OutputNeedsDeclaring(const Operand* psOperand, const int count) +{ + char compMask = (char)psOperand->ui32CompMask; + int regSpace = psOperand->GetRegisterSpace(this); + uint32_t startIndex = psOperand->ui32RegisterNumber + (psShader->ui32CurrentVertexOutputStream * 1024); // Assume less than 1K input streams + ASSERT(psShader->ui32CurrentVertexOutputStream < 4); + + // First check for various builtins, mostly depth-output ones. + if (psShader->eShaderType == PIXEL_SHADER) + { + if (psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL || + psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL) + { + return true; + } + + if (psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH) + { + // GL doesn't need declaration, Metal does. + return psShader->eTargetLanguage == LANG_METAL; + } + } + + // Needs declaring if any of the components hasn't been already declared + if ((compMask & ~psShader->acOutputDeclared[regSpace][startIndex]) != 0) + { + int offset; + const ShaderInfo::InOutSignature* psSignature = NULL; + + if (psOperand->eSpecialName == NAME_UNDEFINED) + { + // Need to fetch the actual comp mask + if (regSpace == 0) + psShader->sInfo.GetOutputSignatureFromRegister( + psOperand->ui32RegisterNumber, + psOperand->ui32CompMask, + psShader->ui32CurrentVertexOutputStream, + &psSignature); + else + psShader->sInfo.GetPatchConstantSignatureFromRegister( + psOperand->ui32RegisterNumber, + psOperand->ui32CompMask, + &psSignature); + + compMask = (char)psSignature->ui32Mask; + } + for (offset = 0; offset < count; offset++) + { + psShader->acOutputDeclared[regSpace][startIndex + offset] |= compMask; + } + + if (psSignature && (psSignature->semanticName == "PSIZE") && (psShader->eTargetLanguage != LANG_METAL)) + { + // gl_PointSize, doesn't need declaring. TODO: Metal doesn't have pointsize at all? 
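+ // The components were already recorded in acOutputDeclared above; returning false only means no declaration needs to be emitted for the gl_PointSize builtin.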
+ return false; + } + + return true; + } + + return false; +} + +bool HLSLCrossCompilerContext::IsVulkan() const +{ + return (flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; +} + +bool HLSLCrossCompilerContext::IsSwitch() const +{ + return (flags & HLSLCC_FLAG_NVN_TARGET) != 0; +} diff --git a/third_party/HLSLcc/src/HLSLcc.cpp b/third_party/HLSLcc/src/HLSLcc.cpp new file mode 100644 index 0000000..4592d8c --- /dev/null +++ b/third_party/HLSLcc/src/HLSLcc.cpp @@ -0,0 +1,250 @@ +#include "hlslcc.h" + +#include +#include +#include "internal_includes/HLSLCrossCompilerContext.h" +#include "internal_includes/toGLSL.h" +#include "internal_includes/toMetal.h" +#include "internal_includes/Shader.h" +#include "internal_includes/decode.h" + + +#ifndef GL_VERTEX_SHADER_ARB +#define GL_VERTEX_SHADER_ARB 0x8B31 +#endif +#ifndef GL_FRAGMENT_SHADER_ARB +#define GL_FRAGMENT_SHADER_ARB 0x8B30 +#endif +#ifndef GL_GEOMETRY_SHADER +#define GL_GEOMETRY_SHADER 0x8DD9 +#endif +#ifndef GL_TESS_EVALUATION_SHADER +#define GL_TESS_EVALUATION_SHADER 0x8E87 +#endif +#ifndef GL_TESS_CONTROL_SHADER +#define GL_TESS_CONTROL_SHADER 0x8E88 +#endif +#ifndef GL_COMPUTE_SHADER +#define GL_COMPUTE_SHADER 0x91B9 +#endif + +static bool CheckConstantBuffersNoDuplicateNames(const std::vector& buffers, HLSLccReflection& reflectionCallbacks) +{ + uint32_t count = buffers.size(); + for (uint32_t i = 0; i < count; ++i) + { + const ConstantBuffer& lhs = buffers[i]; + for (uint32_t j = i + 1; j < count; ++j) + { + const ConstantBuffer& rhs = buffers[j]; + if (lhs.name == rhs.name) + { + std::ostringstream oss; + oss << "Duplicate constant buffer declaration: " << lhs.name; + reflectionCallbacks.OnDiagnostics(oss.str(), 0, true); + return false; + } + } + } + + return true; +} + +HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader, + unsigned int flags, + GLLang language, + const GlExtensions *extensions, + GLSLCrossDependencyData* dependencies, + HLSLccSamplerPrecisionInfo& samplerPrecisions, + HLSLccReflection& reflectionCallbacks, + GLSLShader* result) +{ + uint32_t* tokens; + char* glslcstr = NULL; + int GLSLShaderType = GL_FRAGMENT_SHADER_ARB; + int success = 0; + uint32_t i; + + tokens = (uint32_t*)shader; + + std::auto_ptr psShader(DecodeDXBC(tokens, flags)); + + if (psShader.get()) + { + Shader* shader = psShader.get(); + if (!CheckConstantBuffersNoDuplicateNames(shader->sInfo.psConstantBuffers, reflectionCallbacks)) + return 0; + + HLSLCrossCompilerContext sContext(reflectionCallbacks); + + // Add shader precisions from the list + psShader->sInfo.AddSamplerPrecisions(samplerPrecisions); + + if (psShader->ui32MajorVersion <= 3) + { + flags &= ~HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS; + } + +#ifdef _DEBUG + flags |= HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS; +#endif + + sContext.psShader = shader; + sContext.flags = flags; + + // If dependencies == NULL, we'll create a dummy object for it so that there's always something there. 
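+ // The dummy object is owned by depPtr (std::auto_ptr), so it is destroyed automatically when this function returns.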
+ std::auto_ptr depPtr(NULL); + if (dependencies == NULL) + { + depPtr.reset(new GLSLCrossDependencyData()); + sContext.psDependencies = depPtr.get(); + sContext.psDependencies->SetupGLSLResourceBindingSlotsIndices(); + } + else + sContext.psDependencies = dependencies; + + for (i = 0; i < psShader->asPhases.size(); ++i) + { + psShader->asPhases[i].hasPostShaderCode = 0; + } + + if (language == LANG_METAL) + { + // Geometry shader is not supported + if (psShader->eShaderType == GEOMETRY_SHADER) + { + result->sourceCode = ""; + return 0; + } + ToMetal translator(&sContext); + if (!translator.Translate()) + { + bdestroy(sContext.glsl); + for (i = 0; i < psShader->asPhases.size(); ++i) + { + bdestroy(psShader->asPhases[i].postShaderCode); + bdestroy(psShader->asPhases[i].earlyMain); + } + + return 0; + } + } + else + { + ToGLSL translator(&sContext); + language = translator.SetLanguage(language); + translator.SetExtensions(extensions); + if (!translator.Translate()) + { + bdestroy(sContext.glsl); + for (i = 0; i < psShader->asPhases.size(); ++i) + { + bdestroy(psShader->asPhases[i].postShaderCode); + bdestroy(psShader->asPhases[i].earlyMain); + } + + return 0; + } + } + + switch (psShader->eShaderType) + { + case VERTEX_SHADER: + { + GLSLShaderType = GL_VERTEX_SHADER_ARB; + break; + } + case GEOMETRY_SHADER: + { + GLSLShaderType = GL_GEOMETRY_SHADER; + break; + } + case DOMAIN_SHADER: + { + GLSLShaderType = GL_TESS_EVALUATION_SHADER; + break; + } + case HULL_SHADER: + { + GLSLShaderType = GL_TESS_CONTROL_SHADER; + break; + } + case COMPUTE_SHADER: + { + GLSLShaderType = GL_COMPUTE_SHADER; + break; + } + default: + { + break; + } + } + + glslcstr = bstr2cstr(sContext.glsl, '\0'); + result->sourceCode = glslcstr; + bcstrfree(glslcstr); + + bdestroy(sContext.glsl); + for (i = 0; i < psShader->asPhases.size(); ++i) + { + bdestroy(psShader->asPhases[i].postShaderCode); + bdestroy(psShader->asPhases[i].earlyMain); + } + + result->reflection = psShader->sInfo; + + result->textureSamplers = psShader->textureSamplers; + + success = 1; + } + + shader = 0; + tokens = 0; + + /* Fill in the result struct */ + + result->shaderType = GLSLShaderType; + result->GLSLLanguage = language; + + return success; +} + +HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromFile(const char* filename, + unsigned int flags, + GLLang language, + const GlExtensions *extensions, + GLSLCrossDependencyData* dependencies, + HLSLccSamplerPrecisionInfo& samplerPrecisions, + HLSLccReflection& reflectionCallbacks, + GLSLShader* result) +{ + FILE* shaderFile; + int length; + size_t readLength; + std::vector shader; + int success = 0; + + shaderFile = fopen(filename, "rb"); + + if (!shaderFile) + { + return 0; + } + + fseek(shaderFile, 0, SEEK_END); + length = ftell(shaderFile); + fseek(shaderFile, 0, SEEK_SET); + + shader.resize(length + 1); + + readLength = fread(&shader[0], 1, length, shaderFile); + + fclose(shaderFile); + shaderFile = 0; + + shader[readLength] = '\0'; + + success = TranslateHLSLFromMem(&shader[0], flags, language, extensions, dependencies, samplerPrecisions, reflectionCallbacks, result); + + return success; +} diff --git a/third_party/HLSLcc/src/HLSLccToolkit.cpp b/third_party/HLSLcc/src/HLSLccToolkit.cpp new file mode 100644 index 0000000..d081f2d --- /dev/null +++ b/third_party/HLSLcc/src/HLSLccToolkit.cpp @@ -0,0 +1,574 @@ +#include "internal_includes/HLSLccToolkit.h" +#include "internal_includes/debug.h" +#include "internal_includes/toGLSLOperand.h" +#include "internal_includes/HLSLCrossCompilerContext.h" 
+#include "internal_includes/Shader.h" +#include "internal_includes/languages.h" +#include "include/UnityInstancingFlexibleArraySize.h" +#include +#include + +namespace HLSLcc +{ + uint32_t GetNumberBitsSet(uint32_t a) + { + // Calculate number of bits in a + // Taken from https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSet64 + // Works only up to 14 bits (we're only using up to 4) + return (a * 0x200040008001ULL & 0x111111111111111ULL) % 0xf; + } + + uint32_t SVTTypeToFlag(const SHADER_VARIABLE_TYPE eType) + { + if (eType == SVT_FLOAT16) + { + return TO_FLAG_FORCE_HALF; + } + if (eType == SVT_UINT || eType == SVT_UINT16) + { + return TO_FLAG_UNSIGNED_INTEGER; + } + else if (eType == SVT_INT || eType == SVT_INT16 || eType == SVT_INT12) + { + return TO_FLAG_INTEGER; + } + else if (eType == SVT_BOOL) + { + return TO_FLAG_BOOL; + } + else + { + return TO_FLAG_NONE; + } + } + + SHADER_VARIABLE_TYPE TypeFlagsToSVTType(const uint32_t typeflags) + { + if (typeflags & TO_FLAG_FORCE_HALF) + return SVT_FLOAT16; + if (typeflags & (TO_FLAG_INTEGER | TO_AUTO_BITCAST_TO_INT)) + return SVT_INT; + if (typeflags & (TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_BITCAST_TO_UINT)) + return SVT_UINT; + if (typeflags & TO_FLAG_BOOL) + return SVT_BOOL; + return SVT_FLOAT; + } + + const char * GetConstructorForTypeGLSL(const HLSLCrossCompilerContext *context, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision) + { + static const char * const uintTypes[] = { " ", "uint", "uvec2", "uvec3", "uvec4" }; + static const char * const uint16Types[] = { " ", "mediump uint", "mediump uvec2", "mediump uvec3", "mediump uvec4" }; + static const char * const intTypes[] = { " ", "int", "ivec2", "ivec3", "ivec4" }; + static const char * const int16Types[] = { " ", "mediump int", "mediump ivec2", "mediump ivec3", "mediump ivec4" }; + static const char * const int12Types[] = { " ", "lowp int", "lowp ivec2", "lowp ivec3", "lowp ivec4" }; + static const char * const floatTypes[] = { " ", "float", "vec2", "vec3", "vec4" }; + static const char * const float16Types[] = { " ", "mediump float", "mediump vec2", "mediump vec3", "mediump vec4" }; + static const char * const float10Types[] = { " ", "lowp float", "lowp vec2", "lowp vec3", "lowp vec4" }; + static const char * const boolTypes[] = { " ", "bool", "bvec2", "bvec3", "bvec4" }; + + ASSERT(components >= 1 && components <= 4); + bool emitLowp = EmitLowp(context); + + switch (eType) + { + case SVT_UINT: + return HaveUnsignedTypes(context->psShader->eTargetLanguage) ? uintTypes[components] : intTypes[components]; + case SVT_UINT16: + return useGLSLPrecision ? uint16Types[components] : uintTypes[components]; + case SVT_INT: + return intTypes[components]; + case SVT_INT16: + return useGLSLPrecision ? int16Types[components] : intTypes[components]; + case SVT_INT12: + return useGLSLPrecision ? (emitLowp ? int12Types[components] : int16Types[components]) : intTypes[components]; + case SVT_FLOAT: + return floatTypes[components]; + case SVT_FLOAT16: + return useGLSLPrecision ? float16Types[components] : floatTypes[components]; + case SVT_FLOAT10: + return useGLSLPrecision ? (emitLowp ? 
float10Types[components] : float16Types[components]) : floatTypes[components]; + case SVT_BOOL: + return boolTypes[components]; + default: + ASSERT(0); + return " "; + } + } + + const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType, const int components) + { + static const char * const uintTypes[] = { " ", "uint", "uint2", "uint3", "uint4" }; + static const char * const ushortTypes[] = { " ", "ushort", "ushort2", "ushort3", "ushort4" }; + static const char * const intTypes[] = { " ", "int", "int2", "int3", "int4" }; + static const char * const shortTypes[] = { " ", "short", "short2", "short3", "short4" }; + static const char * const floatTypes[] = { " ", "float", "float2", "float3", "float4" }; + static const char * const halfTypes[] = { " ", "half", "half2", "half3", "half4" }; + static const char * const boolTypes[] = { " ", "bool", "bool2", "bool3", "bool4" }; + + ASSERT(components >= 1 && components <= 4); + + switch (eType) + { + case SVT_UINT: + return uintTypes[components]; + case SVT_UINT16: + case SVT_UINT8: // there is not uint8 in metal so treat it as ushort + return ushortTypes[components]; + case SVT_INT: + return intTypes[components]; + case SVT_INT16: + case SVT_INT12: + return shortTypes[components]; + case SVT_FLOAT: + return floatTypes[components]; + case SVT_FLOAT16: + case SVT_FLOAT10: + return halfTypes[components]; + case SVT_BOOL: + return boolTypes[components]; + default: + ASSERT(0); + return " "; + } + } + + const char * GetConstructorForType(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision /* = true*/) + { + if (psContext->psShader->eTargetLanguage == LANG_METAL) + return GetConstructorForTypeMetal(eType, components); + else + return GetConstructorForTypeGLSL(psContext, eType, components, useGLSLPrecision); + } + + std::string GetMatrixTypeName(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eBaseType, const int columns, const int rows) + { + std::string result; + std::ostringstream oss; + if (psContext->psShader->eTargetLanguage == LANG_METAL) + { + switch (eBaseType) + { + case SVT_FLOAT: + oss << "float" << columns << "x" << rows; + break; + case SVT_FLOAT16: + case SVT_FLOAT10: + oss << "half" << columns << "x" << rows; + break; + default: + ASSERT(0); + break; + } + } + else + { + switch (eBaseType) + { + case SVT_FLOAT: + oss << "mat" << columns << "x" << rows; + break; + case SVT_FLOAT16: + oss << "mediump mat" << columns << "x" << rows; + break; + case SVT_FLOAT10: + oss << "lowp mat" << columns << "x" << rows; + break; + default: + ASSERT(0); + break; + } + } + result = oss.str(); + return result; + } + + void AddSwizzleUsingElementCount(bstring dest, uint32_t count) + { + if (count == 4) + return; + if (count) + { + bcatcstr(dest, "."); + bcatcstr(dest, "x"); + count--; + } + if (count) + { + bcatcstr(dest, "y"); + count--; + } + if (count) + { + bcatcstr(dest, "z"); + count--; + } + if (count) + { + bcatcstr(dest, "w"); + count--; + } + } + + // Calculate the bits set in mask + int WriteMaskToComponentCount(uint32_t writeMask) + { + // In HLSL bytecode writemask 0 also means everything + if (writeMask == 0) + return 4; + + return (int)GetNumberBitsSet(writeMask); + } + + uint32_t BuildComponentMaskFromElementCount(int count) + { + // Translate numComponents into bitmask + // 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15 + return (1 << count) - 1; + } + + // Returns true if we can do direct assignment between types (mostly for mediump<->highp floats 
etc) + bool DoAssignmentDataTypesMatch(SHADER_VARIABLE_TYPE dest, SHADER_VARIABLE_TYPE src) + { + if (src == dest) + return true; + + if ((dest == SVT_FLOAT || dest == SVT_FLOAT10 || dest == SVT_FLOAT16) && + (src == SVT_FLOAT || src == SVT_FLOAT10 || src == SVT_FLOAT16)) + return true; + + if ((dest == SVT_INT || dest == SVT_INT12 || dest == SVT_INT16) && + (src == SVT_INT || src == SVT_INT12 || src == SVT_INT16)) + return true; + + if ((dest == SVT_UINT || dest == SVT_UINT16) && + (src == SVT_UINT || src == SVT_UINT16)) + return true; + + return false; + } + + uint32_t ResourceReturnTypeToFlag(const RESOURCE_RETURN_TYPE eType) + { + if (eType == RETURN_TYPE_SINT) + { + return TO_FLAG_INTEGER; + } + else if (eType == RETURN_TYPE_UINT) + { + return TO_FLAG_UNSIGNED_INTEGER; + } + else + { + return TO_FLAG_NONE; + } + } + + SHADER_VARIABLE_TYPE ResourceReturnTypeToSVTType(const RESOURCE_RETURN_TYPE eType, const REFLECT_RESOURCE_PRECISION ePrec) + { + if (eType == RETURN_TYPE_SINT) + { + switch (ePrec) + { + default: + return SVT_INT; + case REFLECT_RESOURCE_PRECISION_LOWP: + return SVT_INT12; + case REFLECT_RESOURCE_PRECISION_MEDIUMP: + return SVT_INT16; + } + } + else if (eType == RETURN_TYPE_UINT) + { + switch (ePrec) + { + default: + return SVT_UINT; + case REFLECT_RESOURCE_PRECISION_LOWP: + return SVT_UINT8; + case REFLECT_RESOURCE_PRECISION_MEDIUMP: + return SVT_UINT16; + } + } + else + { + switch (ePrec) + { + default: + return SVT_FLOAT; + case REFLECT_RESOURCE_PRECISION_LOWP: + return SVT_FLOAT10; + case REFLECT_RESOURCE_PRECISION_MEDIUMP: + return SVT_FLOAT16; + } + } + } + + RESOURCE_RETURN_TYPE SVTTypeToResourceReturnType(SHADER_VARIABLE_TYPE type) + { + switch (type) + { + case SVT_INT: + case SVT_INT12: + case SVT_INT16: + return RETURN_TYPE_SINT; + case SVT_UINT: + case SVT_UINT16: + return RETURN_TYPE_UINT; + case SVT_FLOAT: + case SVT_FLOAT10: + case SVT_FLOAT16: + return RETURN_TYPE_FLOAT; + default: + return RETURN_TYPE_UNUSED; + } + } + + REFLECT_RESOURCE_PRECISION SVTTypeToPrecision(SHADER_VARIABLE_TYPE type) + { + switch (type) + { + case SVT_INT: + case SVT_UINT: + case SVT_FLOAT: + return REFLECT_RESOURCE_PRECISION_HIGHP; + case SVT_INT16: + case SVT_UINT16: + case SVT_FLOAT16: + return REFLECT_RESOURCE_PRECISION_MEDIUMP; + case SVT_INT12: + case SVT_FLOAT10: + case SVT_UINT8: + return REFLECT_RESOURCE_PRECISION_LOWP; + default: + return REFLECT_RESOURCE_PRECISION_UNKNOWN; + } + } + + uint32_t ElemCountToAutoExpandFlag(uint32_t elemCount) + { + return TO_AUTO_EXPAND_TO_VEC2 << (elemCount - 2); + } + + // Returns true if the operation is commutative + bool IsOperationCommutative(int eOpCode) + { + switch ((OPCODE_TYPE)eOpCode) + { + case OPCODE_DADD: + case OPCODE_IADD: + case OPCODE_ADD: + case OPCODE_MUL: + case OPCODE_IMUL: + case OPCODE_OR: + case OPCODE_AND: + return true; + default: + return false; + } + } + + // Returns true if operands are identical, only cares about temp registers currently. 
+ bool AreTempOperandsIdentical(const Operand * psA, const Operand * psB) + { + if (!psA || !psB) + return 0; + + if (psA->eType != OPERAND_TYPE_TEMP || psB->eType != OPERAND_TYPE_TEMP) + return 0; + + if (psA->eModifier != psB->eModifier) + return 0; + + if (psA->iNumComponents != psB->iNumComponents) + return 0; + + if (psA->ui32RegisterNumber != psB->ui32RegisterNumber) + return 0; + + if (psA->eSelMode != psB->eSelMode) + return 0; + + if (psA->eSelMode == OPERAND_4_COMPONENT_MASK_MODE && psA->ui32CompMask != psB->ui32CompMask) + return 0; + + if (psA->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE && psA->aui32Swizzle[0] != psB->aui32Swizzle[0]) + return 0; + + if (psA->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE && std::equal(&psA->aui32Swizzle[0], &psA->aui32Swizzle[4], &psB->aui32Swizzle[0])) + return 0; + + return 1; + } + + bool IsAddOneInstruction(const Instruction *psInst) + { + if (psInst->eOpcode != OPCODE_IADD) + return false; + if (psInst->asOperands[0].eType != OPERAND_TYPE_TEMP) + return false; + + if (psInst->asOperands[1].eType == OPERAND_TYPE_TEMP) + { + if (psInst->asOperands[1].ui32RegisterNumber != psInst->asOperands[0].ui32RegisterNumber) + return false; + if (psInst->asOperands[2].eType != OPERAND_TYPE_IMMEDIATE32) + return false; + + if (*(int *)&psInst->asOperands[2].afImmediates[0] != 1) + return false; + } + else + { + if (psInst->asOperands[1].eType != OPERAND_TYPE_IMMEDIATE32) + return false; + if (psInst->asOperands[2].eType != OPERAND_TYPE_TEMP) + return false; + + if (psInst->asOperands[2].ui32RegisterNumber != psInst->asOperands[0].ui32RegisterNumber) + return false; + + if (*(int *)&psInst->asOperands[1].afImmediates[0] != 1) + return false; + } + return true; + } + + int GetNumTextureDimensions(int /* RESOURCE_DIMENSION */ eResDim) + { + switch ((RESOURCE_DIMENSION)eResDim) + { + case RESOURCE_DIMENSION_TEXTURE1D: + return 1; + case RESOURCE_DIMENSION_TEXTURE2D: + case RESOURCE_DIMENSION_TEXTURE2DMS: + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + case RESOURCE_DIMENSION_TEXTURECUBE: + return 2; + case RESOURCE_DIMENSION_TEXTURE3D: + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + return 3; + default: + ASSERT(0); + break; + } + return 0; + } + + // Returns the "more important" type of a and b, currently int < uint < float + SHADER_VARIABLE_TYPE SelectHigherType(SHADER_VARIABLE_TYPE a, SHADER_VARIABLE_TYPE b) + { +#define DO_CHECK(type) if( a == type || b == type ) return type + + // Priority ordering + DO_CHECK(SVT_FLOAT16); + DO_CHECK(SVT_FLOAT10); + DO_CHECK(SVT_UINT16); + DO_CHECK(SVT_UINT8); + DO_CHECK(SVT_INT16); + DO_CHECK(SVT_INT12); + DO_CHECK(SVT_FORCED_INT); + DO_CHECK(SVT_FLOAT); + DO_CHECK(SVT_UINT); + DO_CHECK(SVT_INT); + DO_CHECK(SVT_INT_AMBIGUOUS); + +#undef DO_CHECK + // After these just rely on ordering. + return a > b ? 
a : b; + } + + // Returns true if a direct constructor can convert src->dest + bool CanDoDirectCast(const HLSLCrossCompilerContext *context, SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest) + { + // uint<->int<->bool conversions possible + if ((src == SVT_INT || src == SVT_UINT || src == SVT_BOOL || src == SVT_INT12 || src == SVT_INT16 || src == SVT_UINT16) && + (dest == SVT_INT || dest == SVT_UINT || dest == SVT_BOOL || dest == SVT_INT12 || dest == SVT_INT16 || dest == SVT_UINT16)) + return true; + + // float<->double possible + if ((src == SVT_FLOAT || src == SVT_DOUBLE || src == SVT_FLOAT16 || src == SVT_FLOAT10) && + (dest == SVT_FLOAT || dest == SVT_DOUBLE || dest == SVT_FLOAT16 || dest == SVT_FLOAT10)) + return true; + + if (context->psShader->eTargetLanguage == LANG_METAL) + { + // avoid compiler error: cannot use as_type to cast from 'half' to 'unsigned int' or 'int', types of different size + if ((src == SVT_FLOAT16 || src == SVT_FLOAT10) && (dest == SVT_UINT || dest == SVT_INT)) + return true; + } + + return false; + } + + bool IsUnityFlexibleInstancingBuffer(const ConstantBuffer* psCBuf) + { + return psCBuf != NULL && psCBuf->asVars.size() == 1 + && psCBuf->asVars[0].sType.Class == SVC_STRUCT && psCBuf->asVars[0].sType.Elements == 2 + && IsUnityInstancingConstantBufferName(psCBuf->name.c_str()); + } + +#ifndef fpcheck +#ifdef _MSC_VER +#define fpcheck(x) (_isnan(x) || !_finite(x)) +#else +#define fpcheck(x) (std::isnan(x) || std::isinf(x)) +#endif +#endif // #ifndef fpcheck + + // Helper function to print floats with full precision + void PrintFloat(bstring b, float f) + { + bstring temp; + int ePos; + int pointPos; + + temp = bformat("%.9g", f); + ePos = bstrchrp(temp, 'e', 0); + pointPos = bstrchrp(temp, '.', 0); + + bconcat(b, temp); + bdestroy(temp); + + if (ePos < 0 && pointPos < 0 && !fpcheck(f)) + bcatcstr(b, ".0"); + } + + bstring GetEarlyMain(HLSLCrossCompilerContext *psContext) + { + bstring *oldString = psContext->currentGLSLString; + bstring *str = &psContext->psShader->asPhases[psContext->currentPhase].earlyMain; + int indent = psContext->indent; + + if (psContext->psShader->eTargetLanguage == LANG_METAL && !psContext->indent) + ++psContext->indent; + + psContext->currentGLSLString = str; + psContext->AddIndentation(); + psContext->currentGLSLString = oldString; + psContext->indent = indent; + + return *str; + } + + bstring GetPostShaderCode(HLSLCrossCompilerContext *psContext) + { + bstring *oldString = psContext->currentGLSLString; + bstring *str = &psContext->psShader->asPhases[psContext->currentPhase].postShaderCode; + int indent = psContext->indent; + + if (psContext->psShader->eTargetLanguage == LANG_METAL && !psContext->indent) + ++psContext->indent; + + psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode = 1; + + psContext->currentGLSLString = str; + psContext->AddIndentation(); + psContext->currentGLSLString = oldString; + psContext->indent = indent; + + return *str; + } +} diff --git a/third_party/HLSLcc/src/HLSLccTypes.natvis b/third_party/HLSLcc/src/HLSLccTypes.natvis new file mode 100644 index 0000000..6dd7c23 --- /dev/null +++ b/third_party/HLSLcc/src/HLSLccTypes.natvis @@ -0,0 +1,10 @@ + + + + {{ id={id} op={eOpcode} o0={asOperands[0]}, o1={asOperands[1]}}} + + + {{ type={eType}, reg={ui32RegisterNumber} }} + + + diff --git a/third_party/HLSLcc/src/Instruction.cpp b/third_party/HLSLcc/src/Instruction.cpp new file mode 100644 index 0000000..ee384cd --- /dev/null +++ b/third_party/HLSLcc/src/Instruction.cpp @@ -0,0 +1,349 @@ 
+#include "internal_includes/Instruction.h" +#include "internal_includes/debug.h" +#include "include/ShaderInfo.h" + +// Returns the result swizzle operand for an instruction, or NULL if all src operands have swizzles +static Operand *GetSrcSwizzleOperand(Instruction *psInst) +{ + switch (psInst->eOpcode) + { + case OPCODE_DP2: + case OPCODE_DP3: + case OPCODE_DP4: + case OPCODE_NOP: + case OPCODE_SWAPC: + case OPCODE_SAMPLE_C: + case OPCODE_SAMPLE_C_LZ: + ASSERT(0); + return NULL; + + // Normal arithmetics, all srcs have swizzles + case OPCODE_ADD: + case OPCODE_AND: + case OPCODE_DERIV_RTX: + case OPCODE_DERIV_RTX_COARSE: + case OPCODE_DERIV_RTX_FINE: + case OPCODE_DERIV_RTY: + case OPCODE_DERIV_RTY_COARSE: + case OPCODE_DERIV_RTY_FINE: + case OPCODE_DIV: + case OPCODE_EQ: + case OPCODE_EXP: + case OPCODE_FRC: + case OPCODE_FTOI: + case OPCODE_FTOU: + case OPCODE_GE: + case OPCODE_IADD: + case OPCODE_IEQ: + case OPCODE_IGE: + case OPCODE_ILT: + case OPCODE_IMAD: + case OPCODE_IMAX: + case OPCODE_IMIN: + case OPCODE_IMUL: + case OPCODE_INE: + case OPCODE_INEG: + case OPCODE_ITOF: + case OPCODE_LOG: + case OPCODE_LT: + case OPCODE_MAD: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_MOV: + case OPCODE_MUL: + case OPCODE_NE: + case OPCODE_NOT: + case OPCODE_OR: + case OPCODE_ROUND_NE: + case OPCODE_ROUND_NI: + case OPCODE_ROUND_PI: + case OPCODE_ROUND_Z: + case OPCODE_RSQ: + case OPCODE_SINCOS: + case OPCODE_SQRT: + case OPCODE_UDIV: + case OPCODE_UGE: + case OPCODE_ULT: + case OPCODE_UMAD: + case OPCODE_UMAX: + case OPCODE_UMIN: + case OPCODE_UMUL: + case OPCODE_UTOF: + case OPCODE_XOR: + + case OPCODE_BFI: + case OPCODE_BFREV: + case OPCODE_COUNTBITS: + case OPCODE_DADD: + case OPCODE_DDIV: + case OPCODE_DEQ: + case OPCODE_DFMA: + case OPCODE_DGE: + case OPCODE_DLT: + case OPCODE_DMAX: + case OPCODE_DMIN: + case OPCODE_DMUL: + case OPCODE_DMOV: + case OPCODE_DNE: + case OPCODE_DRCP: + case OPCODE_DTOF: + case OPCODE_F16TOF32: + case OPCODE_F32TOF16: + case OPCODE_FIRSTBIT_HI: + case OPCODE_FIRSTBIT_LO: + case OPCODE_FIRSTBIT_SHI: + case OPCODE_FTOD: + case OPCODE_IBFE: + case OPCODE_RCP: + case OPCODE_UADDC: + case OPCODE_UBFE: + case OPCODE_USUBB: + case OPCODE_MOVC: + case OPCODE_DMOVC: + return NULL; + + // Special cases: + case OPCODE_GATHER4: + case OPCODE_GATHER4_C: + case OPCODE_LD: + case OPCODE_LD_MS: + case OPCODE_LOD: + case OPCODE_LD_UAV_TYPED: + case OPCODE_LD_RAW: + case OPCODE_SAMPLE: + case OPCODE_SAMPLE_B: + case OPCODE_SAMPLE_L: + case OPCODE_SAMPLE_D: + case OPCODE_RESINFO: + return &psInst->asOperands[2]; + + case OPCODE_GATHER4_PO: + case OPCODE_GATHER4_PO_C: + case OPCODE_LD_STRUCTURED: + return &psInst->asOperands[3]; + + case OPCODE_SAMPLE_INFO: + return &psInst->asOperands[1]; + + case OPCODE_ISHL: + case OPCODE_ISHR: + case OPCODE_USHR: + // sm4 variant has single component selection on src1 -> only src0 has swizzle + if (psInst->asOperands[2].eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) + return &psInst->asOperands[1]; + else // whereas sm5 variant has swizzle also on src1 + return NULL; + + default: + ASSERT(0); + return NULL; + } +} + +// Tweak the source operands of an instruction so that the rebased write mask will still work +static void DoSrcOperandRebase(Operand *psOperand, uint32_t rebase) +{ + uint32_t i; + switch (psOperand->eSelMode) + { + default: + case OPERAND_4_COMPONENT_MASK_MODE: + ASSERT(psOperand->ui32CompMask == 0 || psOperand->ui32CompMask == OPERAND_4_COMPONENT_MASK_ALL); + + // Special case for immediates, they do not have swizzles + 
if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32) + { + if (psOperand->iNumComponents > 1) + std::copy(&psOperand->afImmediates[rebase], &psOperand->afImmediates[4], &psOperand->afImmediates[0]); + return; + } + if (psOperand->eType == OPERAND_TYPE_IMMEDIATE64) + { + if (psOperand->iNumComponents > 1) + std::copy(&psOperand->adImmediates[rebase], &psOperand->adImmediates[4], &psOperand->adImmediates[0]); + return; + } + + // Need to change this to swizzle + psOperand->eSelMode = OPERAND_4_COMPONENT_SWIZZLE_MODE; + psOperand->ui32Swizzle = 0; + for (i = 0; i < 4 - rebase; i++) + psOperand->aui32Swizzle[i] = i + rebase; + for (; i < 4; i++) + psOperand->aui32Swizzle[i] = rebase; // The first actual input. + break; + case OPERAND_4_COMPONENT_SELECT_1_MODE: + // Nothing to do + break; + case OPERAND_4_COMPONENT_SWIZZLE_MODE: + for (i = rebase; i < 4; i++) + psOperand->aui32Swizzle[i - rebase] = psOperand->aui32Swizzle[i]; + break; + } +} + +void Instruction::ChangeOperandTempRegister(Operand *psOperand, uint32_t oldReg, uint32_t newReg, uint32_t compMask, uint32_t flags, uint32_t rebase) +{ + uint32_t i = 0; + uint32_t accessMask = 0; + int isDestination = 0; + Operand *psSwizzleOperand = NULL; + + if (flags & UD_CHANGE_SUBOPERANDS) + { + for (i = 0; i < MAX_SUB_OPERANDS; i++) + { + if (psOperand->m_SubOperands[i].get()) + ChangeOperandTempRegister(psOperand->m_SubOperands[i].get(), oldReg, newReg, compMask, UD_CHANGE_ALL, rebase); + } + } + + if ((flags & UD_CHANGE_MAIN_OPERAND) == 0) + return; + + if (psOperand->eType != OPERAND_TYPE_TEMP) + return; + + if (psOperand->ui32RegisterNumber != oldReg) + return; + + accessMask = psOperand->GetAccessMask(); + // If this operation touches other components than the one(s) we're splitting, skip it + if ((accessMask & (~compMask)) != 0) + { + // Verify that we've not messed up in reachability analysis. + // This would mean that we've encountered an instruction that accesses + // a component in multi-component mode and we're supposed to treat it as single-use only. + // Now that we track operands we can bring this back + ASSERT((accessMask & compMask) == 0); + return; + } + +#if 0 + printf("Updating operand %d with access mask %X\n", (int)psOperand->id, accessMask); +#endif + psOperand->ui32RegisterNumber = newReg; + + if (rebase == 0) + return; + + // Update component mask. Note that we don't need to do anything to the suboperands. They do not affect destination writemask. 
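+    // (e.g. a destination mask of .zw with rebase 2 becomes .xy: 1100b >> 2 == 0011b)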
+ switch (psOperand->eSelMode) + { + case OPERAND_4_COMPONENT_MASK_MODE: + { + uint32_t oldMask = psOperand->ui32CompMask; + if (oldMask == 0) + oldMask = OPERAND_4_COMPONENT_MASK_ALL; + + // Check that we're not losing any information + ASSERT((oldMask >> rebase) << rebase == oldMask); + psOperand->ui32CompMask = (oldMask >> rebase); + break; + } + case OPERAND_4_COMPONENT_SELECT_1_MODE: + ASSERT(psOperand->aui32Swizzle[0] >= rebase); + psOperand->aui32Swizzle[0] -= rebase; + break; + case OPERAND_4_COMPONENT_SWIZZLE_MODE: + { + for (i = 0; i < 4; i++) + { + // Note that this rebase is different from the one done for source operands + ASSERT(psOperand->aui32Swizzle[i] >= rebase); + psOperand->aui32Swizzle[i] -= rebase; + } + break; + } + default: + ASSERT(0); + } + + // Tweak operand datatypes + std::copy(&psOperand->aeDataType[rebase], &psOperand->aeDataType[4], &psOperand->aeDataType[0]); + + // If this operand is a destination, we'll need to tweak sources as well + for (i = 0; i < ui32FirstSrc; i++) + { + if (psOperand == &asOperands[i]) + { + isDestination = 1; + break; + } + } + + if (isDestination == 0) + return; + + // Nasty corner case of 2 destinations, not supported if both targets are written + ASSERT((ui32FirstSrc < 2) || (asOperands[0].eType == OPERAND_TYPE_NULL) || (asOperands[1].eType == OPERAND_TYPE_NULL)); + + // If we made it this far, we're rebasing a destination temp (and the only destination), need to tweak sources depending on the instruction + switch (eOpcode) + { + // The opcodes that do not need tweaking: + case OPCODE_DP2: + case OPCODE_DP3: + case OPCODE_DP4: + case OPCODE_BUFINFO: + case OPCODE_SAMPLE_C: + case OPCODE_SAMPLE_C_LZ: + return; + + default: + psSwizzleOperand = GetSrcSwizzleOperand(this); // Null means tweak all source operands + if (psSwizzleOperand) + { + DoSrcOperandRebase(psSwizzleOperand, rebase); + return; + } + else + { + for (i = ui32FirstSrc; i < ui32NumOperands; i++) + { + DoSrcOperandRebase(&asOperands[i], rebase); + } + } + return; + } +} + +// Returns nonzero if psInst is a sample instruction and the sampler has medium or low precision +bool Instruction::IsPartialPrecisionSamplerInstruction(const ShaderInfo &info, OPERAND_MIN_PRECISION *pType) const +{ + const Operand *op; + const ResourceBinding *psBinding = NULL; + OPERAND_MIN_PRECISION sType = OPERAND_MIN_PRECISION_DEFAULT; + switch (eOpcode) + { + default: + return false; + case OPCODE_SAMPLE: + case OPCODE_SAMPLE_B: + case OPCODE_SAMPLE_L: + case OPCODE_SAMPLE_D: + case OPCODE_SAMPLE_C: + case OPCODE_SAMPLE_C_LZ: + break; + } + + op = &asOperands[3]; + ASSERT(op->eType == OPERAND_TYPE_SAMPLER); + + info.GetResourceFromBindingPoint(RGROUP_SAMPLER, op->ui32RegisterNumber, &psBinding); + if (!psBinding) + { + /* Try to look from texture group */ + info.GetResourceFromBindingPoint(RGROUP_TEXTURE, op->ui32RegisterNumber, &psBinding); + } + + sType = Operand::ResourcePrecisionToOperandPrecision(psBinding ? 
psBinding->ePrecision : REFLECT_RESOURCE_PRECISION_UNKNOWN);
+
+    if (sType == OPERAND_MIN_PRECISION_DEFAULT)
+        return false;
+
+    if (pType)
+        *pType = sType;
+
+    return true;
+}
diff --git a/third_party/HLSLcc/src/LoopTransform.cpp b/third_party/HLSLcc/src/LoopTransform.cpp
new file mode 100644
index 0000000..e3ba6e6
--- /dev/null
+++ b/third_party/HLSLcc/src/LoopTransform.cpp
@@ -0,0 +1,370 @@
+#include "src/internal_includes/HLSLCrossCompilerContext.h"
+#include "src/internal_includes/LoopTransform.h"
+#include "src/internal_includes/Shader.h"
+#include "src/internal_includes/debug.h"
+#include <algorithm>
+#include <list>
+#include <vector>
+
+namespace HLSLcc
+{
+    struct LoopInfo
+    {
+    public:
+        LoopInfo() : m_StartLoop(0), m_EndLoop(0), m_ExitPoints(), m_IsSwitch(false) {}
+
+        Instruction * m_StartLoop; // OPCODE_LOOP
+        Instruction * m_EndLoop; // OPCODE_ENDLOOP that matches the LOOP above.
+        std::vector<Instruction *> m_ExitPoints; // Any BREAK/RET/BREAKC instructions within the same loop depth
+        bool m_IsSwitch; // True if this is a switch-case and not a LOOP/ENDLOOP pair. Used as a helper when parsing.
+    };
+
+    typedef std::list<LoopInfo> Loops;
+
+    // Build a loopinfo array of all the loops in this shader phase
+    void BuildLoopInfo(ShaderPhase &phase, Loops &res)
+    {
+        using namespace std;
+        res.clear();
+
+        // A stack of loopinfo elements (stored in res)
+        list<LoopInfo *> loopStack;
+
+        // Storage for dummy LoopInfo elements to be used for switch-cases. We don't want them cluttering the Loops list so store them here.
+        list<LoopInfo> dummyLIForSwitches;
+
+        for (std::vector<Instruction>::iterator instItr = phase.psInst.begin(); instItr != phase.psInst.end(); instItr++)
+        {
+            Instruction *i = &*instItr;
+
+            if (i->eOpcode == OPCODE_LOOP)
+            {
+                LoopInfo *currLoopInfo = &*res.insert(res.end(), LoopInfo());
+                currLoopInfo->m_StartLoop = i;
+                loopStack.push_front(currLoopInfo);
+            }
+            else if (i->eOpcode == OPCODE_ENDLOOP)
+            {
+                ASSERT(!loopStack.empty());
+                LoopInfo *li = *loopStack.begin();
+                loopStack.pop_front();
+                li->m_EndLoop = i;
+            }
+            else if (i->eOpcode == OPCODE_SWITCH)
+            {
+                // Create a dummy entry into the stack
+                LoopInfo *li = &*dummyLIForSwitches.insert(dummyLIForSwitches.end(), LoopInfo());
+                li->m_IsSwitch = true;
+                loopStack.push_front(li);
+            }
+            else if (i->eOpcode == OPCODE_ENDSWITCH)
+            {
+                ASSERT(!loopStack.empty());
+                LoopInfo *li = *loopStack.begin();
+                loopStack.pop_front();
+                ASSERT(li->m_IsSwitch);
+            }
+            else if (i->eOpcode == OPCODE_BREAK || i->eOpcode == OPCODE_BREAKC)
+            {
+                // Get the current loopstack head
+                ASSERT(!loopStack.empty());
+                LoopInfo *li = *loopStack.begin();
+                // Ignore breaks from switch-cases
+                if (!li->m_IsSwitch)
+                {
+                    li->m_ExitPoints.push_back(i);
+                }
+            }
+        }
+    }
+
+    // Returns true if the given instruction is a non-vectorized int or uint comparison instruction that reads from at least one temp and writes to a temp
+    static bool IsScalarTempComparisonInstruction(const Instruction *i)
+    {
+        switch (i->eOpcode)
+        {
+        default:
+            return false;
+        case OPCODE_IGE:
+        case OPCODE_ILT:
+        case OPCODE_IEQ:
+        case OPCODE_INE:
+        case OPCODE_UGE:
+        case OPCODE_ULT:
+            break;
+        }
+
+        if (i->asOperands[0].eType != OPERAND_TYPE_TEMP)
+            return false;
+
+        int tempOp = -1;
+        if (i->asOperands[1].eType == OPERAND_TYPE_TEMP)
+            tempOp = 1;
+        else if (i->asOperands[2].eType == OPERAND_TYPE_TEMP)
+            tempOp = 2;
+
+        // Also reject comparisons where we compare temp.x vs temp.y
+        if (i->asOperands[1].eType == OPERAND_TYPE_TEMP && i->asOperands[2].eType == OPERAND_TYPE_TEMP && i->asOperands[1].ui32RegisterNumber == i->asOperands[2].ui32RegisterNumber)
+ return false; + + if (tempOp == -1) + return false; + + if (i->asOperands[0].GetNumSwizzleElements() != 1) + return false; + + return true; + } + + // Returns true iff both instructions perform identical operation. For the purposes of Loop transformation, we only consider operations of type tX = tX imm32 + static bool AreInstructionsIdentical(const Instruction *a, const Instruction *b) + { + if (a->eOpcode != b->eOpcode) + return false; + ASSERT(a->ui32NumOperands == b->ui32NumOperands); + uint32_t dstReg = 0; + if (a->asOperands[0].eType != OPERAND_TYPE_TEMP) + return false; + dstReg = a->asOperands[0].ui32RegisterNumber; + + for (uint32_t i = 0; i < a->ui32NumOperands; i++) + { + const Operand &aop = a->asOperands[i]; + const Operand &bop = b->asOperands[i]; + if (aop.eType != bop.eType) + return false; + + if (aop.GetAccessMask() != bop.GetAccessMask()) + return false; + + if (aop.GetNumSwizzleElements() != 1) + return false; + + if (aop.eType == OPERAND_TYPE_TEMP) + { + if (aop.ui32RegisterNumber != bop.ui32RegisterNumber) + return false; + if (aop.ui32RegisterNumber != dstReg) + return false; + } + else if (aop.eType == OPERAND_TYPE_IMMEDIATE32) + { + if (memcmp(aop.afImmediates, bop.afImmediates, 4 * sizeof(float)) != 0) + return false; + } + } + return true; + } + + // Attempt to transform a single loop into a for-statement + static void AttemptLoopTransform(HLSLCrossCompilerContext *psContext, ShaderPhase &phase, LoopInfo &li) + { + // In order to transform a loop into a for, the following has to hold: + // - The loop must start with a comparison instruction where one of the src operands is a temp (induction variable), followed by OPCODE_BREAKC. + // - The loop must end with an arithmetic operation (SUB or ADD) where the dest operand is the same temp as one of the sources in the comparison instruction above + // Additionally, if the loop induction variable is initialized before the start of the loop and it has only uses inside the LOOP/ENDLOOP pair, we can declare that inside the for statement. + // Also, the loop induction variable must be standalone (as in, never used as part of a larger vector) + + Instruction *cmpInst = li.m_StartLoop + 1; + + if (!IsScalarTempComparisonInstruction(cmpInst)) + return; + + Instruction *breakInst = li.m_StartLoop + 2; + if (breakInst->eOpcode != OPCODE_BREAKC) + return; + if (breakInst->asOperands[0].eType != OPERAND_TYPE_TEMP) + return; + if (breakInst->asOperands[0].ui32RegisterNumber != cmpInst->asOperands[0].ui32RegisterNumber) + return; + + // Check that the comparison result isn't used anywhere else + if (cmpInst->m_Uses.size() != 1) + return; + + ASSERT(cmpInst->m_Uses[0].m_Inst == breakInst); + + // Ok, at least we have the comparison + breakc combo at top. Try to find the induction variable + uint32_t inductionVarIdx = 0; + + Instruction *lastInst = li.m_EndLoop - 1; + if (lastInst->eOpcode != OPCODE_IADD) + return; + if (lastInst->asOperands[0].eType != OPERAND_TYPE_TEMP) + return; + + if (lastInst->asOperands[0].GetNumSwizzleElements() != 1) + return; + + uint32_t indVar = lastInst->asOperands[0].ui32RegisterNumber; + // Verify that the induction variable actually matches. 
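+        // (the shape being matched is roughly:
+        //      loop
+        //          ige/ilt/... tCmp.x, <A>, <B>   <- cmpInst, one of A/B is the induction temp
+        //          breakc tCmp.x                  <- breakInst
+        //          ...body...
+        //          iadd tIV.x, tIV.x, <step>      <- lastInst
+        //      endloop
+        //  register names here are illustrative only)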
+ if (cmpInst->asOperands[1].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[1].ui32RegisterNumber == indVar) + inductionVarIdx = 1; + else if (cmpInst->asOperands[2].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[2].ui32RegisterNumber == indVar) + inductionVarIdx = 2; + else + return; + + // Verify that we also read from the induction variable in the last instruction + if (!((lastInst->asOperands[1].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[1].ui32RegisterNumber == indVar) || + (lastInst->asOperands[2].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[2].ui32RegisterNumber == indVar))) + return; + + // Nvidia compiler bug workaround: The shader compiler tries to be smart and unrolls constant loops, + // but then fails miserably if the loop variable is used as an index to UAV loads/stores or some other cases ("array access too complex") + // This is also triggered when the driver optimizer sees "simple enough" arithmetics (whatever that is) done on the loop variable before indexing. + // So, disable for-loop transformation altogether whenever we see a UAV load or store inside a loop. + if (psContext->psShader->eTargetLanguage >= LANG_400 && psContext->psShader->eTargetLanguage < LANG_GL_LAST && !psContext->IsVulkan()) + { + for (auto itr = li.m_StartLoop; itr != li.m_EndLoop; itr++) + { + switch (itr->eOpcode) + { + case OPCODE_LD_RAW: + case OPCODE_LD_STRUCTURED: + case OPCODE_LD_UAV_TYPED: + case OPCODE_STORE_RAW: + case OPCODE_STORE_STRUCTURED: + case OPCODE_STORE_UAV_TYPED: + return; // Nope, can't do a for, not even a partial one. + default: + break; + } + } + } + + // One more thing to check: The comparison input may only see 1 definition that originates from inside the loop range: the one in lastInst. + // Anything else means that there's a continue statement, or another break/breakc and that means that lastInst wouldn't get called. + // Of course, if all those instructions are identical, then it's fine. + // Ideally, if there's only one definition that's from outside the loop range, then we can use that as the initializer, as well. 
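+        // Partition every definition of the induction variable that reaches the comparison into those inside
+        // the LOOP/ENDLOOP range and those outside it; a single outside definition is the initializer candidate.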
+
+        Instruction *initializer = NULL;
+        std::vector<const Operand::Define *> definitionsOutsideRange;
+        std::vector<const Operand::Define *> definitionsInsideRange;
+        std::for_each(cmpInst->asOperands[inductionVarIdx].m_Defines.begin(), cmpInst->asOperands[inductionVarIdx].m_Defines.end(), [&](const Operand::Define &def)
+        {
+            if (def.m_Inst < li.m_StartLoop || def.m_Inst > li.m_EndLoop)
+                definitionsOutsideRange.push_back(&def);
+            else
+                definitionsInsideRange.push_back(&def);
+        });
+
+        if (definitionsInsideRange.size() != 1)
+        {
+            // All definitions must be identical
+            for (std::vector<const Operand::Define *>::iterator itr = definitionsInsideRange.begin() + 1; itr != definitionsInsideRange.end(); itr++)
+            {
+                if (!AreInstructionsIdentical((*itr)->m_Inst, definitionsInsideRange[0]->m_Inst))
+                    return;
+            }
+        }
+
+        ASSERT(definitionsOutsideRange.size() > 0);
+        if (definitionsOutsideRange.size() == 1)
+            initializer = definitionsOutsideRange[0]->m_Inst;
+
+        // Initializer must only write to one component
+        if (initializer && initializer->asOperands[0].GetNumSwizzleElements() != 1)
+            initializer = 0;
+        // Initializer data type must be int or uint
+        if (initializer)
+        {
+            SHADER_VARIABLE_TYPE dataType = initializer->asOperands[0].GetDataType(psContext);
+            if (dataType != SVT_INT && dataType != SVT_UINT)
+                return;
+        }
+
+        // Check that the initializer is only used within the range so we can move it to for statement
+        if (initializer)
+        {
+            bool hasUsesOutsideRange = false;
+            std::for_each(initializer->m_Uses.begin(), initializer->m_Uses.end(), [&](const Instruction::Use &u)
+            {
+                if (u.m_Inst < li.m_StartLoop || u.m_Inst > li.m_EndLoop)
+                    hasUsesOutsideRange = true;
+            });
+            // Has outside uses? We cannot pull that up to the for statement
+            if (hasUsesOutsideRange)
+                initializer = 0;
+        }
+
+        // Check that the loop adder instruction only has uses inside the loop range, otherwise we cannot move the initializer either
+        if (initializer)
+        {
+            bool cannotDoInitializer = false;
+            for (auto itr = lastInst->m_Uses.begin(); itr != lastInst->m_Uses.end(); itr++)
+            {
+                const Instruction::Use &u = *itr;
+                if (u.m_Inst < li.m_StartLoop || u.m_Inst > li.m_EndLoop)
+                {
+                    cannotDoInitializer = true;
+                    break;
+                }
+                // Also check that the uses are not vector ops (temp splitting has already pulled everything to .x if this is a standalone var)
+                if (u.m_Op->GetAccessMask() != 1)
+                {
+                    cannotDoInitializer = true;
+                    break;
+                }
+            }
+            // Has outside uses? We cannot pull that up to the for statement
+            if (cannotDoInitializer)
+                initializer = 0;
+        }
+
+
+        if (initializer)
+        {
+            // We can declare the initializer in the for loop header, allocate a new number for it and change all uses into that.
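+            // (illustrative only: instead of "int x = <init>; loop { if (x >= n) break; ...; x += k; }"
+            //  the translator can emit "for (int it_0 = <init>; it_0 < n; it_0 += k) { ... }";
+            //  actual variable names and the comparison direction depend on the shader)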
+ uint32_t newRegister = phase.m_NextFreeTempRegister++; + li.m_StartLoop->m_InductorRegister = newRegister; + std::for_each(initializer->m_Uses.begin(), initializer->m_Uses.end(), [newRegister](const Instruction::Use &u) + { + u.m_Op->m_ForLoopInductorName = newRegister; + }); + // Also tweak the destinations for cmpInst, and lastInst + if (cmpInst->asOperands[1].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[1].ui32RegisterNumber == initializer->asOperands[0].ui32RegisterNumber) + cmpInst->asOperands[1].m_ForLoopInductorName = newRegister; + else + cmpInst->asOperands[2].m_ForLoopInductorName = newRegister; + + if (lastInst->asOperands[1].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[1].ui32RegisterNumber == initializer->asOperands[0].ui32RegisterNumber) + lastInst->asOperands[1].m_ForLoopInductorName = newRegister; + else + lastInst->asOperands[2].m_ForLoopInductorName = newRegister; + + lastInst->asOperands[0].m_ForLoopInductorName = newRegister; + initializer->asOperands[0].m_ForLoopInductorName = newRegister; + } + + // This loop can be transformed to for-loop. Do the necessary magicks. + li.m_StartLoop->m_LoopInductors[0] = initializer; + li.m_StartLoop->m_LoopInductors[1] = cmpInst; + li.m_StartLoop->m_LoopInductors[2] = breakInst; + li.m_StartLoop->m_LoopInductors[3] = lastInst; + + if (initializer) + initializer->m_SkipTranslation = true; + cmpInst->m_SkipTranslation = true; + breakInst->m_SkipTranslation = true; + lastInst->m_SkipTranslation = true; + } + + void DoLoopTransform(HLSLCrossCompilerContext *psContext, ShaderPhase &phase) + { + Loops loops; + BuildLoopInfo(phase, loops); + + std::for_each(loops.begin(), loops.end(), [&phase, psContext](LoopInfo &li) + { + // Some sanity checks: start and end points must be initialized, we shouldn't have any switches here, and each loop must have at least one exit point + // Also that there's at least 2 instructions in loop body + ASSERT(li.m_StartLoop != 0); + ASSERT(li.m_EndLoop != 0); + ASSERT(li.m_EndLoop > li.m_StartLoop + 2); + ASSERT(!li.m_IsSwitch); + ASSERT(!li.m_ExitPoints.empty()); + AttemptLoopTransform(psContext, phase, li); + }); + } +} diff --git a/third_party/HLSLcc/src/Operand.cpp b/third_party/HLSLcc/src/Operand.cpp new file mode 100644 index 0000000..9d9bf23 --- /dev/null +++ b/third_party/HLSLcc/src/Operand.cpp @@ -0,0 +1,641 @@ +#include "internal_includes/Operand.h" +#include "internal_includes/debug.h" +#include "internal_includes/HLSLccToolkit.h" +#include "internal_includes/Shader.h" +#include "internal_includes/HLSLCrossCompilerContext.h" +#include "internal_includes/Instruction.h" + +uint32_t Operand::GetAccessMask() const +{ + int i; + uint32_t accessMask = 0; + // NOTE: Destination writemask can (AND DOES) affect access from sources, but we do it conservatively for now. 
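+    // e.g. a .xz swizzle yields 0101b, a single-component .w select yields 1000b, and an empty
+    // component mask is treated as .xyzw, i.e. 1111b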
+ switch (eSelMode) + { + default: + case OPERAND_4_COMPONENT_MASK_MODE: + // Update access mask + accessMask = ui32CompMask; + if (accessMask == 0) + accessMask = OPERAND_4_COMPONENT_MASK_ALL; + break; + + case OPERAND_4_COMPONENT_SWIZZLE_MODE: + accessMask = 0; + for (i = 0; i < 4; i++) + accessMask |= 1 << (aui32Swizzle[i]); + break; + + case OPERAND_4_COMPONENT_SELECT_1_MODE: + accessMask = 1 << (aui32Swizzle[0]); + break; + } + ASSERT(accessMask != 0); + return accessMask; +} + +int Operand::GetMaxComponent() const +{ + if (iWriteMaskEnabled && + iNumComponents == 4) + { + //Component Mask + if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE) + { + if (ui32CompMask != 0 && ui32CompMask != (OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z | OPERAND_4_COMPONENT_MASK_W)) + { + if (ui32CompMask & OPERAND_4_COMPONENT_MASK_W) + { + return 4; + } + if (ui32CompMask & OPERAND_4_COMPONENT_MASK_Z) + { + return 3; + } + if (ui32CompMask & OPERAND_4_COMPONENT_MASK_Y) + { + return 2; + } + if (ui32CompMask & OPERAND_4_COMPONENT_MASK_X) + { + return 1; + } + } + } + else + //Component Swizzle + if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + if (ui32Swizzle == NO_SWIZZLE) + return 4; + + uint32_t res = 0; + for (int i = 0; i < 4; i++) + { + res = std::max(aui32Swizzle[i], res); + } + return (int)res + 1; + } + else if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) + { + return 1; + } + } + + return 4; +} + +//Single component repeated +//e..g .wwww +bool Operand::IsSwizzleReplicated() const +{ + if (iWriteMaskEnabled && + iNumComponents == 4) + { + if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + if (ui32Swizzle == WWWW_SWIZZLE || + ui32Swizzle == ZZZZ_SWIZZLE || + ui32Swizzle == YYYY_SWIZZLE || + ui32Swizzle == XXXX_SWIZZLE) + { + return true; + } + } + } + return false; +} + +// Get the number of elements returned by operand, taking additional component mask into account +uint32_t Operand::GetNumSwizzleElements(uint32_t _ui32CompMask /* = OPERAND_4_COMPONENT_MASK_ALL */) const +{ + uint32_t count = 0; + + switch (eType) + { + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP: + case OPERAND_TYPE_INPUT_THREAD_ID: + case OPERAND_TYPE_INPUT_THREAD_GROUP_ID: + // Adjust component count and break to more processing + ((Operand *)this)->iNumComponents = 3; + break; + case OPERAND_TYPE_IMMEDIATE32: + case OPERAND_TYPE_IMMEDIATE64: + case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: + case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: + case OPERAND_TYPE_OUTPUT_DEPTH: + { + // Translate numComponents into bitmask + // 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15 + uint32_t compMask = (1 << iNumComponents) - 1; + + compMask &= _ui32CompMask; + // Calculate bits left in compMask + return HLSLcc::GetNumberBitsSet(compMask); + } + default: + { + break; + } + } + + if (iWriteMaskEnabled && + iNumComponents != 1) + { + //Component Mask + if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE) + { + uint32_t compMask = ui32CompMask; + if (compMask == 0) + compMask = OPERAND_4_COMPONENT_MASK_ALL; + compMask &= _ui32CompMask; + + if (compMask == OPERAND_4_COMPONENT_MASK_ALL) + return 4; + + if (compMask & OPERAND_4_COMPONENT_MASK_X) + { + count++; + } + if (compMask & OPERAND_4_COMPONENT_MASK_Y) + { + count++; + } + if (compMask & OPERAND_4_COMPONENT_MASK_Z) + { + count++; + } + if (compMask & OPERAND_4_COMPONENT_MASK_W) + { + count++; + } + } + else + //Component Swizzle + if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + uint32_t i; + for (i = 0; i < 4; ++i) + { + if ((_ui32CompMask & (1 << i)) 
== 0) + continue; + + count++; + } + } + else if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) + { + if (aui32Swizzle[0] == OPERAND_4_COMPONENT_X && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_X)) + { + count++; + } + else if (aui32Swizzle[0] == OPERAND_4_COMPONENT_Y && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_Y)) + { + count++; + } + else if (aui32Swizzle[0] == OPERAND_4_COMPONENT_Z && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_Z)) + { + count++; + } + else if (aui32Swizzle[0] == OPERAND_4_COMPONENT_W && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_W)) + { + count++; + } + } + + //Component Select 1 + } + + if (!count) + { + // Translate numComponents into bitmask + // 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15 + uint32_t compMask = (1 << iNumComponents) - 1; + + compMask &= _ui32CompMask; + // Calculate bits left in compMask + return HLSLcc::GetNumberBitsSet(compMask); + } + + return count; +} + +// Returns 0 if the register used by the operand is per-vertex, or 1 if per-patch +int Operand::GetRegisterSpace(SHADER_TYPE eShaderType, SHADER_PHASE_TYPE eShaderPhaseType) const +{ + if (eShaderType != HULL_SHADER && eShaderType != DOMAIN_SHADER) + return 0; + + if (eShaderType == HULL_SHADER && eShaderPhaseType == HS_CTRL_POINT_PHASE) + return 0; + + if (eShaderType == DOMAIN_SHADER && eType == OPERAND_TYPE_OUTPUT) + return 0; + + if (eType == OPERAND_TYPE_INPUT_CONTROL_POINT || eType == OPERAND_TYPE_OUTPUT_CONTROL_POINT) + return 0; + + return 1; +} + +int Operand::GetRegisterSpace(const HLSLCrossCompilerContext *psContext) const +{ + return GetRegisterSpace(psContext->psShader->eShaderType, psContext->psShader->asPhases[psContext->currentPhase].ePhase); +} + +SHADER_VARIABLE_TYPE Operand::GetDataType(HLSLCrossCompilerContext* psContext, SHADER_VARIABLE_TYPE ePreferredTypeForImmediates /* = SVT_INT */) const +{ + // indexable temps (temp arrays) are always float + if (eType == OPERAND_TYPE_INDEXABLE_TEMP) + return SVT_FLOAT; + + // The min precision qualifier overrides all of the stuff below + switch (eMinPrecision) + { + case OPERAND_MIN_PRECISION_FLOAT_16: + return SVT_FLOAT16; + case OPERAND_MIN_PRECISION_FLOAT_2_8: + return SVT_FLOAT10; + case OPERAND_MIN_PRECISION_SINT_16: + return SVT_INT16; + case OPERAND_MIN_PRECISION_UINT_16: + return SVT_UINT16; + default: + break; + } + + switch (eType) + { + case OPERAND_TYPE_TEMP: + { + SHADER_VARIABLE_TYPE eCurrentType = SVT_FLOAT; + int i = 0; + + if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) + { + return aeDataType[aui32Swizzle[0]]; + } + if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + if (ui32Swizzle == (NO_SWIZZLE)) + { + return aeDataType[0]; + } + + return aeDataType[aui32Swizzle[0]]; + } + + if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE) + { + uint32_t mask = ui32CompMask; + if (!mask) + { + mask = OPERAND_4_COMPONENT_MASK_ALL; + } + for (; i < 4; ++i) + { + if (mask & (1 << i)) + { + eCurrentType = aeDataType[i]; + break; + } + } + +#ifdef _DEBUG + //Check if all elements have the same basic type. 
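+                // (verify that the remaining enabled components agree with the type picked above; a mismatch
+                //  here means the earlier per-component type analysis / temp splitting left inconsistent data)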
+ for (; i < 4; ++i) + { + if (mask & (1 << i)) + { + if (eCurrentType != aeDataType[i]) + { + ASSERT(0); + } + } + } +#endif + return eCurrentType; + } + + ASSERT(0); + + break; + } + case OPERAND_TYPE_OUTPUT: + { + const uint32_t ui32Register = ui32RegisterNumber; + int regSpace = GetRegisterSpace(psContext); + const ShaderInfo::InOutSignature* psOut = NULL; + + if (regSpace == 0) + psContext->psShader->sInfo.GetOutputSignatureFromRegister(ui32Register, GetAccessMask(), psContext->psShader->ui32CurrentVertexOutputStream, + &psOut); + else + { + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Register, GetAccessMask(), &psOut, true); + if (!psOut) + return SVT_FLOAT; + } + + ASSERT(psOut != NULL); + if (psOut->eMinPrec != MIN_PRECISION_DEFAULT) + { + switch (psOut->eMinPrec) + { + default: + ASSERT(0); + break; + case MIN_PRECISION_FLOAT_16: + return SVT_FLOAT16; + case MIN_PRECISION_FLOAT_2_8: + if (psContext->psShader->eTargetLanguage == LANG_METAL) + return SVT_FLOAT16; + else + return SVT_FLOAT10; + case MIN_PRECISION_SINT_16: + return SVT_INT16; + case MIN_PRECISION_UINT_16: + return SVT_UINT16; + } + } + if (psOut->eComponentType == INOUT_COMPONENT_UINT32) + { + return SVT_UINT; + } + else if (psOut->eComponentType == INOUT_COMPONENT_SINT32) + { + return SVT_INT; + } + return SVT_FLOAT; + break; + } + case OPERAND_TYPE_INPUT: + case OPERAND_TYPE_INPUT_PATCH_CONSTANT: + case OPERAND_TYPE_INPUT_CONTROL_POINT: + { + const uint32_t ui32Register = aui32ArraySizes[iIndexDims - 1]; + int regSpace = GetRegisterSpace(psContext); + const ShaderInfo::InOutSignature* psIn = NULL; + + if (regSpace == 0) + { + if (psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[ui32Register] != 0) + return SVT_FLOAT; // All combined inputs are stored as floats + psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32Register, GetAccessMask(), + &psIn); + } + else + { + if (psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[ui32Register] != 0) + return SVT_FLOAT; // All combined inputs are stored as floats + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Register, GetAccessMask(), &psIn); + } + + ASSERT(psIn != NULL); + + switch (eSpecialName) + { + //UINT in DX, INT in GL. + case NAME_PRIMITIVE_ID: + case NAME_VERTEX_ID: + case NAME_INSTANCE_ID: + case NAME_RENDER_TARGET_ARRAY_INDEX: + case NAME_VIEWPORT_ARRAY_INDEX: + case NAME_SAMPLE_INDEX: + return (psContext->psShader->eTargetLanguage == LANG_METAL) ? SVT_UINT : SVT_INT; + + case NAME_IS_FRONT_FACE: + return SVT_UINT; + + case NAME_POSITION: + case NAME_CLIP_DISTANCE: + case NAME_CULL_DISTANCE: + return SVT_FLOAT; + + default: + break; + // fall through + } + + if (psIn->eSystemValueType == NAME_IS_FRONT_FACE) + return SVT_UINT; + + //UINT in DX, INT in GL. + if (psIn->eSystemValueType == NAME_PRIMITIVE_ID || + psIn->eSystemValueType == NAME_VERTEX_ID || + psIn->eSystemValueType == NAME_INSTANCE_ID || + psIn->eSystemValueType == NAME_RENDER_TARGET_ARRAY_INDEX || + psIn->eSystemValueType == NAME_VIEWPORT_ARRAY_INDEX || + psIn->eSystemValueType == NAME_SAMPLE_INDEX) + return (psContext->psShader->eTargetLanguage == LANG_METAL) ? 
SVT_UINT : SVT_INT; + + if (psIn->eMinPrec != MIN_PRECISION_DEFAULT) + { + switch (psIn->eMinPrec) + { + default: + ASSERT(0); + break; + case MIN_PRECISION_FLOAT_16: + return SVT_FLOAT16; + case MIN_PRECISION_FLOAT_2_8: + if (psContext->psShader->eTargetLanguage == LANG_METAL) + return SVT_FLOAT16; + else + return SVT_FLOAT10; + case MIN_PRECISION_SINT_16: + return SVT_INT16; + case MIN_PRECISION_UINT_16: + return SVT_UINT16; + } + } + + if (psIn->eComponentType == INOUT_COMPONENT_UINT32) + { + return SVT_UINT; + } + else if (psIn->eComponentType == INOUT_COMPONENT_SINT32) + { + return SVT_INT; + } + return SVT_FLOAT; + break; + } + case OPERAND_TYPE_CONSTANT_BUFFER: + { + const ConstantBuffer* psCBuf = NULL; + const ShaderVarType* psVarType = NULL; + int32_t rebase = -1; + bool isArray; + psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, aui32ArraySizes[0], &psCBuf); + if (psCBuf) + { + int foundVar = ShaderInfo::GetShaderVarFromOffset(aui32ArraySizes[1], aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags); + if (foundVar) + return psVarType->Type; + + ASSERT(0); + } + else + ASSERT(0); + break; + } + case OPERAND_TYPE_IMMEDIATE32: + { + return ePreferredTypeForImmediates; + } + + case OPERAND_TYPE_IMMEDIATE64: + { + return SVT_DOUBLE; + } + + case OPERAND_TYPE_INPUT_THREAD_ID: + case OPERAND_TYPE_INPUT_THREAD_GROUP_ID: + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP: + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED: + { + return SVT_UINT; + } + case OPERAND_TYPE_SPECIAL_ADDRESS: + case OPERAND_TYPE_SPECIAL_LOOPCOUNTER: + case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID: + case OPERAND_TYPE_INPUT_PRIMITIVEID: + { + return SVT_INT; + } + case OPERAND_TYPE_INPUT_GS_INSTANCE_ID: + { + return SVT_UINT; + } + case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: + { + return SVT_INT; + } + case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID: + { + return SVT_INT; + } + case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: // constant array is floats everywhere except on vulkan + { + return psContext->IsVulkan() ? 
SVT_UINT : SVT_FLOAT; + } + + default: + { + return SVT_FLOAT; + } + } + + return SVT_FLOAT; +} + +OPERAND_MIN_PRECISION Operand::ResourcePrecisionToOperandPrecision(REFLECT_RESOURCE_PRECISION ePrec) +{ + switch (ePrec) + { + default: + case REFLECT_RESOURCE_PRECISION_UNKNOWN: + case REFLECT_RESOURCE_PRECISION_LOWP: + return OPERAND_MIN_PRECISION_FLOAT_2_8; + case REFLECT_RESOURCE_PRECISION_MEDIUMP: + return OPERAND_MIN_PRECISION_FLOAT_16; + case REFLECT_RESOURCE_PRECISION_HIGHP: + return OPERAND_MIN_PRECISION_DEFAULT; + } +} + +int Operand::GetNumInputElements(const HLSLCrossCompilerContext *psContext) const +{ + const ShaderInfo::InOutSignature *psSig = NULL; + int regSpace = GetRegisterSpace(psContext); + + switch (eType) + { + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED: + case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID: + case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID: + return 1; + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP: + case OPERAND_TYPE_INPUT_THREAD_ID: + case OPERAND_TYPE_INPUT_THREAD_GROUP_ID: + case OPERAND_TYPE_INPUT_DOMAIN_POINT: + return 3; + default: + break; + } + + if (regSpace == 0) + psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32RegisterNumber, GetAccessMask(), &psSig); + else + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32RegisterNumber, GetAccessMask(), &psSig); + + ASSERT(psSig != NULL); + + return HLSLcc::GetNumberBitsSet(psSig->ui32Mask); +} + +Operand* Operand::GetDynamicIndexOperand(HLSLCrossCompilerContext *psContext, const ShaderVarType* psVar, bool isAoS, bool *needsIndexCalcRevert) const +{ + Operand *psDynIndexOp = m_SubOperands[0].get(); + if (psDynIndexOp == NULL) + psDynIndexOp = m_SubOperands[1].get(); + + *needsIndexCalcRevert = false; + if (psDynIndexOp != NULL && isAoS) + { + // if dynamically indexing array of structs, try using the original index var before the float4 address calc + bool indexVarFound = false; + *needsIndexCalcRevert = true; + Instruction *psDynIndexOrigin = psDynIndexOp->m_Defines[0].m_Inst; + Operand *asOps = psDynIndexOrigin->asOperands; + Operand *psOriginOp = NULL; + + // DXBC always addresses as float4, find the address calculation + + // Special case where struct is float4 size, no extra calc is done + if (ShaderInfo::GetCBVarSize(psVar->Parent, true) <= 16) // matrixAsVectors arg does not matter here as with matrices the size will go over the limit anyway + { + indexVarFound = true; + *needsIndexCalcRevert = false; + } + else if (psDynIndexOrigin->eOpcode == OPCODE_IMUL) + { + // check which one of the src operands is the original index + if ((asOps[2].eType == OPERAND_TYPE_TEMP || asOps[2].eType == OPERAND_TYPE_INPUT || asOps[2].eType == OPERAND_TYPE_CONSTANT_BUFFER) && asOps[3].eType == OPERAND_TYPE_IMMEDIATE32) + psOriginOp = &asOps[2]; + else if ((asOps[3].eType == OPERAND_TYPE_TEMP || asOps[3].eType == OPERAND_TYPE_INPUT || asOps[3].eType == OPERAND_TYPE_CONSTANT_BUFFER) && asOps[2].eType == OPERAND_TYPE_IMMEDIATE32) + psOriginOp = &asOps[3]; + } + else if (psDynIndexOrigin->eOpcode == OPCODE_ISHL) + { + if (asOps[2].eType == OPERAND_TYPE_IMMEDIATE32 && asOps[1].eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + psOriginOp = &asOps[0]; + else if (asOps[2].eType == OPERAND_TYPE_IMMEDIATE32) + psOriginOp = &asOps[1]; + } + + if (psOriginOp != NULL) + { + indexVarFound = true; + + // Check if the mul dest is not the same temp as the src. 
Also check that the temp + // does not have multiple uses (which could override the value) + // -> we can use src straight and no index revert calc is needed + if ((psOriginOp->eType == OPERAND_TYPE_INPUT) + || ((psOriginOp->ui32RegisterNumber != psDynIndexOp->ui32RegisterNumber || psOriginOp->GetDataType(psContext) != psDynIndexOp->GetDataType(psContext)) + && (!psOriginOp->m_Defines.empty()) && psOriginOp->m_Defines[0].m_Inst->m_Uses.size() == 1)) + { + psDynIndexOp = psOriginOp; + *needsIndexCalcRevert = false; + } + } + + // Atm we support only this very basic case of dynamic indexing array of structs. + // Return error if something else is encountered. + if (!indexVarFound) + psContext->m_Reflection.OnDiagnostics("Unsupported dynamic indexing scheme on constant buffer vars.", 0, true); + } + + return psDynIndexOp; +} diff --git a/third_party/HLSLcc/src/Shader.cpp b/third_party/HLSLcc/src/Shader.cpp new file mode 100644 index 0000000..6f62ffe --- /dev/null +++ b/third_party/HLSLcc/src/Shader.cpp @@ -0,0 +1,989 @@ +#include "internal_includes/Shader.h" +#include "internal_includes/debug.h" +#include +#include "internal_includes/Instruction.h" +#include "internal_includes/Declaration.h" +#include "internal_includes/HLSLccToolkit.h" + +uint32_t Shader::GetTempComponentCount(SHADER_VARIABLE_TYPE eType, uint32_t ui32Reg) const +{ + switch (eType) + { + case SVT_FLOAT: + return psFloatTempSizes[ui32Reg]; + case SVT_FLOAT16: + return psFloat16TempSizes[ui32Reg]; + case SVT_FLOAT10: + return psFloat10TempSizes[ui32Reg]; + case SVT_INT: + return psIntTempSizes[ui32Reg]; + case SVT_INT16: + return psInt16TempSizes[ui32Reg]; + case SVT_INT12: + return psInt12TempSizes[ui32Reg]; + case SVT_UINT: + return psUIntTempSizes[ui32Reg]; + case SVT_UINT16: + return psUInt16TempSizes[ui32Reg]; + case SVT_DOUBLE: + return psDoubleTempSizes[ui32Reg]; + case SVT_BOOL: + return psBoolTempSizes[ui32Reg]; + default: + ASSERT(0); + } + return 0; +} + +void Shader::ConsolidateHullTempVars() +{ + uint32_t i, phase; + uint32_t numTemps = 0; + for (phase = 0; phase < asPhases.size(); phase++) + { + for (i = 0; i < asPhases[phase].psDecl.size(); i++) + { + if (asPhases[phase].psDecl[i].eOpcode == OPCODE_DCL_TEMPS) + { + if (asPhases[phase].psDecl[i].value.ui32NumTemps > numTemps) + numTemps = asPhases[phase].psDecl[i].value.ui32NumTemps; + asPhases[phase].psDecl[i].value.ui32NumTemps = 0; + } + } + } + // Now we have the max temps, write it back to the first one we see. + for (phase = 0; phase < asPhases.size(); phase++) + { + for (i = 0; i < asPhases[phase].psDecl.size(); i++) + { + if (asPhases[phase].psDecl[i].eOpcode == OPCODE_DCL_TEMPS) + { + asPhases[phase].psDecl[i].value.ui32NumTemps = numTemps; + return; + } + } + } +} + +// Image (RWTexture in HLSL) declaration op does not provide enough info about the format and accessing. +// Go through all image declarations and instructions accessing it to see if it is readonly/writeonly. +// While doing that we also get the number of components expected in the image format. +// Also resolve access flags for other UAVs as well. No component count resolving for them. 
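+// (For example, a typed UAV that is only ever written by store_uav_typed can later be declared as a
+// writeonly image, and one whose loads only ever land in a .xy destination needs only two components;
+// how these flags and counts are consumed is up to the GLSL/Metal output stages.)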
+void ShaderPhase::ResolveUAVProperties(const ShaderInfo& sInfo) +{ + Declaration *psFirstDeclaration = &psDecl[0]; + + uint32_t ui32NumDeclarations = (uint32_t)psDecl.size(); + Instruction *psFirstInstruction = &psInst[0]; + uint32_t ui32NumInstructions = (uint32_t)psInst.size(); + + if (ui32NumDeclarations == 0 || ui32NumInstructions == 0) + return; + + Declaration *psLastDeclaration = psFirstDeclaration + ui32NumDeclarations - 1; + Instruction *psLastInstruction = psFirstInstruction + ui32NumInstructions - 1; + Declaration *psDecl; + + for (psDecl = psFirstDeclaration; psDecl <= psLastDeclaration; psDecl++) + { + Instruction *psInst; + uint32_t uavReg; + if (psDecl->eOpcode != OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED && + psDecl->eOpcode != OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED && + psDecl->eOpcode != OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW) + continue; + + uavReg = psDecl->asOperands[0].ui32RegisterNumber; + + for (psInst = psFirstInstruction; psInst <= psLastInstruction; psInst++) + { + uint32_t opIndex; + uint32_t accessFlags; + uint32_t numComponents; + + switch (psInst->eOpcode) + { + case OPCODE_LD_UAV_TYPED: + opIndex = 2; + accessFlags = ACCESS_FLAG_READ; + numComponents = psInst->asOperands[0].GetNumSwizzleElements(); // get component count from the write target + break; + + case OPCODE_STORE_UAV_TYPED: + ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_UNORDERED_ACCESS_VIEW); + opIndex = 0; + accessFlags = ACCESS_FLAG_WRITE; + numComponents = 0; // store op does not contribute on the component count resolving + break; + + case OPCODE_ATOMIC_CMP_STORE: + case OPCODE_ATOMIC_AND: + case OPCODE_ATOMIC_IADD: + case OPCODE_ATOMIC_OR: + case OPCODE_ATOMIC_XOR: + case OPCODE_ATOMIC_IMIN: + case OPCODE_ATOMIC_UMIN: + case OPCODE_ATOMIC_IMAX: + case OPCODE_ATOMIC_UMAX: + opIndex = 0; + accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE | ACCESS_FLAG_ATOMIC; + numComponents = 1; + break; + + case OPCODE_IMM_ATOMIC_AND: + case OPCODE_IMM_ATOMIC_IADD: + case OPCODE_IMM_ATOMIC_IMAX: + case OPCODE_IMM_ATOMIC_IMIN: + case OPCODE_IMM_ATOMIC_UMAX: + case OPCODE_IMM_ATOMIC_UMIN: + case OPCODE_IMM_ATOMIC_OR: + case OPCODE_IMM_ATOMIC_XOR: + case OPCODE_IMM_ATOMIC_EXCH: + case OPCODE_IMM_ATOMIC_CMP_EXCH: + opIndex = 1; + accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE | ACCESS_FLAG_ATOMIC; + numComponents = 1; + break; + + // The rest of the ops here are only for buffer UAVs. No need for component count resolving. + case OPCODE_LD_STRUCTURED: + opIndex = 3; + accessFlags = ACCESS_FLAG_READ; + numComponents = 0; + break; + + case OPCODE_STORE_STRUCTURED: + opIndex = 0; + accessFlags = ACCESS_FLAG_WRITE; + numComponents = 0; + break; + + case OPCODE_LD_RAW: + opIndex = 2; + accessFlags = ACCESS_FLAG_READ; + numComponents = 0; + break; + + case OPCODE_STORE_RAW: + opIndex = 0; + accessFlags = ACCESS_FLAG_WRITE; + numComponents = 0; + break; + + case OPCODE_IMM_ATOMIC_ALLOC: + case OPCODE_IMM_ATOMIC_CONSUME: + opIndex = 1; + accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE | ACCESS_FLAG_ATOMIC; + numComponents = 0; + break; + + default: + continue; + } + + // Buffer loads can also happen on non-uav. Skip those. 
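+            // (ld_raw / ld_structured can also read from t# SRVs and g# thread-group memory; those operands
+            //  are simply skipped here)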
+ if (psInst->asOperands[opIndex].eType != OPERAND_TYPE_UNORDERED_ACCESS_VIEW) + continue; + + // Check the instruction is operating on the declared uav + if (psInst->asOperands[opIndex].ui32RegisterNumber != uavReg) + continue; + + psDecl->sUAV.ui32AccessFlags |= accessFlags; + + // get the max components accessed, but only for typed (texture) UAVs + if (numComponents > psDecl->sUAV.ui32NumComponents && psDecl->eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED) + { + psDecl->sUAV.ui32NumComponents = numComponents; + } + } + + if (psDecl->eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED) + { + const ResourceBinding* psBinding = 0; + if (sInfo.GetResourceFromBindingPoint(RGROUP_UAV, uavReg, &psBinding)) + { + // component count is stored in flags as 2 bits, 00: vec1, 01: vec2, 10: vec3, 11: vec4 + psDecl->sUAV.ui32NumComponents = ((psBinding->ui32Flags >> 2) & 3) + 1; + } + } + } +} + +static void GatherOperandAccessMasks(const Operand *psOperand, char *destTable) +{ + int i; + uint32_t reg; + for (i = 0; i < MAX_SUB_OPERANDS; i++) + { + if (psOperand->m_SubOperands[i].get()) + GatherOperandAccessMasks(psOperand->m_SubOperands[i].get(), destTable); + } + + if (psOperand->eType != OPERAND_TYPE_TEMP) + return; + + reg = psOperand->ui32RegisterNumber & 0xffff; // We add 0x10000 to all newly created ones earlier + + destTable[reg] |= (char)psOperand->GetAccessMask(); +} + +// Coalesce the split temps back based on their original temp register. Keep uint/int/float operations separate +static void CoalesceTemps(Shader *psShader, ShaderPhase *psPhase, uint32_t ui32MaxOrigTemps) +{ + // Just move all operations back to their original registers, but keep the data type assignments. + uint32_t i, k; + Instruction *psLastInstruction = &psPhase->psInst[psPhase->psInst.size() - 1]; + std::vector opAccessMasks; + + // First move all newly created temps to high enough so they won't overlap with the rebased ones + + Instruction *inst = &psPhase->psInst[0]; + + if (psPhase->psInst.size() == 0 || psPhase->ui32OrigTemps == 0) + return; + + while (inst <= psLastInstruction) + { + // Update all operands and their suboperands + for (i = psPhase->ui32OrigTemps; i < psPhase->ui32TotalTemps; i++) + { + for (k = 0; k < inst->ui32NumOperands; k++) + inst->ChangeOperandTempRegister(&inst->asOperands[k], i, 0x10000 + i, OPERAND_4_COMPONENT_MASK_ALL, UD_CHANGE_ALL, 0); + } + inst++; + } + + // Prune the original registers, rebase if necessary + opAccessMasks.clear(); + opAccessMasks.resize(psPhase->ui32TotalTemps, 0); + inst = &psPhase->psInst[0]; + while (inst <= psLastInstruction) + { + for (k = 0; k < inst->ui32NumOperands; k++) + GatherOperandAccessMasks(&inst->asOperands[k], &opAccessMasks[0]); + inst++; + } + + for (i = 0; i < psPhase->ui32TotalTemps; i++) + { + uint32_t rebase, count; + uint32_t newReg = i; + uint32_t origReg = i; + int needsMoving = 0; + SHADER_VARIABLE_TYPE dataType; + + // Figure out rebase and count + rebase = 0; + count = 0; + if (i < psPhase->ui32OrigTemps) + { + // One of the original registers + k = opAccessMasks[i]; + if (k == 0) + continue; + + while ((k & 1) == 0) + { + rebase++; + k = k >> 1; + } + while (k != 0) + { + count++; + k = k >> 1; + } + newReg = i + ui32MaxOrigTemps * rebase; + if (rebase != 0) + needsMoving = 1; + } + else + { + // Newly created split registers, read info from table + // Read the count and rebase from split info table + count = (psPhase->pui32SplitInfo[i] >> 24) & 0xff; + rebase = (psPhase->pui32SplitInfo[i] >> 16) & 0xff; + origReg = 0x10000 + i; + newReg 
= (psPhase->pui32SplitInfo[i]) & 0xffff; + while (psPhase->pui32SplitInfo[newReg] != 0xffffffff) + newReg = (psPhase->pui32SplitInfo[newReg]) & 0xffff; + + // If count is 4, verify that we have both first and last bit set + ASSERT(count != 4 || (opAccessMasks[i] & 9) == 9); + + newReg = newReg + ui32MaxOrigTemps * rebase; + + // Don't rebase again + rebase = 0; + needsMoving = 1; + } + + if (needsMoving) + { + // printf("Moving reg %d to %d, count %d rebase %d\n", origReg, newReg, count, rebase); + + // Move directly to correct location + inst = &psPhase->psInst[0]; + while (inst <= psLastInstruction) + { + for (k = 0; k < inst->ui32NumOperands; k++) + inst->ChangeOperandTempRegister(&inst->asOperands[k], origReg, newReg, OPERAND_4_COMPONENT_MASK_ALL, UD_CHANGE_ALL, rebase); + inst++; + } + } + // Mark the count + dataType = psPhase->peTempTypes[i * 4 + rebase]; + switch (dataType) + { + default: + ASSERT(0); + break; + case SVT_BOOL: + psShader->psBoolTempSizes[newReg] = std::max(psShader->psBoolTempSizes[newReg], (char)count); + break; + case SVT_FLOAT: + psShader->psFloatTempSizes[newReg] = std::max(psShader->psFloatTempSizes[newReg], (char)count); + break; + case SVT_FLOAT16: + psShader->psFloat16TempSizes[newReg] = std::max(psShader->psFloat16TempSizes[newReg], (char)count); + break; + case SVT_FLOAT10: + psShader->psFloat10TempSizes[newReg] = std::max(psShader->psFloat10TempSizes[newReg], (char)count); + break; + case SVT_INT: + psShader->psIntTempSizes[newReg] = std::max(psShader->psIntTempSizes[newReg], (char)count); + break; + case SVT_INT16: + psShader->psInt16TempSizes[newReg] = std::max(psShader->psInt16TempSizes[newReg], (char)count); + break; + case SVT_INT12: + psShader->psInt12TempSizes[newReg] = std::max(psShader->psInt12TempSizes[newReg], (char)count); + break; + case SVT_UINT: + psShader->psUIntTempSizes[newReg] = std::max(psShader->psUIntTempSizes[newReg], (char)count); + break; + case SVT_UINT16: + psShader->psUInt16TempSizes[newReg] = std::max(psShader->psUInt16TempSizes[newReg], (char)count); + break; + case SVT_DOUBLE: + psShader->psDoubleTempSizes[newReg] = std::max(psShader->psDoubleTempSizes[newReg], (char)count); + break; + } + } +} + +// Mark whether the temp registers are used per each data type. +void Shader::PruneTempRegisters() +{ + uint32_t k; + uint32_t maxOrigTemps = 0; + uint32_t maxTotalTemps = 0; + // First find the total amount of temps + for (k = 0; k < asPhases.size(); k++) + { + ShaderPhase *psPhase = &asPhases[k]; + maxOrigTemps = std::max(maxOrigTemps, psPhase->ui32OrigTemps); + maxTotalTemps = std::max(maxTotalTemps, psPhase->ui32TotalTemps); + } + + if (maxTotalTemps == 0) + return; // splitarrays are nulls, no need to free + + // Allocate and zero-initialize arrays for each temp sizes. 
*4 is for every possible rebase
+    psIntTempSizes.clear();
+    psIntTempSizes.resize(maxOrigTemps * 4, 0);
+    psInt12TempSizes.clear();
+    psInt12TempSizes.resize(maxOrigTemps * 4, 0);
+    psInt16TempSizes.clear();
+    psInt16TempSizes.resize(maxOrigTemps * 4, 0);
+    psUIntTempSizes.clear();
+    psUIntTempSizes.resize(maxOrigTemps * 4, 0);
+    psUInt16TempSizes.clear();
+    psUInt16TempSizes.resize(maxOrigTemps * 4, 0);
+    psFloatTempSizes.clear();
+    psFloatTempSizes.resize(maxOrigTemps * 4, 0);
+    psFloat16TempSizes.clear();
+    psFloat16TempSizes.resize(maxOrigTemps * 4, 0);
+    psFloat10TempSizes.clear();
+    psFloat10TempSizes.resize(maxOrigTemps * 4, 0);
+    psDoubleTempSizes.clear();
+    psDoubleTempSizes.resize(maxOrigTemps * 4, 0);
+    psBoolTempSizes.clear();
+    psBoolTempSizes.resize(maxOrigTemps * 4, 0);
+
+    for (k = 0; k < asPhases.size(); k++)
+    {
+        ShaderPhase *psPhase = &asPhases[k];
+        CoalesceTemps(this, psPhase, maxOrigTemps);
+        if (psPhase->psTempDeclaration)
+            psPhase->psTempDeclaration->value.ui32NumTemps = maxOrigTemps * 4;
+    }
+}
+
+static void DoSignatureAnalysis(std::vector<ShaderInfo::InOutSignature> &psSignatures, std::vector<unsigned char> &outTable)
+{
+    // Fill the table, 2 bits per component, so that each 2 bits encode the following info:
+    // 0: unused OR used by the first signature we happened to see
+    // 1: used by the second signature
+    // 2: used by the third sig
+    // 3: used by the fourth sig.
+
+    // The counters for each input/output/patch. Start with 8 registers, grow as needed
+    std::vector<unsigned char> counters(8, (unsigned char)0);
+    outTable.clear();
+    outTable.resize(8, (unsigned char)0);
+
+    size_t i;
+    for (i = 0; i < psSignatures.size(); i++)
+    {
+        ShaderInfo::InOutSignature *psSig = &psSignatures[i];
+        char currCounter;
+        char mask;
+        ASSERT(psSig != NULL);
+
+        // We'll skip SV_Depth and others that put -1 in the register.
+        if (psSig->ui32Register == 0xffffffffu)
+            continue;
+
+        // Make sure there's enough room in the table
+        if (psSig->ui32Register >= counters.size())
+        {
+            counters.resize(psSig->ui32Register * 2, 0);
+            outTable.resize(psSig->ui32Register * 2, 0);
+        }
+
+        // Apply counter value to masked items
+        currCounter = counters[psSig->ui32Register];
+        // Duplicate counter bits
+        currCounter = currCounter | (currCounter << 2) | (currCounter << 4) | (currCounter << 6);
+        // Widen the mask
+        mask = (unsigned char)psSig->ui32Mask;
+        mask = ((mask & 8) << 3) | ((mask & 4) << 2) | ((mask & 2) << 1) | (mask & 1);
+        mask = mask | (mask << 1);
+        // Write output
+        outTable[psSig->ui32Register] |= (currCounter & mask);
+        // Update counter
+        counters[psSig->ui32Register]++;
+    }
+}
+
+void Shader::DoIOOverlapOperand(ShaderPhase *psPhase, Operand *psOperand)
+{
+    uint32_t i;
+    uint32_t regSpace = psOperand->GetRegisterSpace(eShaderType, psPhase->ePhase);
+    unsigned char *redirectTable = NULL;
+    unsigned char redir = 0;
+    unsigned char firstFound = 0;
+    uint32_t mask;
+
+    for (i = 0; i < MAX_SUB_OPERANDS; i++)
+        if (psOperand->m_SubOperands[i].get())
+            DoIOOverlapOperand(psPhase, psOperand->m_SubOperands[i].get());
+
+
+    switch (psOperand->eType)
+    {
+    case OPERAND_TYPE_INPUT:
+    case OPERAND_TYPE_INPUT_CONTROL_POINT:
+    case OPERAND_TYPE_INPUT_PATCH_CONSTANT:
+        redirectTable = regSpace == 0 ? &psPhase->acInputNeedsRedirect[0] : &psPhase->acPatchConstantsNeedsRedirect[0];
+        break;
+
+    case OPERAND_TYPE_OUTPUT:
+    case OPERAND_TYPE_OUTPUT_CONTROL_POINT:
+        redirectTable = regSpace == 0 ? &psPhase->acOutputNeedsRedirect[0] : &psPhase->acPatchConstantsNeedsRedirect[0];
+        break;
+
+    default:
+        // Not an input or output, nothing to do here
+        return;
+    }
+
+    redir = redirectTable[psOperand->ui32RegisterNumber];
+
+    if (redir == 0xff) // Already found overlap?
+        return;
+
+    mask = psOperand->GetAccessMask();
+    i = 0;
+    // Find the first mask bit set.
+    while ((mask & (1 << i)) == 0)
+        i++;
+
+    firstFound = (redir >> (i * 2)) & 3;
+    for (; i < 4; i++)
+    {
+        unsigned char sig;
+        if ((mask & (1 << i)) == 0)
+            continue;
+
+        sig = (redir >> (i * 2)) & 3;
+        // All set bits must access the same signature
+        if (sig != firstFound)
+        {
+            redirectTable[psOperand->ui32RegisterNumber] = 0xff;
+            return;
+        }
+    }
+}
+
+static void PruneRedirectEntry(unsigned char &itr)
+{
+    if (itr != 0xff)
+        itr = 0;
+}
+
+// Check if inputs and outputs are accessed across semantic boundaries
+// as in, 2x texcoord vec2's are packed together as vec4 but still accessed together.
+void Shader::AnalyzeIOOverlap()
+{
+    uint32_t i, k;
+    std::vector<unsigned char> outData;
+    DoSignatureAnalysis(sInfo.psInputSignatures, outData);
+
+    // Now data has the values, copy them to all phases
+    for (i = 0; i < asPhases.size(); i++)
+        asPhases[i].acInputNeedsRedirect = outData;
+
+    DoSignatureAnalysis(sInfo.psOutputSignatures, outData);
+    for (i = 0; i < asPhases.size(); i++)
+        asPhases[i].acOutputNeedsRedirect = outData;
+
+    DoSignatureAnalysis(sInfo.psPatchConstantSignatures, outData);
+    for (i = 0; i < asPhases.size(); i++)
+        asPhases[i].acPatchConstantsNeedsRedirect = outData;
+
+    // Now walk through all operands and suboperands in all instructions and write 0xff to the dest (cannot occur otherwise)
+    // if we're crossing signature borders
+    for (i = 0; i < asPhases.size(); i++)
+    {
+        ShaderPhase *psPhase = &asPhases[i];
+        for (k = 0; k < psPhase->psInst.size(); k++)
+        {
+            Instruction *psInst = &psPhase->psInst[k];
+            uint32_t j;
+            for (j = 0; j < psInst->ui32NumOperands; j++)
+                DoIOOverlapOperand(psPhase, &psInst->asOperands[j]);
+        }
+
+        // Now prune all tables from anything except 0xff.
+        std::for_each(psPhase->acInputNeedsRedirect.begin(), psPhase->acInputNeedsRedirect.end(), PruneRedirectEntry);
+        std::for_each(psPhase->acOutputNeedsRedirect.begin(), psPhase->acOutputNeedsRedirect.end(), PruneRedirectEntry);
+        std::for_each(psPhase->acPatchConstantsNeedsRedirect.begin(), psPhase->acPatchConstantsNeedsRedirect.end(), PruneRedirectEntry);
+    }
+}
+
+void Shader::SetMaxSemanticIndex()
+{
+    for (std::vector<ShaderInfo::InOutSignature>::iterator it = sInfo.psInputSignatures.begin(); it != sInfo.psInputSignatures.end(); ++it)
+        maxSemanticIndex = std::max(maxSemanticIndex, it->ui32SemanticIndex);
+
+    for (std::vector<ShaderInfo::InOutSignature>::iterator it = sInfo.psOutputSignatures.begin(); it != sInfo.psOutputSignatures.end(); ++it)
+        maxSemanticIndex = std::max(maxSemanticIndex, it->ui32SemanticIndex);
+
+    for (std::vector<ShaderInfo::InOutSignature>::iterator it = sInfo.psPatchConstantSignatures.begin(); it != sInfo.psPatchConstantSignatures.end(); ++it)
+        maxSemanticIndex = std::max(maxSemanticIndex, it->ui32SemanticIndex);
+}
+
+// In DX bytecode, all const arrays are vec4's, and all arrays are stuffed to one large array.
+// Luckily, each chunk is always accessed with a suboperand plus a constant base offset (in ui32RegisterNumber).
+// So do an analysis pass. Also trim the vec4's into smaller formats if the extra components are never read.
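+// (Roughly: if the constant array is only ever read as icb[i + 0].xy in one place and icb[j + 16].xyzw in
+// another, it can be split into a 2-component chunk and a 4-component chunk instead of one big vec4 array;
+// the offsets and component counts here are illustrative.)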
+void ShaderPhase::PruneConstArrays()
+{
+    using namespace std;
+    auto customDataItr = find_if(psDecl.begin(), psDecl.end(), [](const Declaration &d) { return d.eOpcode == OPCODE_CUSTOMDATA; });
+    // Not found? We're done.
+    if (customDataItr == psDecl.end())
+        return;
+
+    // Store the original declaration
+    m_ConstantArrayInfo.m_OrigDeclaration = &(*customDataItr);
+
+    // Loop through each operand and pick up usage masks
+    HLSLcc::ForEachOperand(psInst.begin(), psInst.end(), FEO_FLAG_ALL, [this](const std::vector<Instruction>::iterator &psInst, const Operand *psOperand, uint32_t ui32OperandType)
+    {
+        using namespace std;
+        if (psOperand->eType == OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER)
+        {
+            uint32_t accessMask = psOperand->GetAccessMask();
+            uint32_t offset = psOperand->ui32RegisterNumber;
+
+            // Update the chunk access mask
+
+            // Find all existing entries that have anything common with the access mask
+            auto cbrange = m_ConstantArrayInfo.m_Chunks.equal_range(offset);
+            vector<ChunkMap::iterator> matchingEntries;
+            for (auto itr = cbrange.first; itr != cbrange.second; itr++)
+            {
+                if ((itr->second.m_AccessMask & accessMask) != 0)
+                {
+                    matchingEntries.push_back(itr);
+                }
+            }
+
+            if (matchingEntries.empty())
+            {
+                // Not found, create new entry
+                m_ConstantArrayInfo.m_Chunks.insert(make_pair(offset, ConstantArrayChunk(0u, accessMask, (Operand *)psOperand)));
+            }
+            else if (matchingEntries.size() == 1)
+            {
+                // Update access mask of the one existing entry
+                matchingEntries[0]->second.m_AccessMask |= accessMask;
+                matchingEntries[0]->second.m_UseSites.push_back((Operand *)psOperand);
+            }
+            else
+            {
+                // Multiple entries with (now) overlapping mask. Merge to the first one.
+                ChunkMap::iterator tgt = matchingEntries[0];
+                tgt->second.m_AccessMask |= accessMask;
+                tgt->second.m_UseSites.push_back((Operand *)psOperand);
+                ChunkMap &chunks = m_ConstantArrayInfo.m_Chunks;
+                for_each(matchingEntries.begin() + 1, matchingEntries.end(), [&tgt, &chunks](ChunkMap::iterator itr)
+                {
+                    tgt->second.m_AccessMask |= itr->second.m_AccessMask;
+                    chunks.erase(itr);
+                });
+            }
+        }
+    });
+
+    // Figure out how large each chunk is by finding the next chunk that uses any bits from the current mask (or the max size if not found)
+
+    uint32_t totalSize = (uint32_t)m_ConstantArrayInfo.m_OrigDeclaration->asImmediateConstBuffer.size();
+    for (auto chunk = m_ConstantArrayInfo.m_Chunks.begin(); chunk != m_ConstantArrayInfo.m_Chunks.end(); chunk++)
+    {
+        // Find the next chunk that shares any bits in the access mask
+        auto nextItr = find_if(m_ConstantArrayInfo.m_Chunks.lower_bound(chunk->first + 1), m_ConstantArrayInfo.m_Chunks.end(), [&chunk](ChunkMap::value_type &itr)
+        {
+            return (chunk->second.m_AccessMask & itr.second.m_AccessMask) != 0;
+        });
+
+        // Not found? Must continue until the end of array
+        if (nextItr == m_ConstantArrayInfo.m_Chunks.end())
+            chunk->second.m_Size = totalSize - chunk->first;
+        else
+        {
+            // Otherwise we know the chunk size directly.
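+            // (the next chunk touching the same components starts at nextItr->first, so this chunk spans
+            //  the registers in between)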
+ chunk->second.m_Size = nextItr->first - chunk->first; + } + + // Do rebase on the operands if necessary + chunk->second.m_Rebase = 0; + uint32_t t = chunk->second.m_AccessMask; + ASSERT(t != 0); + while ((t & 1) == 0) + { + chunk->second.m_Rebase++; + t >>= 1; + } + uint32_t rebase = chunk->second.m_Rebase; + uint32_t componentCount = 0; + while (t != 0) + { + componentCount++; + t >>= 1; + } + chunk->second.m_ComponentCount = componentCount; + + for_each(chunk->second.m_UseSites.begin(), chunk->second.m_UseSites.end(), [&rebase, &componentCount](Operand *op) + { + // Store the rebase value to each operand and do the actual rebase. + op->m_Rebase = rebase; + op->m_Size = componentCount; + + if (rebase != 0) + { + // Update component mask. Note that we don't need to do anything to the suboperands. They do not affect destination writemask. + switch (op->eSelMode) + { + case OPERAND_4_COMPONENT_MASK_MODE: + { + uint32_t oldMask = op->ui32CompMask; + if (oldMask == 0) + oldMask = OPERAND_4_COMPONENT_MASK_ALL; + + // Check that we're not losing any information + ASSERT((oldMask >> rebase) << rebase == oldMask); + op->ui32CompMask = (oldMask >> rebase); + break; + } + case OPERAND_4_COMPONENT_SELECT_1_MODE: + ASSERT(op->aui32Swizzle[0] >= rebase); + op->aui32Swizzle[0] -= rebase; + break; + case OPERAND_4_COMPONENT_SWIZZLE_MODE: + { + for (int i = 0; i < 4; i++) + { + // Note that this rebase is different from the one done for source operands + ASSERT(op->aui32Swizzle[i] >= rebase); + op->aui32Swizzle[i] -= rebase; + } + break; + } + default: + ASSERT(0); + } + } + }); + } + + + // We'll do the actual declaration and pruning later on, now that we have the info stored up. +} + +HLSLcc::ControlFlow::ControlFlowGraph &ShaderPhase::GetCFG() +{ + if (!m_CFGInitialized) + { + m_CFG.Build(psInst.data(), psInst.data() + psInst.size()); + m_CFGInitialized = true; + } + + return m_CFG; +} + +void ShaderPhase::UnvectorizeImmMoves() +{ + // NOTE must be called before datatype analysis and other analysis phases are done, as the pointers won't match anymore + // (we insert new instructions there) + using namespace std; + vector nInst; + // Reserve 1.5x space + nInst.reserve(psInst.size() * 3 / 2); + + for_each(psInst.begin(), psInst.end(), [&](Instruction &i) + { + if (i.eOpcode != OPCODE_MOV || i.asOperands[0].eType != OPERAND_TYPE_TEMP || i.asOperands[1].eType != OPERAND_TYPE_IMMEDIATE32 || i.asOperands[0].GetNumSwizzleElements() == 1) + { + nInst.push_back(i); + return; + } + // Ok, found one to unvectorize. + ASSERT(i.asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); + uint32_t mask = i.asOperands[0].ui32CompMask; + for (uint32_t j = 0; j < 4; j++) + { + if ((mask & (1 << j)) == 0) + continue; + + Instruction ni = i; + ni.asOperands[0].ui32CompMask = (1 << j); + nInst.push_back(ni); + } + }); + psInst.clear(); + psInst.swap(nInst); +} + +void ShaderPhase::ExpandSWAPCs() +{ + // First find the DCL_TEMPS declaration + auto dcitr = std::find_if(psDecl.begin(), psDecl.end(), [](const Declaration &decl) -> bool { return decl.eOpcode == OPCODE_DCL_TEMPS; }); + if (dcitr == psDecl.end()) + { + // No temp declaration? Probably we won't have SWAPC either, then. 
+ return; + } + Declaration &tmpDecl = *dcitr; + + uint32_t extraTemp = 0; + bool extraTempAllocated = false; + + // Parse through instructions, open up SWAPCs if necessary + while (1) + { + // Need to find from top every time, because we're inserting stuff into the vector + auto swapItr = std::find_if(psInst.begin(), psInst.end(), [](const Instruction &inst) -> bool { return inst.eOpcode == OPCODE_SWAPC; }); + if (swapItr == psInst.end()) + break; + + // Ok swapItr now points to a SWAPC instruction that we'll have to split up like this (from MSDN): + +/* swapc dest0[.mask], + dest1[.mask], + src0[.swizzle], + src1[.swizzle], + src2[.swizzle] + + expands to : + + movc temp[dest0s mask], + src0[.swizzle], + src2[.swizzle], src1[.swizzle] + + movc dest1[.mask], + src0[.swizzle], + src1[.swizzle], src2[.swizzle] + + mov dest0.mask, temp +*/ + // Allocate a new temp, if not already done + if (!extraTempAllocated) + { + extraTemp = tmpDecl.value.ui32NumTemps++; + extraTempAllocated = true; + } + + Instruction origSwapInst; +#if _DEBUG + origSwapInst.id = swapItr->id; +#endif + std::swap(*swapItr, origSwapInst); // Store the original swapc for reading + + // OP 1: MOVC temp[dest0 mask], src0, src2, stc1 + swapItr->eOpcode = OPCODE_MOVC; + swapItr->ui32NumOperands = 4; + swapItr->ui32FirstSrc = 1; + swapItr->asOperands[0] = origSwapInst.asOperands[0]; + swapItr->asOperands[0].eType = OPERAND_TYPE_TEMP; + swapItr->asOperands[0].ui32RegisterNumber = extraTemp; + // mask is already fine + swapItr->asOperands[1] = origSwapInst.asOperands[2]; // src0 + swapItr->asOperands[2] = origSwapInst.asOperands[4]; // src2 + swapItr->asOperands[3] = origSwapInst.asOperands[3]; // src1 + // swapItr is already in the psInst vector. + + Instruction newInst[2] = { Instruction(), Instruction() }; + // OP 2: MOVC dest1, src0, src1, src2 + newInst[0].eOpcode = OPCODE_MOVC; + newInst[0].ui32NumOperands = 4; + newInst[0].ui32FirstSrc = 1; + newInst[0].asOperands[0] = origSwapInst.asOperands[1]; // dest1 + newInst[0].asOperands[1] = origSwapInst.asOperands[2]; // src0 + newInst[0].asOperands[2] = origSwapInst.asOperands[3]; // src1 + newInst[0].asOperands[3] = origSwapInst.asOperands[4]; // src2 +#if _DEBUG + newInst[0].id = swapItr->id; +#endif + + // OP 3: mov dest0.mask, temp + newInst[1].eOpcode = OPCODE_MOV; + newInst[1].ui32NumOperands = 2; + newInst[1].ui32FirstSrc = 1; + newInst[1].asOperands[0] = origSwapInst.asOperands[0]; // dest 0 + // First copy dest0 to src as well to get the mask set up correctly + newInst[1].asOperands[1] = origSwapInst.asOperands[0]; // dest 0; + // Then overwrite with temp reg + newInst[1].asOperands[1].eType = OPERAND_TYPE_TEMP; + newInst[1].asOperands[1].ui32RegisterNumber = extraTemp; +#if _DEBUG + newInst[1].id = swapItr->id; +#endif + + // Insert the new instructions to the vector + psInst.insert(swapItr + 1, newInst, newInst + 2); + } +} + +void Shader::ExpandSWAPCs() +{ + // Just call ExpandSWAPCs for each phase + for (int i = 0; i < asPhases.size(); i++) + { + asPhases[i].ExpandSWAPCs(); + } +} + +void Shader::ForcePositionToHighp() +{ + // Only sensible in vertex shaders (TODO: is this an issue in tessellation shaders? Do we even care?) 
+ if (eShaderType != VERTEX_SHADER) + return; + + ShaderPhase &phase = asPhases[0]; + + // Find the output declaration + std::vector::iterator itr = std::find_if(phase.psDecl.begin(), phase.psDecl.end(), [this](const Declaration &decl) -> bool + { + if (decl.eOpcode == OPCODE_DCL_OUTPUT_SIV) + { + const SPECIAL_NAME specialName = decl.asOperands[0].eSpecialName; + if (specialName == NAME_POSITION || + specialName == NAME_UNDEFINED) // This might be SV_Position (because d3dcompiler is weird). + { + const ShaderInfo::InOutSignature *sig = NULL; + sInfo.GetOutputSignatureFromRegister(decl.asOperands[0].ui32RegisterNumber, decl.asOperands[0].GetAccessMask(), 0, &sig); + ASSERT(sig != NULL); + if ((sig->eSystemValueType == NAME_POSITION || sig->semanticName == "POS") && sig->ui32SemanticIndex == 0) + { + ((ShaderInfo::InOutSignature *)sig)->eMinPrec = MIN_PRECISION_DEFAULT; + return true; + } + } + return false; + } + else if (decl.eOpcode == OPCODE_DCL_OUTPUT) + { + const ShaderInfo::InOutSignature *sig = NULL; + sInfo.GetOutputSignatureFromRegister(decl.asOperands[0].ui32RegisterNumber, decl.asOperands[0].GetAccessMask(), 0, &sig); + ASSERT(sig != NULL); + if ((sig->eSystemValueType == NAME_POSITION || sig->semanticName == "POS") && sig->ui32SemanticIndex == 0) + { + ((ShaderInfo::InOutSignature *)sig)->eMinPrec = MIN_PRECISION_DEFAULT; + return true; + } + return false; + } + return false; + }); + + // Do nothing if we don't find suitable output. This may well be INTERNALTESSPOS for tessellation etc. + if (itr == phase.psDecl.end()) + return; + + uint32_t outputPosReg = itr->asOperands[0].ui32RegisterNumber; + + HLSLcc::ForEachOperand(phase.psInst.begin(), phase.psInst.end(), FEO_FLAG_DEST_OPERAND, [outputPosReg](std::vector::iterator itr, Operand *op, uint32_t flags) + { + if (op->eType == OPERAND_TYPE_OUTPUT && op->ui32RegisterNumber == outputPosReg) + op->eMinPrecision = OPERAND_MIN_PRECISION_DEFAULT; + }); +} + +void Shader::FindUnusedGlobals(uint32_t flags) +{ + for (int i = 0; i < asPhases.size(); i++) + { + ShaderPhase &phase = asPhases[i]; + + // Loop through every operand and pick up usages + HLSLcc::ForEachOperand(phase.psInst.begin(), phase.psInst.end(), FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND, [&](std::vector::iterator inst, Operand *op, uint32_t flags) + { + // Not a constant buffer read? continue + if (op->eType != OPERAND_TYPE_CONSTANT_BUFFER) + return; + + const uint32_t ui32BindingPoint = op->aui32ArraySizes[0]; + const ConstantBuffer *psCBuf = NULL; + sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, ui32BindingPoint, &psCBuf); + + if (!psCBuf) + return; + + // Get all the struct members that can be reached from this usage: + uint32_t mask = op->GetAccessMask(); + for (uint32_t k = 0; k < 4; k++) + { + if ((mask & (1 << k)) == 0) + continue; + + uint32_t tmpSwizzle[4] = {k, k, k, k}; + int rebase; + bool isArray; + + ShaderVarType *psVarType = NULL; + + ShaderInfo::GetShaderVarFromOffset(op->aui32ArraySizes[1], tmpSwizzle, psCBuf, (const ShaderVarType**)&psVarType, &isArray, NULL, &rebase, flags); + + // Mark as used. Also all parents. 
+ while (psVarType) + { + psVarType->m_IsUsed = true; + psVarType = psVarType->Parent; + } + } + }); + } +} diff --git a/third_party/HLSLcc/src/ShaderInfo.cpp b/third_party/HLSLcc/src/ShaderInfo.cpp new file mode 100644 index 0000000..554f202 --- /dev/null +++ b/third_party/HLSLcc/src/ShaderInfo.cpp @@ -0,0 +1,520 @@ +#include "ShaderInfo.h" +#include "internal_includes/debug.h" +#include "internal_includes/tokens.h" +#include "Operand.h" +#include +#include +#include + + +SHADER_VARIABLE_TYPE ShaderInfo::GetTextureDataType(uint32_t regNo) +{ + const ResourceBinding* psBinding = 0; + int found; + found = GetResourceFromBindingPoint(RGROUP_TEXTURE, regNo, &psBinding); + ASSERT(found != 0); + return psBinding->GetDataType(); +} + +void ShaderInfo::GetConstantBufferFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ConstantBuffer** ppsConstBuf) const +{ + ASSERT(ui32MajorVersion > 3); + *ppsConstBuf = &psConstantBuffers[aui32ResourceMap[eGroup][ui32BindPoint]]; +} + +int ShaderInfo::GetResourceFromBindingPoint(const ResourceGroup eGroup, uint32_t const ui32BindPoint, const ResourceBinding** ppsOutBinding) const +{ + size_t i; + const size_t ui32NumBindings = psResourceBindings.size(); + const ResourceBinding* psBindings = &psResourceBindings[0]; + + for (i = 0; i < ui32NumBindings; ++i) + { + if (ResourceTypeToResourceGroup(psBindings[i].eType) == eGroup) + { + if (ui32BindPoint >= psBindings[i].ui32BindPoint && ui32BindPoint < (psBindings[i].ui32BindPoint + psBindings[i].ui32BindCount)) + { + *ppsOutBinding = psBindings + i; + return 1; + } + } + } + return 0; +} + +int ShaderInfo::GetInterfaceVarFromOffset(uint32_t ui32Offset, ShaderVar** ppsShaderVar) const +{ + size_t i; + const size_t ui32NumVars = psThisPointerConstBuffer->asVars.size(); + + for (i = 0; i < ui32NumVars; ++i) + { + if (ui32Offset >= psThisPointerConstBuffer->asVars[i].ui32StartOffset && + ui32Offset < (psThisPointerConstBuffer->asVars[i].ui32StartOffset + psThisPointerConstBuffer->asVars[i].ui32Size)) + { + *ppsShaderVar = &psThisPointerConstBuffer->asVars[i]; + return 1; + } + } + return 0; +} + +int ShaderInfo::GetInputSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull /* == false */) const +{ + size_t i; + const size_t ui32NumVars = psInputSignatures.size(); + + for (i = 0; i < ui32NumVars; ++i) + { + if ((ui32Register == psInputSignatures[i].ui32Register) && (((~psInputSignatures[i].ui32Mask) & ui32Mask) == 0)) + { + *ppsOut = &psInputSignatures[i]; + return 1; + } + } + ASSERT(allowNull); + return 0; +} + +int ShaderInfo::GetPatchConstantSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull /* == false */) const +{ + size_t i; + const size_t ui32NumVars = psPatchConstantSignatures.size(); + + for (i = 0; i < ui32NumVars; ++i) + { + if ((ui32Register == psPatchConstantSignatures[i].ui32Register) && (((~psPatchConstantSignatures[i].ui32Mask) & ui32Mask) == 0)) + { + *ppsOut = &psPatchConstantSignatures[i]; + return 1; + } + } + + // There are situations (especially when using dcl_indexrange) where the compiler happily writes outside the actual masks. 
+ // In those situations just take the last signature that uses that register (it's typically the "highest" one) + for (i = ui32NumVars - 1; i-- > 0;) + { + if (ui32Register == psPatchConstantSignatures[i].ui32Register) + { + *ppsOut = &psPatchConstantSignatures[i]; + return 1; + } + } + + ASSERT(allowNull); + return 0; +} + +int ShaderInfo::GetOutputSignatureFromRegister(const uint32_t ui32Register, + const uint32_t ui32CompMask, + const uint32_t ui32Stream, + const InOutSignature** ppsOut, + bool allowNull /* = false */) const +{ + size_t i; + const size_t ui32NumVars = psOutputSignatures.size(); + ASSERT(ui32CompMask != 0); + + for (i = 0; i < ui32NumVars; ++i) + { + if (ui32Register == psOutputSignatures[i].ui32Register && + (ui32CompMask & psOutputSignatures[i].ui32Mask) && + ui32Stream == psOutputSignatures[i].ui32Stream) + { + *ppsOut = &psOutputSignatures[i]; + return 1; + } + } + ASSERT(allowNull); + return 0; +} + +int ShaderInfo::GetOutputSignatureFromSystemValue(SPECIAL_NAME eSystemValueType, uint32_t ui32SemanticIndex, const InOutSignature** ppsOut) const +{ + size_t i; + const size_t ui32NumVars = psOutputSignatures.size(); + + for (i = 0; i < ui32NumVars; ++i) + { + if (eSystemValueType == psOutputSignatures[i].eSystemValueType && + ui32SemanticIndex == psOutputSignatures[i].ui32SemanticIndex) + { + *ppsOut = &psOutputSignatures[i]; + return 1; + } + } + ASSERT(0); + return 0; +} + +uint32_t ShaderInfo::GetCBVarSize(const ShaderVarType* psType, bool matrixAsVectors, bool wholeArraySize) +{ + // Default is regular matrices, vectors and scalars + uint32_t size = psType->Columns * psType->Rows * 4; + + // Struct size is calculated from the offset and size of its last member. + // Need to take into account that members could be arrays. + if (psType->Class == SVC_STRUCT) + { + size = psType->Members.back().Offset + GetCBVarSize(&psType->Members.back(), matrixAsVectors, true); + } + // Matrices represented as vec4 arrays have special size calculation + else if (matrixAsVectors) + { + if (psType->Class == SVC_MATRIX_ROWS) + { + size = psType->Rows * 16; + } + else if (psType->Class == SVC_MATRIX_COLUMNS) + { + size = psType->Columns * 16; + } + } + + if (wholeArraySize && psType->Elements > 1) + { + uint32_t paddedSize = ((size + 15) / 16) * 16; // Arrays are padded to float4 size + size = (psType->Elements - 1) * paddedSize + size; // Except the last element + } + + return size; +} + +static const ShaderVarType* IsOffsetInType(const ShaderVarType* psType, + uint32_t parentOffset, + uint32_t offsetToFind, + bool* isArray, + std::vector* arrayIndices, + int32_t* pi32Rebase, + uint32_t flags) +{ + uint32_t thisOffset = parentOffset + psType->Offset; + uint32_t thisSize = ShaderInfo::GetCBVarSize(psType, (flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0); + uint32_t paddedSize = ((thisSize + 15) / 16) * 16; + uint32_t arraySize = thisSize; + + // Array elements are padded to align on vec4 size, except for the last one + if (psType->Elements) + arraySize = (paddedSize * (psType->Elements - 1)) + thisSize; + + if ((offsetToFind >= thisOffset) && + offsetToFind < (thisOffset + arraySize)) + { + *isArray = false; + if (psType->Class == SVC_STRUCT) + { + if (psType->Elements > 1 && arrayIndices != NULL) + arrayIndices->push_back((offsetToFind - thisOffset) / thisSize); + + // Need to bring offset back to element zero in case of array of structs + uint32_t offsetInStruct = (offsetToFind - thisOffset) % paddedSize; + uint32_t m = 0; + + for (m = 0; m < psType->MemberCount; ++m) + { + const 
ShaderVarType* psMember = &psType->Members[m]; + + const ShaderVarType* foundType = IsOffsetInType(psMember, thisOffset, thisOffset + offsetInStruct, isArray, arrayIndices, pi32Rebase, flags); + if (foundType != NULL) + return foundType; + } + } + // Check for array of scalars or vectors (both take up 16 bytes per element). + // Matrices are also treated as arrays of vectors. + else if ((psType->Class == SVC_MATRIX_ROWS || psType->Class == SVC_MATRIX_COLUMNS) || + ((psType->Class == SVC_SCALAR || psType->Class == SVC_VECTOR) && psType->Elements > 1)) + { + *isArray = true; + if (arrayIndices != NULL) + arrayIndices->push_back((offsetToFind - thisOffset) / 16); + } + else if (psType->Class == SVC_VECTOR) + { + //Check for vector starting at a non-vec4 offset. + + // cbuffer $Globals + // { + // + // float angle; // Offset: 0 Size: 4 + // float2 angle2; // Offset: 4 Size: 8 + // + // } + + //cb0[0].x = angle + //cb0[0].yzyy = angle2.xyxx + + //Rebase angle2 so that .y maps to .x, .z maps to .y + + pi32Rebase[0] = thisOffset % 16; + } + + return psType; + } + return NULL; +} + +int ShaderInfo::GetShaderVarFromOffset(const uint32_t ui32Vec4Offset, + const uint32_t(&pui32Swizzle)[4], + const ConstantBuffer* psCBuf, + const ShaderVarType** ppsShaderVar, // Output the found var + bool* isArray, // Output bool that tells if the found var is an array + std::vector* arrayIndices, // Output vector of array indices in order from root parent to the found var + int32_t* pi32Rebase, // Output swizzle rebase + uint32_t flags) +{ + size_t i; + + uint32_t ui32ByteOffset = ui32Vec4Offset * 16; + + //Swizzle can point to another variable. In the example below + //cbUIUpdates.g_uMaxFaces would be cb1[2].z. The scalars are combined + //into vectors. psCBuf->ui32NumVars will be 3. + + // cbuffer cbUIUpdates + // { + // float g_fLifeSpan; // Offset: 0 Size: 4 + // float g_fLifeSpanVar; // Offset: 4 Size: 4 [unused] + // float g_fRadiusMin; // Offset: 8 Size: 4 [unused] + // float g_fRadiusMax; // Offset: 12 Size: 4 [unused] + // float g_fGrowTime; // Offset: 16 Size: 4 [unused] + // float g_fStepSize; // Offset: 20 Size: 4 + // float g_fTurnRate; // Offset: 24 Size: 4 + // float g_fTurnSpeed; // Offset: 28 Size: 4 [unused] + // float g_fLeafRate; // Offset: 32 Size: 4 + // float g_fShrinkTime; // Offset: 36 Size: 4 [unused] + // uint g_uMaxFaces; // Offset: 40 Size: 4 + // } + if (pui32Swizzle[0] == OPERAND_4_COMPONENT_Y) + { + ui32ByteOffset += 4; + } + else if (pui32Swizzle[0] == OPERAND_4_COMPONENT_Z) + { + ui32ByteOffset += 8; + } + else if (pui32Swizzle[0] == OPERAND_4_COMPONENT_W) + { + ui32ByteOffset += 12; + } + + const size_t ui32NumVars = psCBuf->asVars.size(); + + for (i = 0; i < ui32NumVars; ++i) + { + ppsShaderVar[0] = IsOffsetInType(&psCBuf->asVars[i].sType, psCBuf->asVars[i].ui32StartOffset, ui32ByteOffset, isArray, arrayIndices, pi32Rebase, flags); + + if (ppsShaderVar[0] != NULL) + return 1; + } + return 0; +} + +// Patches the fullName of the var with given array indices. Does not insert the indexing for the var itself if it is an array. +// Searches for brackets and inserts indices one by one. 
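To make the bracket walk described in the comment above (GetShaderVarIndexedFullName, declared just below) easier to follow, here is a minimal stand-alone sketch of the same idea operating on a plain std::string. PatchIndicesSketch is a hypothetical helper written for illustration only; it ignores the dynamicIndex / revertDynamicIndexCalc handling of the real function and simply drops each static index in after the matching '['.

    #include <cstdint>
    #include <string>
    #include <vector>

    static std::string PatchIndicesSketch(const std::string &fullName, const std::vector<uint32_t> &indices)
    {
        std::string out;
        size_t prevpos = 0;
        size_t pos = fullName.find('[', 0);
        uint32_t i = 0;
        while (pos != std::string::npos)
        {
            pos++;                                          // keep the '[' itself
            out += fullName.substr(prevpos, pos - prevpos);
            if (i < indices.size())
                out += std::to_string(indices[i]);          // insert the next array index
            prevpos = pos;
            i++;
            pos = fullName.find('[', prevpos);
        }
        out += fullName.substr(prevpos);                    // tail, including the closing ']'
        return out;
    }

    // Example (made-up names): PatchIndicesSketch("bones[].mats[]", {3, 1}) returns "bones[3].mats[1]".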
+std::string ShaderInfo::GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, const std::vector& indices, const std::string& dynamicIndex, bool revertDynamicIndexCalc, bool matrixAsVectors) +{ + std::ostringstream oss; + size_t prevpos = 0; + size_t pos = psShaderVar->fullName.find('[', 0); + uint32_t i = 0; + while (pos != std::string::npos) + { + pos++; + oss << psShaderVar->fullName.substr(prevpos, pos - prevpos); + + // Add possibly given dynamic index for the root array. + if (i == 0 && !dynamicIndex.empty()) + { + oss << dynamicIndex; + + // if we couldn't use original index temp, revert the float4 address calc here + if (revertDynamicIndexCalc) + { + const ShaderVarType* psRootVar = psShaderVar; + while (psRootVar->Parent != NULL) + psRootVar = psRootVar->Parent; + + uint32_t thisSize = (GetCBVarSize(psRootVar, matrixAsVectors) + 15) / 16; // size in float4 + oss << " / " << thisSize; + } + + if (!indices.empty() && indices[i] != 0) + oss << " + " << indices[i]; + } + else if (i < indices.size()) + oss << indices[i]; + + prevpos = pos; + i++; + pos = psShaderVar->fullName.find('[', prevpos); + } + oss << psShaderVar->fullName.substr(prevpos); + + return oss.str(); +} + +ResourceGroup ShaderInfo::ResourceTypeToResourceGroup(ResourceType eType) +{ + switch (eType) + { + case RTYPE_CBUFFER: + return RGROUP_CBUFFER; + + case RTYPE_SAMPLER: + return RGROUP_SAMPLER; + + case RTYPE_TEXTURE: + case RTYPE_BYTEADDRESS: + case RTYPE_STRUCTURED: + return RGROUP_TEXTURE; + + case RTYPE_UAV_RWTYPED: + case RTYPE_UAV_RWSTRUCTURED: + case RTYPE_UAV_RWBYTEADDRESS: + case RTYPE_UAV_APPEND_STRUCTURED: + case RTYPE_UAV_CONSUME_STRUCTURED: + case RTYPE_UAV_RWSTRUCTURED_WITH_COUNTER: + return RGROUP_UAV; + + case RTYPE_TBUFFER: + ASSERT(0); // Need to find out which group this belongs to + return RGROUP_TEXTURE; + default: + break; + } + + ASSERT(0); + return RGROUP_CBUFFER; +} + +static inline std::string GetTextureNameFromSamplerName(const std::string& samplerIn) +{ + ASSERT(samplerIn.compare(0, 7, "sampler") == 0); + + // please note that we do not have hard rules about how sampler names should be structured + // what's more they can even skip texture name (but that should be handled separately) + // how do we try to deduce the texture name: we remove known tokens, and take the leftmost (first) "word" + // note that we want to support c-style naming (with underscores for spaces) + // as it is pretty normal to have texture name starting with underscore + // we bind underscores "to the right" + + // note that we want sampler state to be case insensitive + // while checking for a match could be done with strncasecmp/_strnicmp + // windows is missing case-insensetive "find substring" (strcasestr), so we transform to lowercase instead + std::string sampler = samplerIn; + for (std::string::iterator i = sampler.begin(), in = sampler.end(); i != in; ++i) + *i = std::tolower(*i); + + struct Token { const char* str; int len; }; + #define TOKEN(s) { s, (int)strlen(s) } + Token token[] = { + TOKEN("compare"), + TOKEN("point"), TOKEN("trilinear"), TOKEN("linear"), + TOKEN("clamp"), TOKEN("clampu"), TOKEN("clampv"), TOKEN("clampw"), + TOKEN("repeat"), TOKEN("repeatu"), TOKEN("repeatv"), TOKEN("repeatw"), + TOKEN("mirror"), TOKEN("mirroru"), TOKEN("mirrorv"), TOKEN("mirrorw"), + TOKEN("mirroronce"), TOKEN("mirroronceu"), TOKEN("mirroroncev"), TOKEN("mirroroncew"), + }; + #undef TOKEN + + const char* s = sampler.c_str(); + for (int texNameStart = 7; s[texNameStart];) + { + // skip underscores and find the 
potential beginning of a token + int tokenStart = texNameStart, tokenEnd = -1; + while (s[tokenStart] == '_') + ++tokenStart; + + // check token list for matches + for (int i = 0, n = sizeof(token) / sizeof(token[0]); i < n && tokenEnd < 0; ++i) + if (strncmp(s + tokenStart, token[i].str, token[i].len) == 0) + tokenEnd = tokenStart + token[i].len; + + if (tokenEnd < 0) + { + // we have found texture name + + // find next token + int nextTokenStart = sampler.length(); + for (int i = 0, n = sizeof(token) / sizeof(token[0]); i < n; ++i) + { + // again: note that we want to be case insensitive + const int pos = sampler.find(token[i].str, tokenStart); + + if (pos != std::string::npos && pos < nextTokenStart) + nextTokenStart = pos; + } + + // check preceeding underscores, but only if we have found an actual token (not the end of the string) + if (nextTokenStart < sampler.length()) + { + while (nextTokenStart > tokenStart && s[nextTokenStart - 1] == '_') + --nextTokenStart; + } + + // note that we return the substring of the initial sampler name to preserve case + return samplerIn.substr(texNameStart, nextTokenStart - texNameStart); + } + else + { + // we have found known token + texNameStart = tokenEnd; + } + } + + // if we ended up here, the texture name is missing + return ""; +} + +// note that we dont have the means right now to have unit tests in hlslcc, so we do poor man testing below +// AddSamplerPrecisions is called once for every program, so it is easy to uncomment and test +static inline void Test_GetTextureNameFromSamplerName() +{ + #define CHECK(s, t) ASSERT(GetTextureNameFromSamplerName(std::string(s)) == std::string(t)) + + CHECK("sampler_point_clamp", ""); + CHECK("sampler_point_clamp_Tex", "_Tex"); + CHECK("sampler_point_clamp_Tex__", "_Tex__"); + CHECK("sampler_______point_Tex", "_Tex"); + + CHECK("samplerPointClamp", ""); + CHECK("samplerPointClamp_Tex", "_Tex"); + CHECK("samplerPointClamp_Tex__", "_Tex__"); + + CHECK("samplerPointTexClamp", "Tex"); + CHECK("samplerPoint_TexClamp", "_Tex"); + CHECK("samplerPoint_Tex_Clamp", "_Tex"); + + #undef CHECK +} + +void ShaderInfo::AddSamplerPrecisions(HLSLccSamplerPrecisionInfo &info) +{ + if (info.empty()) + return; + +#if _DEBUG && 0 + Test_GetTextureNameFromSamplerName(); +#endif + + for (size_t i = 0; i < psResourceBindings.size(); i++) + { + ResourceBinding *rb = &psResourceBindings[i]; + if (rb->eType != RTYPE_SAMPLER && rb->eType != RTYPE_TEXTURE && rb->eType != RTYPE_UAV_RWTYPED) + continue; + + // Try finding the exact match + HLSLccSamplerPrecisionInfo::iterator j = info.find(rb->name); + + // If match not found, check if name has "sampler" prefix (DX11 style sampler case) + // then we try to recover texture name from sampler name + if (j == info.end() && rb->name.compare(0, 7, "sampler") == 0) + j = info.find(GetTextureNameFromSamplerName(rb->name)); + + // note that if we didnt find the respective texture, we cannot say anything about sampler precision + // currently it will become "unknown" resulting in half format, even if we sample with it the texture explicitly marked as float + // TODO: should we somehow allow overriding it? 
+ if (j != info.end()) + rb->ePrecision = j->second; + } +} diff --git a/third_party/HLSLcc/src/UseDefineChains.cpp b/third_party/HLSLcc/src/UseDefineChains.cpp new file mode 100644 index 0000000..f6f7e89 --- /dev/null +++ b/third_party/HLSLcc/src/UseDefineChains.cpp @@ -0,0 +1,814 @@ +#include "internal_includes/UseDefineChains.h" +#include "internal_includes/debug.h" +#include "internal_includes/Instruction.h" + +#include "internal_includes/ControlFlowGraph.h" +#include "internal_includes/debug.h" +#include "internal_includes/HLSLccToolkit.h" +#include + +using HLSLcc::ForEachOperand; + +#define DEBUG_UDCHAINS 0 + +#if DEBUG_UDCHAINS +// Debug mode +static void UDCheckConsistencyDUChain(uint32_t idx, DefineUseChains &psDUChains, UseDefineChains &psUDChains, ActiveDefinitions &activeDefinitions) +{ + DefineUseChain::iterator du = psDUChains[idx].begin(); + UseDefineChain::iterator ud = psUDChains[idx].begin(); + while (du != psDUChains[idx].end()) + { + ASSERT(du->index == idx % 4); + // Check that the definition actually writes to idx + { + uint32_t tempReg = idx / 4; + uint32_t offs = idx - (tempReg * 4); + uint32_t accessMask = 1 << offs; + uint32_t i; + int found = 0; + for (i = 0; i < du->psInst->ui32FirstSrc; i++) + { + if (du->psInst->asOperands[i].eType == OPERAND_TYPE_TEMP) + { + if (du->psInst->asOperands[i].ui32RegisterNumber == tempReg) + { + uint32_t writeMask = GetOperandWriteMask(&du->psInst->asOperands[i]); + if (writeMask & accessMask) + { + ASSERT(writeMask == du->writeMask); + found = 1; + break; + } + } + } + } + ASSERT(found); + } + + // Check that each usage of each definition also is found in the use-define chain + UsageSet::iterator ul = du->usages.begin(); + while (ul != du->usages.end()) + { + // Search for the usage in the chain + UseDefineChain::iterator use = ud; + while (use != psUDChains[idx].end() && &*use != *ul) + use++; + ASSERT(use != psUDChains[idx].end()); + ASSERT(&*use == *ul); + + // Check that the mapping back is also found + ASSERT(std::find(use->defines.begin(), use->defines.end(), &*du) != use->defines.end()); + + ul++; + } + + du++; + } +} + +static void UDCheckConsistencyUDChain(uint32_t idx, DefineUseChains &psDUChains, UseDefineChains &psUDChains, ActiveDefinitions &activeDefinitions) +{ + DefineUseChain::iterator du = psDUChains[idx].begin(); + UseDefineChain::iterator ud = psUDChains[idx].begin(); + while (ud != psUDChains[idx].end()) + { + // Check that each definition of each usage also is found in the define-use chain + DefineSet::iterator dl = ud->defines.begin(); + ASSERT(ud->psOp->ui32RegisterNumber == idx / 4); + ASSERT(ud->index == idx % 4); + while (dl != ud->defines.end()) + { + // Search for the definition in the chain + DefineUseChain::iterator def = du; + while (def != psDUChains[idx].end() && &*def != *dl) + def++; + ASSERT(def != psDUChains[idx].end()); + ASSERT(&*def == *dl); + + // Check that the mapping back is also found + ASSERT(std::find(def->usages.begin(), def->usages.end(), &*ud) != def->usages.end()); + + dl++; + } + ud++; + } +} + +static void UDCheckConsistency(uint32_t tempRegs, DefineUseChains &psDUChains, UseDefineChains &psUDChains, ActiveDefinitions &activeDefinitions) +{ + uint32_t i; + for (i = 0; i < tempRegs * 4; i++) + { + UDCheckConsistencyDUChain(i, psDUChains, psUDChains, activeDefinitions); + UDCheckConsistencyUDChain(i, psDUChains, psUDChains, activeDefinitions); + } +} + +#define printf_console printf + +#endif + +using namespace HLSLcc::ControlFlow; +using std::for_each; + +static 
DefineUseChainEntry *GetOrCreateDefinition(const BasicBlock::Definition &def, DefineUseChain &psDUChain, uint32_t index) +{ + // Try to find an existing entry + auto itr = std::find_if(psDUChain.begin(), psDUChain.end(), [&](const DefineUseChainEntry &de) + { + return de.psInst == def.m_Instruction && de.psOp == def.m_Operand; + }); + + if (itr != psDUChain.end()) + { + return &(*itr); + } + + // Not found, create + psDUChain.push_front(DefineUseChainEntry()); + DefineUseChainEntry &de = *psDUChain.begin(); + + de.psInst = (Instruction *)def.m_Instruction; + de.psOp = (Operand *)def.m_Operand; + de.index = index; + de.writeMask = def.m_Operand->GetAccessMask(); + de.psSiblings[index] = &de; + + return &de; +} + +// Do flow control analysis on the instructions and build the define-use and use-define chains +void BuildUseDefineChains(std::vector &instructions, uint32_t ui32NumTemps, DefineUseChains &psDUChain, UseDefineChains &psUDChain, HLSLcc::ControlFlow::ControlFlowGraph &cfg) +{ + ActiveDefinitions lastSeenDefinitions(ui32NumTemps * 4, NULL); // Array of pointers to the currently active definition for each temp + + psDUChain.clear(); + psUDChain.clear(); + + for (uint32_t i = 0; i < ui32NumTemps * 4; i++) + { + psUDChain.insert(std::make_pair(i, UseDefineChain())); + psDUChain.insert(std::make_pair(i, DefineUseChain())); + } + + const ControlFlowGraph::BasicBlockStorage &blocks = cfg.AllBlocks(); + + // Loop through each block, first calculate the union of all the reachables of all preceding blocks + // and then build on that as we go along the basic block instructions + for_each(blocks.begin(), blocks.end(), [&](const HLSLcc::shared_ptr &bptr) + { + const BasicBlock &b = *bptr.get(); + BasicBlock::ReachableVariables rvars; + for_each(b.Preceding().begin(), b.Preceding().end(), [&](const Instruction *precBlock) + { + const BasicBlock &b = *cfg.GetBasicBlockForInstruction(precBlock); + BasicBlock::RVarUnion(rvars, b.Reachable()); + }); + + // Now we have a Reachable set for the beginning of this block in rvars. Loop through all instructions and their operands and pick up uses and definitions + for (const Instruction *inst = b.First(); inst <= b.Last(); inst++) + { + // Process sources first + ForEachOperand(inst, inst + 1, FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND, + [&](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType) + { + if (psOperand->eType != OPERAND_TYPE_TEMP) + return; + + uint32_t tempReg = psOperand->ui32RegisterNumber; + uint32_t accessMask = psOperand->GetAccessMask(); + + // Go through each component + for (int k = 0; k < 4; k++) + { + if (!(accessMask & (1 << k))) + continue; + + uint32_t regIdx = tempReg * 4 + k; + + // Add an use for all visible definitions + psUDChain[regIdx].push_front(UseDefineChainEntry()); + UseDefineChainEntry &ue = *psUDChain[regIdx].begin(); + ue.psInst = (Instruction *)psInst; + ue.psOp = (Operand *)psOperand; + ue.accessMask = accessMask; + ue.index = k; + ue.psSiblings[k] = &ue; + // ue.siblings will be filled out later. 
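+ // (A "sibling" is the chain entry created for another component of this same operand;
+ // the connection pass further down, which walks psUDChain, links them via psSiblings[k].)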
+ + BasicBlock::ReachableDefinitionsPerVariable& rpv = rvars[regIdx]; + for_each(rpv.begin(), rpv.end(), [&](const BasicBlock::Definition &def) + { + DefineUseChainEntry *duentry = GetOrCreateDefinition(def, psDUChain[regIdx], k); + ue.defines.insert(duentry); + duentry->usages.insert(&ue); + }); + } + return; + }); + + // Then the destination operands + ForEachOperand(inst, inst + 1, FEO_FLAG_DEST_OPERAND, + [&](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType) + { + if (psOperand->eType != OPERAND_TYPE_TEMP) + return; + + uint32_t tempReg = psOperand->ui32RegisterNumber; + uint32_t accessMask = psOperand->GetAccessMask(); + + // Go through each component + for (int k = 0; k < 4; k++) + { + if (!(accessMask & (1 << k))) + continue; + + uint32_t regIdx = tempReg * 4 + k; + + // Overwrite whatever's in rvars; they are killed by this + rvars[regIdx].clear(); + rvars[regIdx].insert(BasicBlock::Definition(psInst, psOperand)); + + // Make sure the definition gets created even though it doesn't have any uses at all + // (happens when sampling a texture but not all channels are used etc). + GetOrCreateDefinition(BasicBlock::Definition(psInst, psOperand), psDUChain[regIdx], k); + } + return; + }); + } + }); + + // Connect the siblings for all uses and definitions + for_each(psUDChain.begin(), psUDChain.end(), [&](std::pair &udpair) + { + UseDefineChain &ud = udpair.second; + // Clear out the bottom 2 bits to get the actual base reg + uint32_t baseReg = udpair.first & ~(3); + + for_each(ud.begin(), ud.end(), [&](UseDefineChainEntry &ue) + { + ASSERT(baseReg / 4 == ue.psOp->ui32RegisterNumber); + + // Go through each component + for (int k = 0; k < 4; k++) + { + // Skip components that we don't access, or the one that's our own + if (!(ue.accessMask & (1 << k)) || ue.index == k) + continue; + + // Find the corresponding sibling. We can uniquely identify it by the operand pointer alone. + UseDefineChain::iterator siblItr = std::find_if(psUDChain[baseReg + k].begin(), psUDChain[baseReg + k].end(), [&](const UseDefineChainEntry &_sibl) -> bool { return _sibl.psOp == ue.psOp; }); + ASSERT(siblItr != psUDChain[baseReg + k].end()); + UseDefineChainEntry &sibling = *siblItr; + ue.psSiblings[k] = &sibling; + } + }); + }); + + // Same for definitions + for_each(psDUChain.begin(), psDUChain.end(), [&](std::pair &dupair) + { + DefineUseChain &du = dupair.second; + // Clear out the bottom 2 bits to get the actual base reg + uint32_t baseReg = dupair.first & ~(3); + + for_each(du.begin(), du.end(), [&](DefineUseChainEntry &de) + { + ASSERT(baseReg / 4 == de.psOp->ui32RegisterNumber); + + // Go through each component + for (int k = 0; k < 4; k++) + { + // Skip components that we don't access, or the one that's our own + if (!(de.writeMask & (1 << k)) || de.index == k) + continue; + + // Find the corresponding sibling. We can uniquely identify it by the operand pointer alone. 
+ DefineUseChain::iterator siblItr = std::find_if(psDUChain[baseReg + k].begin(), psDUChain[baseReg + k].end(), [&](const DefineUseChainEntry &_sibl) -> bool { return _sibl.psOp == de.psOp; }); + ASSERT(siblItr != psDUChain[baseReg + k].end()); + DefineUseChainEntry &sibling = *siblItr; + de.psSiblings[k] = &sibling; + } + }); + }); + +#if DEBUG_UDCHAINS + UDCheckConsistency(ui32NumTemps, psDUChain, psUDChain, lastSeenDefinitions); +#endif +} + +typedef std::vector SplitDefinitions; + +// Split out a define to use a new temp register +static void UDDoSplit(SplitDefinitions &defs, uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector &pui32SplitTable) +{ + uint32_t newReg = *psNumTemps; + uint32_t oldReg = defs[0]->psOp->ui32RegisterNumber; + uint32_t accessMask = defs[0]->writeMask; + uint32_t i, u32def; + uint32_t rebase, count; + uint32_t splitTableValue; + + ASSERT(defs.size() > 0); + for (i = 1; i < defs.size(); i++) + { + ASSERT(defs[i]->psOp->ui32RegisterNumber == oldReg); + accessMask |= defs[i]->writeMask; + } + + + (*psNumTemps)++; + + +#if DEBUG_UDCHAINS + UDCheckConsistency((*psNumTemps) - 1, psDUChains, psUDChains, ActiveDefinitions()); +#endif + ASSERT(accessMask != 0 && accessMask <= 0xf); + // Calculate rebase value and component count + rebase = 0; + count = 0; + i = accessMask; + while ((i & 1) == 0) + { + rebase++; + i = i >> 1; + } + while (i != 0) + { + count++; + i = i >> 1; + } + + // Make sure there's enough room in the split table + if (pui32SplitTable.size() <= newReg) + { + size_t newSize = pui32SplitTable.size() * 2; + pui32SplitTable.resize(newSize, 0xffffffff); + } + + // Set the original temp of the new register + { + uint32_t origTemp = oldReg; + while (pui32SplitTable[origTemp] != 0xffffffff) + origTemp = pui32SplitTable[origTemp] & 0xffff; + + ASSERT(rebase < 4); + ASSERT(count <= 4); + splitTableValue = (count << 24) | (rebase << 16) | origTemp; + + pui32SplitTable[newReg] = splitTableValue; + } + + // Insert the new temps to the map + for (i = newReg * 4; i < newReg * 4 + 4; i++) + { + psUDChains.insert(std::make_pair(i, UseDefineChain())); + psDUChains.insert(std::make_pair(i, DefineUseChain())); + } + + for (u32def = 0; u32def < defs.size(); u32def++) + { + DefineUseChainEntry *defineToSplit = defs[u32def]; + uint32_t oldIdx = defineToSplit->index; +#if DEBUG_UDCHAINS + printf("Split def at instruction %d (reg %d -> %d, access %X, rebase %d, count: %d)\n", (int)defineToSplit->psInst->id, oldReg, newReg, accessMask, rebase, count); +#endif + + // We may have moved the opcodes already because of multiple defines pointing to the same op + if (defineToSplit->psOp->ui32RegisterNumber != newReg) + { + ASSERT(defineToSplit->psOp->ui32RegisterNumber == oldReg); + // Update the declaration operand + // Don't change possible suboperands as they are sources + defineToSplit->psInst->ChangeOperandTempRegister(defineToSplit->psOp, oldReg, newReg, accessMask, UD_CHANGE_MAIN_OPERAND, rebase); + } + + defineToSplit->writeMask >>= rebase; + defineToSplit->index -= rebase; + // Change the temp register number for all usages + UsageSet::iterator ul = defineToSplit->usages.begin(); + while (ul != defineToSplit->usages.end()) + { + // Already updated by one of the siblings? Skip. 
+ if ((*ul)->psOp->ui32RegisterNumber != newReg) + { + ASSERT((*ul)->psOp->ui32RegisterNumber == oldReg); + (*ul)->psInst->ChangeOperandTempRegister((*ul)->psOp, oldReg, newReg, accessMask, UD_CHANGE_MAIN_OPERAND, rebase); + } + + // Update the UD chain + { + UseDefineChain::iterator udLoc = psUDChains[oldReg * 4 + oldIdx].begin(); + while (udLoc != psUDChains[oldReg * 4 + oldIdx].end()) + { + if (&*udLoc == *ul) + { + // Move to new list + psUDChains[newReg * 4 + oldIdx - rebase].splice(psUDChains[newReg * 4 + oldIdx - rebase].begin(), psUDChains[oldReg * 4 + oldIdx], udLoc); + + if (rebase > 0) + { + (*ul)->accessMask >>= rebase; + (*ul)->index -= rebase; + memmove((*ul)->psSiblings, (*ul)->psSiblings + rebase, (4 - rebase) * sizeof(UseDefineChain *)); + } + break; + } + udLoc++; + } + } + + ul++; + } + + // Move the define out of the old chain (if its still there) + { + // Find the define in the old chain + DefineUseChain::iterator duLoc = psDUChains[oldReg * 4 + oldIdx].begin(); + while (duLoc != psDUChains[oldReg * 4 + oldIdx].end() && ((&*duLoc) != defineToSplit)) + { + duLoc++; + } + ASSERT(duLoc != psDUChains[oldReg * 4 + oldIdx].end()); + { + // Move directly to new chain + psDUChains[newReg * 4 + oldIdx - rebase].splice(psDUChains[newReg * 4 + oldIdx - rebase].begin(), psDUChains[oldReg * 4 + oldIdx], duLoc); + if (rebase != 0) + { + memmove(defineToSplit->psSiblings, defineToSplit->psSiblings + rebase, (4 - rebase) * sizeof(DefineUseChain *)); + } + } + } + } + +#if DEBUG_UDCHAINS + UDCheckConsistency(*psNumTemps, psDUChains, psUDChains, ActiveDefinitions()); +#endif +} + +// Adds a define and all its siblings to the list, checking duplicates +static void AddDefineToList(SplitDefinitions &defs, DefineUseChainEntry *newDef) +{ + uint32_t k; + for (k = 0; k < 4; k++) + { + if (newDef->psSiblings[k]) + { + DefineUseChainEntry *defToAdd = newDef->psSiblings[k]; + uint32_t m; + int defFound = 0; + for (m = 0; m < defs.size(); m++) + { + if (defs[m] == defToAdd) + { + defFound = 1; + break; + } + } + if (defFound == 0) + { + defs.push_back(newDef->psSiblings[k]); + } + } + } +} + +// Check if a set of definitions can be split and does the split. Returns nonzero if a split took place +static int AttemptSplitDefinitions(SplitDefinitions &defs, uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector &pui32SplitTable) +{ + uint32_t reg; + uint32_t combinedMask; + uint32_t i, k, u32def; + int canSplit = 1; + DefineUseChain::iterator du; + int hasLeftoverDefinitions = 0; + // Initial checks: all definitions must: + // Access the same register + // Have at least one definition in any of the 4 register slots that isn't included + if (defs.empty()) + return 0; + + reg = defs[0]->psOp->ui32RegisterNumber; + combinedMask = defs[0]->writeMask; + for (i = 1; i < defs.size(); i++) + { + if (reg != defs[i]->psOp->ui32RegisterNumber) + return 0; + + combinedMask |= defs[i]->writeMask; + } + for (i = 0; i < 4; i++) + { + du = psDUChains[reg * 4 + i].begin(); + while (du != psDUChains[reg * 4 + i].end()) + { + int defFound = 0; + for (k = 0; k < defs.size(); k++) + { + if (&*du == defs[k]) + { + defFound = 1; + break; + } + } + if (defFound == 0) + { + hasLeftoverDefinitions = 1; + break; + } + du++; + } + if (hasLeftoverDefinitions) + break; + } + // We'd be splitting the entire register and all its definitions, no point in that. + if (hasLeftoverDefinitions == 0) + return 0; + + // Check all the definitions. 
Any of them must not have any usages that see any definitions not in our defs array. + for (u32def = 0; u32def < defs.size(); u32def++) + { + DefineUseChainEntry *def = defs[u32def]; + + UsageSet::iterator ul = def->usages.begin(); + while (ul != def->usages.end()) + { + uint32_t j; + + // Check that we only read a subset of the combined writemask + if (((*ul)->accessMask & (~combinedMask)) != 0) + { + // Do an additional attempt, pick up all the sibling definitions as well + // Only do this if we have the space in the definitions table + for (j = 0; j < 4; j++) + { + if (((*ul)->accessMask & (1 << j)) == 0) + continue; + AddDefineToList(defs, *(*ul)->psSiblings[j]->defines.begin()); + } + return AttemptSplitDefinitions(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable); + } + + // It must have at least one declaration + ASSERT(!(*ul)->defines.empty()); + + // Check that all siblings for the usage use one of the definitions + for (j = 0; j < 4; j++) + { + uint32_t m; + int defineFound = 0; + if (((*ul)->accessMask & (1 << j)) == 0) + continue; + + ASSERT((*ul)->psSiblings[j] != NULL); + ASSERT(!(*ul)->psSiblings[j]->defines.empty()); + + // Check that all definitions for this usage are found from the definitions table + DefineSet::iterator dl = (*ul)->psSiblings[j]->defines.begin(); + while (dl != (*ul)->psSiblings[j]->defines.end()) + { + defineFound = 0; + for (m = 0; m < defs.size(); m++) + { + if (*dl == defs[m]) + { + defineFound = 1; + break; + } + } + if (defineFound == 0) + { + // Add this define and all its siblings to the table and try again + AddDefineToList(defs, *dl); + return AttemptSplitDefinitions(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable); + } + + dl++; + } + + if (defineFound == 0) + { + canSplit = 0; + break; + } + } + if (canSplit == 0) + break; + + // This'll do, check next usage + ul++; + } + if (canSplit == 0) + break; + } + if (canSplit) + { + UDDoSplit(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable); + return 1; + } + return 0; +} + +// Do temp splitting based on use-define chains +void UDSplitTemps(uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector &pui32SplitTable) +{ + // Algorithm overview: + // Take each definition and look at all its usages. If all usages only see this definition (and this is not the only definition for this variable), + // split it out. + uint32_t i; + uint32_t tempsAtStart = *psNumTemps; // We don't need to try to analyze the newly created ones, they're unsplittable by definition + for (i = 0; i < tempsAtStart * 4; i++) + { + // No definitions? 
+ if (psDUChains[i].empty()) + continue; + + DefineUseChain::iterator du = psDUChains[i].begin(); + // Ok we have multiple definitions for a temp, check them through + while (du != psDUChains[i].end()) + { + SplitDefinitions sd; + AddDefineToList(sd, &*du); + du++; + // If we split, we'll have to start from the beginning of this chain because du might no longer be in this chain + if (AttemptSplitDefinitions(sd, psNumTemps, psDUChains, psUDChains, pui32SplitTable)) + { + du = psDUChains[i].begin(); + } + } + } +} + +// Returns true if all the usages of this definitions are instructions that deal with floating point data +static bool HasOnlyFloatUsages(DefineUseChain::iterator du) +{ + UsageSet::iterator itr = du->usages.begin(); + for (; itr != du->usages.end(); itr++) + { + Instruction *psInst = (*itr)->psInst; + + if ((*itr)->psOp->eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT) + return false; + + switch (psInst->eOpcode) + { + case OPCODE_ADD: + case OPCODE_MUL: + case OPCODE_MOV: + case OPCODE_MAD: + case OPCODE_DIV: + case OPCODE_LOG: + case OPCODE_EXP: + case OPCODE_MAX: + case OPCODE_MIN: + case OPCODE_DP2: + case OPCODE_DP2ADD: + case OPCODE_DP3: + case OPCODE_DP4: + case OPCODE_RSQ: + case OPCODE_SQRT: + break; + default: + return false; + } + } + return true; +} + +// Based on the sampler precisions, downgrade the definitions if possible. +void UpdateSamplerPrecisions(const ShaderInfo &info, DefineUseChains &psDUChains, uint32_t ui32NumTemps) +{ + uint32_t madeProgress = 0; + do + { + uint32_t i; + madeProgress = 0; + for (i = 0; i < ui32NumTemps * 4; i++) + { + DefineUseChain::iterator du = psDUChains[i].begin(); + while (du != psDUChains[i].end()) + { + OPERAND_MIN_PRECISION sType = OPERAND_MIN_PRECISION_DEFAULT; + if (du->psInst->IsPartialPrecisionSamplerInstruction(info, &sType) + && du->psInst->asOperands[0].eType == OPERAND_TYPE_TEMP + && du->psInst->asOperands[0].eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT + && du->isStandalone + && HasOnlyFloatUsages(du)) + { + uint32_t sibl; + // Ok we can change the precision. + ASSERT(du->psOp->eType == OPERAND_TYPE_TEMP); + ASSERT(sType != OPERAND_MIN_PRECISION_DEFAULT); + du->psOp->eMinPrecision = sType; + + // Update all the uses of all the siblings + for (sibl = 0; sibl < 4; sibl++) + { + if (!du->psSiblings[sibl]) + continue; + + UsageSet::iterator ul = du->psSiblings[sibl]->usages.begin(); + while (ul != du->psSiblings[sibl]->usages.end()) + { + ASSERT((*ul)->psOp->eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT || + (*ul)->psOp->eMinPrecision == sType); + // We may well write this multiple times to the same op but that's fine. 
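+ // (Several sibling chains can hold entries that point at the same Operand — one entry
+ // per component the operand reads — so the same value may simply be assigned again.)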
+ (*ul)->psOp->eMinPrecision = sType; + + ul++; + } + } + madeProgress = 1; + } + du++; + } + } + } + while (madeProgress != 0); +} + +void CalculateStandaloneDefinitions(DefineUseChains &psDUChains, uint32_t ui32NumTemps) +{ + uint32_t i; + for (i = 0; i < ui32NumTemps * 4; i++) + { + DefineUseChain::iterator du = psDUChains[i].begin(); + while (du != psDUChains[i].end()) + { + uint32_t sibl; + int isStandalone = 1; + if (du->isStandalone) + { + du++; + continue; + } + + for (sibl = 0; sibl < 4; sibl++) + { + if (!du->psSiblings[sibl]) + continue; + + UsageSet::iterator ul = du->psSiblings[sibl]->usages.begin(); + while (ul != du->psSiblings[sibl]->usages.end()) + { + uint32_t k; + ASSERT(!(*ul)->defines.empty()); + + // Need to check that all the siblings of this usage only see this definition's corresponding sibling + for (k = 0; k < 4; k++) + { + if (!(*ul)->psSiblings[k]) + continue; + + if ((*ul)->psSiblings[k]->defines.size() > 1 + || *(*ul)->psSiblings[k]->defines.begin() != du->psSiblings[k]) + { + isStandalone = 0; + break; + } + } + if (isStandalone == 0) + break; + + ul++; + } + if (isStandalone == 0) + break; + } + + if (isStandalone) + { + // Yep, mark it + for (sibl = 0; sibl < 4; sibl++) + { + if (!du->psSiblings[sibl]) + continue; + du->psSiblings[sibl]->isStandalone = 1; + } + } + du++; + } + } +} + +// Write the uses and defines back to Instruction and Operand member lists. +void WriteBackUsesAndDefines(DefineUseChains &psDUChains) +{ + using namespace std; + // Loop through the whole data structure, and write usages and defines to Instructions and Operands as we see them + for_each(psDUChains.begin(), psDUChains.end(), [](const DefineUseChains::value_type &itr) + { + const DefineUseChain &duChain = itr.second; + for_each(duChain.begin(), duChain.end(), [](const DefineUseChain::value_type &du) + { + for_each(du.usages.begin(), du.usages.end(), [&du](const UseDefineChainEntry *usage) + { + // Update instruction use list + du.psInst->m_Uses.push_back(Instruction::Use(usage->psInst, usage->psOp)); + // And the usage's definition + usage->psOp->m_Defines.push_back(Operand::Define(du.psInst, du.psOp)); + }); + }); + }); +} diff --git a/third_party/HLSLcc/src/cbstring/bsafe.c b/third_party/HLSLcc/src/cbstring/bsafe.c new file mode 100644 index 0000000..6503761 --- /dev/null +++ b/third_party/HLSLcc/src/cbstring/bsafe.c @@ -0,0 +1,87 @@ +/* + * This source file is part of the bstring string library. This code was + * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause + * BSD open source license or GPL v2.0. Refer to the accompanying documentation + * for details on usage and license. + */ + +/* + * bsafe.c + * + * This is an optional module that can be used to help enforce a safety + * standard based on pervasive usage of bstrlib. This file is not necessarily + * portable, however, it has been tested to work correctly with Intel's C/C++ + * compiler, WATCOM C/C++ v11.x and Microsoft Visual C++. 
+ */ + +#include +#include +#include "bsafe.h" + +#if 0 +static int bsafeShouldExit = 1; + +char * strcpy(char *dst, const char *src); +char * strcat(char *dst, const char *src); + +char * strcpy(char *dst, const char *src) +{ + dst = dst; + src = src; + fprintf(stderr, "bsafe error: strcpy() is not safe, use bstrcpy instead.\n"); + if (bsafeShouldExit) exit(-1); + return NULL; +} + +char * strcat(char *dst, const char *src) +{ + dst = dst; + src = src; + fprintf(stderr, "bsafe error: strcat() is not safe, use bstrcat instead.\n"); + if (bsafeShouldExit) exit(-1); + return NULL; +} + +#if !defined(__GNUC__) && (!defined(_MSC_VER) || (_MSC_VER <= 1310)) +char * (gets)(char * buf) { + buf = buf; + fprintf(stderr, "bsafe error: gets() is not safe, use bgets.\n"); + if (bsafeShouldExit) exit(-1); + return NULL; +} +#endif + +char * (strncpy)(char *dst, const char *src, size_t n) { + dst = dst; + src = src; + n = n; + fprintf(stderr, "bsafe error: strncpy() is not safe, use bmidstr instead.\n"); + if (bsafeShouldExit) exit(-1); + return NULL; +} + +char * (strncat)(char *dst, const char *src, size_t n) { + dst = dst; + src = src; + n = n; + fprintf(stderr, "bsafe error: strncat() is not safe, use bstrcat then btrunc\n\tor cstr2tbstr, btrunc then bstrcat instead.\n"); + if (bsafeShouldExit) exit(-1); + return NULL; +} + +char * (strtok)(char *s1, const char *s2) { + s1 = s1; + s2 = s2; + fprintf(stderr, "bsafe error: strtok() is not safe, use bsplit or bsplits instead.\n"); + if (bsafeShouldExit) exit(-1); + return NULL; +} + +char * (strdup)(const char *s) { + s = s; + fprintf(stderr, "bsafe error: strdup() is not safe, use bstrcpy.\n"); + if (bsafeShouldExit) exit(-1); + return NULL; +} + +#endif diff --git a/third_party/HLSLcc/src/cbstring/bsafe.h b/third_party/HLSLcc/src/cbstring/bsafe.h new file mode 100644 index 0000000..d921917 --- /dev/null +++ b/third_party/HLSLcc/src/cbstring/bsafe.h @@ -0,0 +1,43 @@ +/* + * This source file is part of the bstring string library. This code was + * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause + * BSD open source license or GPL v2.0. Refer to the accompanying documentation + * for details on usage and license. + */ + +/* + * bsafe.h + * + * This is an optional module that can be used to help enforce a safety + * standard based on pervasive usage of bstrlib. This file is not necessarily + * portable, however, it has been tested to work correctly with Intel's C/C++ + * compiler, WATCOM C/C++ v11.x and Microsoft Visual C++. + */ + +#ifndef BSTRLIB_BSAFE_INCLUDE +#define BSTRLIB_BSAFE_INCLUDE + +#ifdef __cplusplus +extern "C" { +#endif + +#if !defined(__GNUC__) && (!defined(_MSC_VER) || (_MSC_VER <= 1310)) +/* This is caught in the linker, so its not necessary for gcc. */ +extern char * (gets)(char * buf); +#endif + +extern char * (strncpy)(char *dst, const char *src, size_t n); +extern char * (strncat)(char *dst, const char *src, size_t n); +extern char * (strtok)(char *s1, const char *s2); +extern char * (strdup)(const char *s); + +#undef strcpy +#undef strcat +#define strcpy(a, b) bsafe_strcpy(a,b) +#define strcat(a, b) bsafe_strcat(a,b) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/third_party/HLSLcc/src/cbstring/bstraux.c b/third_party/HLSLcc/src/cbstring/bstraux.c new file mode 100644 index 0000000..34cb3d3 --- /dev/null +++ b/third_party/HLSLcc/src/cbstring/bstraux.c @@ -0,0 +1,1273 @@ +/* + * This source file is part of the bstring string library. 
This code was + * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause + * BSD open source license or GPL v2.0. Refer to the accompanying documentation + * for details on usage and license. + */ + +/* + * bstraux.c + * + * This file is not necessarily part of the core bstring library itself, but + * is just an auxilliary module which includes miscellaneous or trivial + * functions. + */ + +#include +#include +#include +#include +#include +#include "bstrlib.h" +#include "bstraux.h" + +/* bstring bTail (bstring b, int n) + * + * Return with a string of the last n characters of b. + */ +bstring bTail(bstring b, int n) +{ + if (b == NULL || n < 0 || (b->mlen < b->slen && b->mlen > 0)) return NULL; + if (n >= b->slen) return bstrcpy(b); + return bmidstr(b, b->slen - n, n); +} + +/* bstring bHead (bstring b, int n) + * + * Return with a string of the first n characters of b. + */ +bstring bHead(bstring b, int n) +{ + if (b == NULL || n < 0 || (b->mlen < b->slen && b->mlen > 0)) return NULL; + if (n >= b->slen) return bstrcpy(b); + return bmidstr(b, 0, n); +} + +/* int bFill (bstring a, char c, int len) + * + * Fill a given bstring with the character in parameter c, for a length n. + */ +int bFill(bstring b, char c, int len) +{ + if (b == NULL || len < 0 || (b->mlen < b->slen && b->mlen > 0)) return -__LINE__; + b->slen = 0; + return bsetstr(b, len, NULL, c); +} + +/* int bReplicate (bstring b, int n) + * + * Replicate the contents of b end to end n times and replace it in b. + */ +int bReplicate(bstring b, int n) +{ + return bpattern(b, n * b->slen); +} + +/* int bReverse (bstring b) + * + * Reverse the contents of b in place. + */ +int bReverse(bstring b) +{ + int i, n, m; + unsigned char t; + + if (b == NULL || b->slen < 0 || b->mlen < b->slen) return -__LINE__; + n = b->slen; + if (2 <= n) + { + m = ((unsigned)n) >> 1; + n--; + for (i = 0; i < m; i++) + { + t = b->data[n - i]; + b->data[n - i] = b->data[i]; + b->data[i] = t; + } + } + return 0; +} + +/* int bInsertChrs (bstring b, int pos, int len, unsigned char c, unsigned char fill) + * + * Insert a repeated sequence of a given character into the string at + * position pos for a length len. + */ +int bInsertChrs(bstring b, int pos, int len, unsigned char c, unsigned char fill) +{ + if (b == NULL || b->slen < 0 || b->mlen < b->slen || pos < 0 || len <= 0) return -__LINE__; + + if (pos > b->slen + && 0 > bsetstr(b, pos, NULL, fill)) return -__LINE__; + + if (0 > balloc(b, b->slen + len)) return -__LINE__; + if (pos < b->slen) memmove(b->data + pos + len, b->data + pos, b->slen - pos); + memset(b->data + pos, c, len); + b->slen += len; + b->data[b->slen] = (unsigned char)'\0'; + return BSTR_OK; +} + +/* int bJustifyLeft (bstring b, int space) + * + * Left justify a string. + */ +int bJustifyLeft(bstring b, int space) +{ + int j, i, s, t; + unsigned char c = (unsigned char)space; + + if (b == NULL || b->slen < 0 || b->mlen < b->slen) return -__LINE__; + if (space != (int)c) return BSTR_OK; + + for (s = j = i = 0; i < b->slen; i++) + { + t = s; + s = c != (b->data[j] = b->data[i]); + j += (t | s); + } + if (j > 0 && b->data[j - 1] == c) j--; + + b->data[j] = (unsigned char)'\0'; + b->slen = j; + return BSTR_OK; +} + +/* int bJustifyRight (bstring b, int width, int space) + * + * Right justify a string to within a given width. 
+ */ +int bJustifyRight(bstring b, int width, int space) +{ + int ret; + if (width <= 0) return -__LINE__; + if (0 > (ret = bJustifyLeft(b, space))) return ret; + if (b->slen <= width) + return bInsertChrs(b, 0, width - b->slen, (unsigned char)space, (unsigned char)space); + return BSTR_OK; +} + +/* int bJustifyCenter (bstring b, int width, int space) + * + * Center a string's non-white space characters to within a given width by + * inserting whitespaces at the beginning. + */ +int bJustifyCenter(bstring b, int width, int space) +{ + int ret; + if (width <= 0) return -__LINE__; + if (0 > (ret = bJustifyLeft(b, space))) return ret; + if (b->slen <= width) + return bInsertChrs(b, 0, (width - b->slen + 1) >> 1, (unsigned char)space, (unsigned char)space); + return BSTR_OK; +} + +/* int bJustifyMargin (bstring b, int width, int space) + * + * Stretch a string to flush against left and right margins by evenly + * distributing additional white space between words. If the line is too + * long to be margin justified, it is left justified. + */ +int bJustifyMargin(bstring b, int width, int space) +{ + struct bstrList * sl; + int i, l, c; + + if (b == NULL || b->slen < 0 || b->mlen == 0 || b->mlen < b->slen) return -__LINE__; + if (NULL == (sl = bsplit(b, (unsigned char)space))) return -__LINE__; + for (l = c = i = 0; i < sl->qty; i++) + { + if (sl->entry[i]->slen > 0) + { + c++; + l += sl->entry[i]->slen; + } + } + + if (l + c >= width || c < 2) + { + bstrListDestroy(sl); + return bJustifyLeft(b, space); + } + + b->slen = 0; + for (i = 0; i < sl->qty; i++) + { + if (sl->entry[i]->slen > 0) + { + if (b->slen > 0) + { + int s = (width - l + (c / 2)) / c; + bInsertChrs(b, b->slen, s, (unsigned char)space, (unsigned char)space); + l += s; + } + bconcat(b, sl->entry[i]); + c--; + if (c <= 0) break; + } + } + + bstrListDestroy(sl); + return BSTR_OK; +} + +static size_t readNothing(void *buff, size_t elsize, size_t nelem, void *parm) +{ + return 0; /* Immediately indicate EOF. */ +} + +/* struct bStream * bsFromBstr (const_bstring b); + * + * Create a bStream whose contents are a copy of the bstring passed in. + * This allows the use of all the bStream APIs with bstrings. + */ +struct bStream * bsFromBstr(const_bstring b) +{ + struct bStream * s = bsopen((bNread)readNothing, NULL); + bsunread(s, b); /* Push the bstring data into the empty bStream. */ + return s; +} + +static size_t readRef(void *buff, size_t elsize, size_t nelem, void *parm) +{ + struct tagbstring * t = (struct tagbstring *)parm; + size_t tsz = elsize * nelem; + + if (tsz > (size_t)t->slen) tsz = (size_t)t->slen; + if (tsz > 0) + { + memcpy(buff, t->data, tsz); + t->slen -= (int)tsz; + t->data += tsz; + return tsz / elsize; + } + return 0; +} + +/* The "by reference" version of the above function. This function puts + * a number of restrictions on the call site (the passed in struct + * tagbstring *will* be modified by this function, and the source data + * must remain alive and constant for the lifetime of the bStream). + * Hence it is not presented as an extern. + */ +static struct bStream * bsFromBstrRef(struct tagbstring * t) +{ + if (!t) return NULL; + return bsopen((bNread)readRef, t); +} + +/* char * bStr2NetStr (const_bstring b) + * + * Convert a bstring to a netstring. See + * http://cr.yp.to/proto/netstrings.txt for a description of netstrings. + * Note: 1) The value returned should be freed with a call to bcstrfree() at + * the point when it will no longer be referenced to avoid a memory + * leak. 
+ * 2) If the returned value is non-NULL, then it also '\0' terminated + * in the character position one past the "," terminator. + */ +char * bStr2NetStr(const_bstring b) +{ + char strnum[sizeof(b->slen) * 3 + 1]; + bstring s; + unsigned char * buff; + + if (b == NULL || b->data == NULL || b->slen < 0) return NULL; + sprintf(strnum, "%d:", b->slen); + if (NULL == (s = bfromcstr(strnum)) + || bconcat(s, b) == BSTR_ERR || bconchar(s, (char)',') == BSTR_ERR) + { + bdestroy(s); + return NULL; + } + buff = s->data; + bcstrfree((char *)s); + return (char *)buff; +} + +/* bstring bNetStr2Bstr (const char * buf) + * + * Convert a netstring to a bstring. See + * http://cr.yp.to/proto/netstrings.txt for a description of netstrings. + * Note that the terminating "," *must* be present, however a following '\0' + * is *not* required. + */ +bstring bNetStr2Bstr(const char * buff) +{ + int i, x; + bstring b; + if (buff == NULL) return NULL; + x = 0; + for (i = 0; buff[i] != ':'; i++) + { + unsigned int v = buff[i] - '0'; + if (v > 9 || x > ((INT_MAX - (signed int)v) / 10)) return NULL; + x = (x * 10) + v; + } + + /* This thing has to be properly terminated */ + if (buff[i + 1 + x] != ',') return NULL; + + if (NULL == (b = bfromcstr(""))) return NULL; + if (balloc(b, x + 1) != BSTR_OK) + { + bdestroy(b); + return NULL; + } + memcpy(b->data, buff + i + 1, x); + b->data[x] = (unsigned char)'\0'; + b->slen = x; + return b; +} + +static char b64ETable[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + +/* bstring bBase64Encode (const_bstring b) + * + * Generate a base64 encoding. See: RFC1341 + */ +bstring bBase64Encode(const_bstring b) +{ + int i, c0, c1, c2, c3; + bstring out; + + if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; + + out = bfromcstr(""); + for (i = 0; i + 2 < b->slen; i += 3) + { + if (i && ((i % 57) == 0)) + { + if (bconchar(out, (char)'\015') < 0 || bconchar(out, (char)'\012') < 0) + { + bdestroy(out); + return NULL; + } + } + c0 = b->data[i] >> 2; + c1 = ((b->data[i] << 4) | + (b->data[i + 1] >> 4)) & 0x3F; + c2 = ((b->data[i + 1] << 2) | + (b->data[i + 2] >> 6)) & 0x3F; + c3 = b->data[i + 2] & 0x3F; + if (bconchar(out, b64ETable[c0]) < 0 || + bconchar(out, b64ETable[c1]) < 0 || + bconchar(out, b64ETable[c2]) < 0 || + bconchar(out, b64ETable[c3]) < 0) + { + bdestroy(out); + return NULL; + } + } + + if (i && ((i % 57) == 0)) + { + if (bconchar(out, (char)'\015') < 0 || bconchar(out, (char)'\012') < 0) + { + bdestroy(out); + return NULL; + } + } + + switch (i + 2 - b->slen) + { + case 0: c0 = b->data[i] >> 2; + c1 = ((b->data[i] << 4) | + (b->data[i + 1] >> 4)) & 0x3F; + c2 = (b->data[i + 1] << 2) & 0x3F; + if (bconchar(out, b64ETable[c0]) < 0 || + bconchar(out, b64ETable[c1]) < 0 || + bconchar(out, b64ETable[c2]) < 0 || + bconchar(out, (char)'=') < 0) + { + bdestroy(out); + return NULL; + } + break; + case 1: c0 = b->data[i] >> 2; + c1 = (b->data[i] << 4) & 0x3F; + if (bconchar(out, b64ETable[c0]) < 0 || + bconchar(out, b64ETable[c1]) < 0 || + bconchar(out, (char)'=') < 0 || + bconchar(out, (char)'=') < 0) + { + bdestroy(out); + return NULL; + } + break; + case 2: break; + } + + return out; +} + +#define B64_PAD (-2) +#define B64_ERR (-1) + +static int base64DecodeSymbol(unsigned char alpha) +{ + if ((alpha >= 'A') && (alpha <= 'Z')) return (int)(alpha - 'A'); + else if ((alpha >= 'a') && (alpha <= 'z')) + return 26 + (int)(alpha - 'a'); + else if ((alpha >= '0') && (alpha <= '9')) + return 52 + (int)(alpha - '0'); + else if (alpha == '+') return 62; + 
else if (alpha == '/') return 63; + else if (alpha == '=') return B64_PAD; + else return B64_ERR; +} + +/* bstring bBase64DecodeEx (const_bstring b, int * boolTruncError) + * + * Decode a base64 block of data. All MIME headers are assumed to have been + * removed. See: RFC1341 + */ +bstring bBase64DecodeEx(const_bstring b, int * boolTruncError) +{ + int i, v; + unsigned char c0, c1, c2; + bstring out; + + if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; + if (boolTruncError) *boolTruncError = 0; + out = bfromcstr(""); + i = 0; + for (;;) + { + do + { + if (i >= b->slen) return out; + if (b->data[i] == '=') /* Bad "too early" truncation */ + { + if (boolTruncError) + { + *boolTruncError = 1; + return out; + } + bdestroy(out); + return NULL; + } + v = base64DecodeSymbol(b->data[i]); + i++; + } + while (v < 0); + c0 = (unsigned char)(v << 2); + do + { + if (i >= b->slen || b->data[i] == '=') /* Bad "too early" truncation */ + { + if (boolTruncError) + { + *boolTruncError = 1; + return out; + } + bdestroy(out); + return NULL; + } + v = base64DecodeSymbol(b->data[i]); + i++; + } + while (v < 0); + c0 |= (unsigned char)(v >> 4); + c1 = (unsigned char)(v << 4); + do + { + if (i >= b->slen) + { + if (boolTruncError) + { + *boolTruncError = 1; + return out; + } + bdestroy(out); + return NULL; + } + if (b->data[i] == '=') + { + i++; + if (i >= b->slen || b->data[i] != '=' || bconchar(out, c0) < 0) + { + if (boolTruncError) + { + *boolTruncError = 1; + return out; + } + bdestroy(out); /* Missing "=" at the end. */ + return NULL; + } + return out; + } + v = base64DecodeSymbol(b->data[i]); + i++; + } + while (v < 0); + c1 |= (unsigned char)(v >> 2); + c2 = (unsigned char)(v << 6); + do + { + if (i >= b->slen) + { + if (boolTruncError) + { + *boolTruncError = 1; + return out; + } + bdestroy(out); + return NULL; + } + if (b->data[i] == '=') + { + if (bconchar(out, c0) < 0 || bconchar(out, c1) < 0) + { + if (boolTruncError) + { + *boolTruncError = 1; + return out; + } + bdestroy(out); + return NULL; + } + if (boolTruncError) *boolTruncError = 0; + return out; + } + v = base64DecodeSymbol(b->data[i]); + i++; + } + while (v < 0); + c2 |= (unsigned char)(v); + if (bconchar(out, c0) < 0 || + bconchar(out, c1) < 0 || + bconchar(out, c2) < 0) + { + if (boolTruncError) + { + *boolTruncError = -1; + return out; + } + bdestroy(out); + return NULL; + } + } +} + +#define UU_DECODE_BYTE(b) (((b) == (signed int)'`') ? 
0 : (b) - (signed int)' ') + +struct bUuInOut +{ + bstring src, dst; + int * badlines; +}; + +#define UU_MAX_LINELEN 45 + +static int bUuDecLine(void * parm, int ofs, int len) +{ + struct bUuInOut * io = (struct bUuInOut *)parm; + bstring s = io->src; + bstring t = io->dst; + int i, llen, otlen, ret, c0, c1, c2, c3, d0, d1, d2, d3; + + if (len == 0) return 0; + llen = UU_DECODE_BYTE(s->data[ofs]); + ret = 0; + + otlen = t->slen; + + if (((unsigned)llen) > UU_MAX_LINELEN) + { + ret = -__LINE__; + goto bl; + } + + llen += t->slen; + + for (i = 1; i < s->slen && t->slen < llen; i += 4) + { + unsigned char outoctet[3]; + c0 = UU_DECODE_BYTE(d0 = (int)bchare(s, i + ofs + 0, ' ' - 1)); + c1 = UU_DECODE_BYTE(d1 = (int)bchare(s, i + ofs + 1, ' ' - 1)); + c2 = UU_DECODE_BYTE(d2 = (int)bchare(s, i + ofs + 2, ' ' - 1)); + c3 = UU_DECODE_BYTE(d3 = (int)bchare(s, i + ofs + 3, ' ' - 1)); + + if (((unsigned)(c0 | c1) >= 0x40)) + { + if (!ret) ret = -__LINE__; + if (d0 > 0x60 || (d0 < (' ' - 1) && !isspace(d0)) || + d1 > 0x60 || (d1 < (' ' - 1) && !isspace(d1))) + { + t->slen = otlen; + goto bl; + } + c0 = c1 = 0; + } + outoctet[0] = (unsigned char)((c0 << 2) | ((unsigned)c1 >> 4)); + if (t->slen + 1 >= llen) + { + if (0 > bconchar(t, (char)outoctet[0])) return -__LINE__; + break; + } + if ((unsigned)c2 >= 0x40) + { + if (!ret) ret = -__LINE__; + if (d2 > 0x60 || (d2 < (' ' - 1) && !isspace(d2))) + { + t->slen = otlen; + goto bl; + } + c2 = 0; + } + outoctet[1] = (unsigned char)((c1 << 4) | ((unsigned)c2 >> 2)); + if (t->slen + 2 >= llen) + { + if (0 > bcatblk(t, outoctet, 2)) return -__LINE__; + break; + } + if ((unsigned)c3 >= 0x40) + { + if (!ret) ret = -__LINE__; + if (d3 > 0x60 || (d3 < (' ' - 1) && !isspace(d3))) + { + t->slen = otlen; + goto bl; + } + c3 = 0; + } + outoctet[2] = (unsigned char)((c2 << 6) | ((unsigned)c3)); + if (0 > bcatblk(t, outoctet, 3)) return -__LINE__; + } + if (t->slen < llen) + { + if (0 == ret) ret = -__LINE__; + t->slen = otlen; + } +bl:; + if (ret && io->badlines) + { + (*io->badlines)++; + return 0; + } + return ret; +} + +/* bstring bUuDecodeEx (const_bstring src, int * badlines) + * + * Performs a UUDecode of a block of data. If there are errors in the + * decoding, they are counted up and returned in "badlines", if badlines is + * not NULL. It is assumed that the "begin" and "end" lines have already + * been stripped off. The potential security problem of writing the + * filename in the begin line is something that is beyond the scope of a + * portable library. 
+ */ + +#ifdef _MSC_VER +#pragma warning(disable:4204) +#endif + +bstring bUuDecodeEx(const_bstring src, int * badlines) +{ + struct tagbstring t; + struct bStream * s; + struct bStream * d; + bstring b; + + if (!src) return NULL; + t = *src; /* Short lifetime alias to header of src */ + s = bsFromBstrRef(&t); /* t is undefined after this */ + if (!s) return NULL; + d = bsUuDecode(s, badlines); + b = bfromcstralloc(256, ""); + if (NULL == b || 0 > bsread(b, d, INT_MAX)) + { + bdestroy(b); + bsclose(d); + bsclose(s); + return NULL; + } + return b; +} + +struct bsUuCtx +{ + struct bUuInOut io; + struct bStream * sInp; +}; + +static size_t bsUuDecodePart(void *buff, size_t elsize, size_t nelem, void *parm) +{ + static struct tagbstring eol = bsStatic("\r\n"); + struct bsUuCtx * luuCtx = (struct bsUuCtx *)parm; + size_t tsz; + int l, lret; + + if (NULL == buff || NULL == parm) return 0; + tsz = elsize * nelem; + +CheckInternalBuffer:; + /* If internal buffer has sufficient data, just output it */ + if (((size_t)luuCtx->io.dst->slen) > tsz) + { + memcpy(buff, luuCtx->io.dst->data, tsz); + bdelete(luuCtx->io.dst, 0, (int)tsz); + return nelem; + } + +DecodeMore:; + if (0 <= (l = binchr(luuCtx->io.src, 0, &eol))) + { + int ol = 0; + struct tagbstring t; + bstring s = luuCtx->io.src; + luuCtx->io.src = &t; + + do + { + if (l > ol) + { + bmid2tbstr(t, s, ol, l - ol); + lret = bUuDecLine(&luuCtx->io, 0, t.slen); + if (0 > lret) + { + luuCtx->io.src = s; + goto Done; + } + } + ol = l + 1; + if (((size_t)luuCtx->io.dst->slen) > tsz) break; + l = binchr(s, ol, &eol); + } + while (BSTR_ERR != l); + bdelete(s, 0, ol); + luuCtx->io.src = s; + goto CheckInternalBuffer; + } + + if (BSTR_ERR != bsreada(luuCtx->io.src, luuCtx->sInp, bsbufflength(luuCtx->sInp, BSTR_BS_BUFF_LENGTH_GET))) + { + goto DecodeMore; + } + + bUuDecLine(&luuCtx->io, 0, luuCtx->io.src->slen); + +Done:; + /* Output any lingering data that has been translated */ + if (((size_t)luuCtx->io.dst->slen) > 0) + { + if (((size_t)luuCtx->io.dst->slen) > tsz) goto CheckInternalBuffer; + memcpy(buff, luuCtx->io.dst->data, luuCtx->io.dst->slen); + tsz = luuCtx->io.dst->slen / elsize; + luuCtx->io.dst->slen = 0; + if (tsz > 0) return tsz; + } + + /* Deallocate once EOF becomes triggered */ + bdestroy(luuCtx->io.dst); + bdestroy(luuCtx->io.src); + free(luuCtx); + return 0; +} + +/* bStream * bsUuDecode (struct bStream * sInp, int * badlines) + * + * Creates a bStream which performs the UUDecode of an an input stream. If + * there are errors in the decoding, they are counted up and returned in + * "badlines", if badlines is not NULL. It is assumed that the "begin" and + * "end" lines have already been stripped off. The potential security + * problem of writing the filename in the begin line is something that is + * beyond the scope of a portable library. 
+ */ + +struct bStream * bsUuDecode(struct bStream * sInp, int * badlines) +{ + struct bsUuCtx * luuCtx = (struct bsUuCtx *)malloc(sizeof(struct bsUuCtx)); + struct bStream * sOut; + + if (NULL == luuCtx) return NULL; + + luuCtx->io.src = bfromcstr(""); + luuCtx->io.dst = bfromcstr(""); + if (NULL == luuCtx->io.dst || NULL == luuCtx->io.src) + { + CleanUpFailureToAllocate :; + bdestroy(luuCtx->io.dst); + bdestroy(luuCtx->io.src); + free(luuCtx); + return NULL; + } + luuCtx->io.badlines = badlines; + if (badlines) *badlines = 0; + + luuCtx->sInp = sInp; + + sOut = bsopen((bNread)bsUuDecodePart, luuCtx); + if (NULL == sOut) goto CleanUpFailureToAllocate; + return sOut; +} + +#define UU_ENCODE_BYTE(b) (char) (((b) == 0) ? '`' : ((b) + ' ')) + +/* bstring bUuEncode (const_bstring src) + * + * Performs a UUEncode of a block of data. The "begin" and "end" lines are + * not appended. + */ +bstring bUuEncode(const_bstring src) +{ + bstring out; + int i, j, jm; + unsigned int c0, c1, c2; + if (src == NULL || src->slen < 0 || src->data == NULL) return NULL; + if ((out = bfromcstr("")) == NULL) return NULL; + for (i = 0; i < src->slen; i += UU_MAX_LINELEN) + { + if ((jm = i + UU_MAX_LINELEN) > src->slen) jm = src->slen; + if (bconchar(out, UU_ENCODE_BYTE(jm - i)) < 0) + { + bstrFree(out); + break; + } + for (j = i; j < jm; j += 3) + { + c0 = (unsigned int)bchar(src, j); + c1 = (unsigned int)bchar(src, j + 1); + c2 = (unsigned int)bchar(src, j + 2); + if (bconchar(out, UU_ENCODE_BYTE((c0 & 0xFC) >> 2)) < 0 || + bconchar(out, UU_ENCODE_BYTE(((c0 & 0x03) << 4) | ((c1 & 0xF0) >> 4))) < 0 || + bconchar(out, UU_ENCODE_BYTE(((c1 & 0x0F) << 2) | ((c2 & 0xC0) >> 6))) < 0 || + bconchar(out, UU_ENCODE_BYTE((c2 & 0x3F))) < 0) + { + bstrFree(out); + goto End; + } + } + if (bconchar(out, (char)'\r') < 0 || bconchar(out, (char)'\n') < 0) + { + bstrFree(out); + break; + } + } +End:; + return out; +} + +/* bstring bYEncode (const_bstring src) + * + * Performs a YEncode of a block of data. No header or tail info is + * appended. See: http://www.yenc.org/whatis.htm and + * http://www.yenc.org/yenc-draft.1.3.txt + */ +bstring bYEncode(const_bstring src) +{ + int i; + bstring out; + unsigned char c; + + if (src == NULL || src->slen < 0 || src->data == NULL) return NULL; + if ((out = bfromcstr("")) == NULL) return NULL; + for (i = 0; i < src->slen; i++) + { + c = (unsigned char)(src->data[i] + 42); + if (c == '=' || c == '\0' || c == '\r' || c == '\n') + { + if (0 > bconchar(out, (char)'=')) + { + bdestroy(out); + return NULL; + } + c += (unsigned char)64; + } + if (0 > bconchar(out, c)) + { + bdestroy(out); + return NULL; + } + } + return out; +} + +/* bstring bYDecode (const_bstring src) + * + * Performs a YDecode of a block of data. See: + * http://www.yenc.org/whatis.htm and http://www.yenc.org/yenc-draft.1.3.txt + */ +#define MAX_OB_LEN (64) + +bstring bYDecode(const_bstring src) +{ + int i; + bstring out; + unsigned char c; + unsigned char octetbuff[MAX_OB_LEN]; + int obl; + + if (src == NULL || src->slen < 0 || src->data == NULL) return NULL; + if ((out = bfromcstr("")) == NULL) return NULL; + + obl = 0; + + for (i = 0; i < src->slen; i++) + { + if ('=' == (c = src->data[i])) /* The = escape mode */ + { + i++; + if (i >= src->slen) + { + bdestroy(out); + return NULL; + } + c = (unsigned char)(src->data[i] - 64); + } + else + { + if ('\0' == c) + { + bdestroy(out); + return NULL; + } + + /* Extraneous CR/LFs are to be ignored. 
*/ + if (c == '\r' || c == '\n') continue; + } + + octetbuff[obl] = (unsigned char)((int)c - 42); + obl++; + + if (obl >= MAX_OB_LEN) + { + if (0 > bcatblk(out, octetbuff, obl)) + { + bdestroy(out); + return NULL; + } + obl = 0; + } + } + + if (0 > bcatblk(out, octetbuff, obl)) + { + bdestroy(out); + out = NULL; + } + return out; +} + +/* bstring bStrfTime (const char * fmt, const struct tm * timeptr) + * + * Takes a format string that is compatible with strftime and a struct tm + * pointer, formats the time according to the format string and outputs + * the bstring as a result. Note that if there is an early generation of a + * '\0' character, the bstring will be truncated to this end point. + */ +bstring bStrfTime(const char * fmt, const struct tm * timeptr) +{ +#if defined(__TURBOC__) && !defined(__BORLANDC__) + static struct tagbstring ns = bsStatic("bStrfTime Not supported"); + fmt = fmt; + timeptr = timeptr; + return &ns; +#else + bstring buff; + int n; + size_t r; + + if (fmt == NULL) return NULL; + + /* Since the length is not determinable beforehand, a search is + performed using the truncating "strftime" call on increasing + potential sizes for the output result. */ + + if ((n = (int)(2 * strlen(fmt))) < 16) n = 16; + buff = bfromcstralloc(n + 2, ""); + + for (;;) + { + if (BSTR_OK != balloc(buff, n + 2)) + { + bdestroy(buff); + return NULL; + } + + r = strftime((char *)buff->data, n + 1, fmt, timeptr); + + if (r > 0) + { + buff->slen = (int)r; + break; + } + + n += n; + } + + return buff; +#endif +} + +/* int bSetCstrChar (bstring a, int pos, char c) + * + * Sets the character at position pos to the character c in the bstring a. + * If the character c is NUL ('\0') then the string is truncated at this + * point. Note: this does not enable any other '\0' character in the bstring + * as terminator indicator for the string. pos must be in the position + * between 0 and b->slen inclusive, otherwise BSTR_ERR will be returned. + */ +int bSetCstrChar(bstring b, int pos, char c) +{ + if (NULL == b || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen) + return BSTR_ERR; + if (pos < 0 || pos > b->slen) return BSTR_ERR; + + if (pos == b->slen) + { + if ('\0' != c) return bconchar(b, c); + return 0; + } + + b->data[pos] = (unsigned char)c; + if ('\0' == c) b->slen = pos; + + return 0; +} + +/* int bSetChar (bstring b, int pos, char c) + * + * Sets the character at position pos to the character c in the bstring a. + * The string is not truncated if the character c is NUL ('\0'). pos must + * be in the position between 0 and b->slen inclusive, otherwise BSTR_ERR + * will be returned. + */ +int bSetChar(bstring b, int pos, char c) +{ + if (NULL == b || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen) + return BSTR_ERR; + if (pos < 0 || pos > b->slen) return BSTR_ERR; + + if (pos == b->slen) + { + return bconchar(b, c); + } + + b->data[pos] = (unsigned char)c; + return 0; +} + +#define INIT_SECURE_INPUT_LENGTH (256) + +/* bstring bSecureInput (int maxlen, int termchar, + * bNgetc vgetchar, void * vgcCtx) + * + * Read input from an abstracted input interface, for a length of at most + * maxlen characters. If maxlen <= 0, then there is no length limit put + * on the input. The result is terminated early if vgetchar() return EOF + * or the user specified value termchar. 
+ * + */ +bstring bSecureInput(int maxlen, int termchar, bNgetc vgetchar, void * vgcCtx) +{ + int i, m, c; + bstring b, t; + + if (!vgetchar) return NULL; + + b = bfromcstralloc(INIT_SECURE_INPUT_LENGTH, ""); + if ((c = UCHAR_MAX + 1) == termchar) c++; + + for (i = 0;; i++) + { + if (termchar == c || (maxlen > 0 && i >= maxlen)) c = EOF; + else c = vgetchar(vgcCtx); + + if (EOF == c) break; + + if (i + 1 >= b->mlen) + { + /* Double size, but deal with unusual case of numeric + overflows */ + + if ((m = b->mlen << 1) <= b->mlen && + (m = b->mlen + 1024) <= b->mlen && + (m = b->mlen + 16) <= b->mlen && + (m = b->mlen + 1) <= b->mlen) t = NULL; + else t = bfromcstralloc(m, ""); + + if (t) memcpy(t->data, b->data, i); + bSecureDestroy(b); /* Cleanse previous buffer */ + b = t; + if (!b) return b; + } + + b->data[i] = (unsigned char)c; + } + + b->slen = i; + b->data[i] = (unsigned char)'\0'; + return b; +} + +#define BWS_BUFF_SZ (1024) + +struct bwriteStream +{ + bstring buff; /* Buffer for underwrites */ + void * parm; /* The stream handle for core stream */ + bNwrite writeFn; /* fwrite work-a-like fnptr for core stream */ + int isEOF; /* track stream's EOF state */ + int minBuffSz; +}; + +/* struct bwriteStream * bwsOpen (bNwrite writeFn, void * parm) + * + * Wrap a given open stream (described by a fwrite work-a-like function + * pointer and stream handle) into an open bwriteStream suitable for write + * streaming functions. + */ +struct bwriteStream * bwsOpen(bNwrite writeFn, void * parm) +{ + struct bwriteStream * ws; + + if (NULL == writeFn) return NULL; + ws = (struct bwriteStream *)malloc(sizeof(struct bwriteStream)); + if (ws) + { + if (NULL == (ws->buff = bfromcstr(""))) + { + free(ws); + ws = NULL; + } + else + { + ws->parm = parm; + ws->writeFn = writeFn; + ws->isEOF = 0; + ws->minBuffSz = BWS_BUFF_SZ; + } + } + return ws; +} + +#define internal_bwswriteout(ws, b) {\ + if ((b)->slen > 0) { \ + if (1 != (ws->writeFn ((b)->data, (b)->slen, 1, ws->parm))) { \ + ws->isEOF = 1; \ + return BSTR_ERR; \ + } \ + } \ +} + +/* int bwsWriteFlush (struct bwriteStream * ws) + * + * Force any pending data to be written to the core stream. + */ +int bwsWriteFlush(struct bwriteStream * ws) +{ + if (NULL == ws || ws->isEOF || 0 >= ws->minBuffSz || + NULL == ws->writeFn || NULL == ws->buff) return BSTR_ERR; + internal_bwswriteout(ws, ws->buff); + ws->buff->slen = 0; + return 0; +} + +/* int bwsWriteBstr (struct bwriteStream * ws, const_bstring b) + * + * Send a bstring to a bwriteStream. If the stream is at EOF BSTR_ERR is + * returned. Note that there is no deterministic way to determine the exact + * cut off point where the core stream stopped accepting data. 
+ */ +int bwsWriteBstr(struct bwriteStream * ws, const_bstring b) +{ + struct tagbstring t; + int l; + + if (NULL == ws || NULL == b || NULL == ws->buff || + ws->isEOF || 0 >= ws->minBuffSz || NULL == ws->writeFn) + return BSTR_ERR; + + /* Buffer prepacking optimization */ + if (b->slen > 0 && ws->buff->mlen - ws->buff->slen > b->slen) + { + static struct tagbstring empty = bsStatic(""); + if (0 > bconcat(ws->buff, b)) return BSTR_ERR; + return bwsWriteBstr(ws, &empty); + } + + if (0 > (l = ws->minBuffSz - ws->buff->slen)) + { + internal_bwswriteout(ws, ws->buff); + ws->buff->slen = 0; + l = ws->minBuffSz; + } + + if (b->slen < l) return bconcat(ws->buff, b); + + if (0 > bcatblk(ws->buff, b->data, l)) return BSTR_ERR; + internal_bwswriteout(ws, ws->buff); + ws->buff->slen = 0; + + bmid2tbstr(t, (bstring)b, l, b->slen); + + if (t.slen >= ws->minBuffSz) + { + internal_bwswriteout(ws, &t); + return 0; + } + + return bassign(ws->buff, &t); +} + +/* int bwsWriteBlk (struct bwriteStream * ws, void * blk, int len) + * + * Send a block of data a bwriteStream. If the stream is at EOF BSTR_ERR is + * returned. + */ +int bwsWriteBlk(struct bwriteStream * ws, void * blk, int len) +{ + struct tagbstring t; + if (NULL == blk || len < 0) return BSTR_ERR; + blk2tbstr(t, blk, len); + return bwsWriteBstr(ws, &t); +} + +/* int bwsIsEOF (const struct bwriteStream * ws) + * + * Returns 0 if the stream is currently writable, 1 if the core stream has + * responded by not accepting the previous attempted write. + */ +int bwsIsEOF(const struct bwriteStream * ws) +{ + if (NULL == ws || NULL == ws->buff || 0 > ws->minBuffSz || + NULL == ws->writeFn) return BSTR_ERR; + return ws->isEOF; +} + +/* int bwsBuffLength (struct bwriteStream * ws, int sz) + * + * Set the length of the buffer used by the bwsStream. If sz is zero, the + * length is not set. This function returns with the previous length. + */ +int bwsBuffLength(struct bwriteStream * ws, int sz) +{ + int oldSz; + if (ws == NULL || sz < 0) return BSTR_ERR; + oldSz = ws->minBuffSz; + if (sz > 0) ws->minBuffSz = sz; + return oldSz; +} + +/* void * bwsClose (struct bwriteStream * s) + * + * Close the bwriteStream, and return the handle to the stream that was + * originally used to open the given stream. Note that even if the stream + * is at EOF it still needs to be closed with a call to bwsClose. + */ +void * bwsClose(struct bwriteStream * ws) +{ + void * parm; + if (NULL == ws || NULL == ws->buff || 0 >= ws->minBuffSz || + NULL == ws->writeFn) return NULL; + bwsWriteFlush(ws); + parm = ws->parm; + ws->parm = NULL; + ws->minBuffSz = -1; + ws->writeFn = NULL; + bstrFree(ws->buff); + free(ws); + return parm; +} diff --git a/third_party/HLSLcc/src/cbstring/bstraux.h b/third_party/HLSLcc/src/cbstring/bstraux.h new file mode 100644 index 0000000..e31929f --- /dev/null +++ b/third_party/HLSLcc/src/cbstring/bstraux.h @@ -0,0 +1,112 @@ +/* + * This source file is part of the bstring string library. This code was + * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause + * BSD open source license or GPL v2.0. Refer to the accompanying documentation + * for details on usage and license. + */ + +/* + * bstraux.h + * + * This file is not a necessary part of the core bstring library itself, but + * is just an auxilliary module which includes miscellaneous or trivial + * functions. 
+ */ + +#ifndef BSTRAUX_INCLUDE +#define BSTRAUX_INCLUDE + +#include <time.h> +#include "bstrlib.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Safety mechanisms */ +#define bstrDeclare(b) bstring (b) = NULL; +#define bstrFree(b) {if ((b) != NULL && (b)->slen >= 0 && (b)->mlen >= (b)->slen) { bdestroy (b); (b) = NULL; }} + +/* Backward compatibilty with previous versions of Bstrlib */ +#define bAssign(a, b) ((bassign)((a), (b))) +#define bSubs(b, pos, len, a, c) ((breplace)((b),(pos),(len),(a),(unsigned char)(c))) +#define bStrchr(b, c) ((bstrchr)((b), (c))) +#define bStrchrFast(b, c) ((bstrchr)((b), (c))) +#define bCatCstr(b, s) ((bcatcstr)((b), (s))) +#define bCatBlk(b, s, len) ((bcatblk)((b),(s),(len))) +#define bCatStatic(b, s) bCatBlk ((b), ("" s ""), sizeof (s) - 1) +#define bTrunc(b, n) ((btrunc)((b), (n))) +#define bReplaceAll(b, find, repl, pos) ((bfindreplace)((b),(find),(repl),(pos))) +#define bUppercase(b) ((btoupper)(b)) +#define bLowercase(b) ((btolower)(b)) +#define bCaselessCmp(a, b) ((bstricmp)((a), (b))) +#define bCaselessNCmp(a, b, n) ((bstrnicmp)((a), (b), (n))) +#define bBase64Decode(b) (bBase64DecodeEx ((b), NULL)) +#define bUuDecode(b) (bUuDecodeEx ((b), NULL)) + +/* Unusual functions */ +extern struct bStream * bsFromBstr(const_bstring b); +extern bstring bTail(bstring b, int n); +extern bstring bHead(bstring b, int n); +extern int bSetCstrChar(bstring a, int pos, char c); +extern int bSetChar(bstring b, int pos, char c); +extern int bFill(bstring a, char c, int len); +extern int bReplicate(bstring b, int n); +extern int bReverse(bstring b); +extern int bInsertChrs(bstring b, int pos, int len, unsigned char c, unsigned char fill); +extern bstring bStrfTime(const char * fmt, const struct tm * timeptr); +#define bAscTime(t) (bStrfTime ("%c\n", (t))) +#define bCTime(t) ((t) ?
bAscTime (localtime (t)) : NULL) + +/* Spacing formatting */ +extern int bJustifyLeft(bstring b, int space); +extern int bJustifyRight(bstring b, int width, int space); +extern int bJustifyMargin(bstring b, int width, int space); +extern int bJustifyCenter(bstring b, int width, int space); + +/* Esoteric standards specific functions */ +extern char * bStr2NetStr(const_bstring b); +extern bstring bNetStr2Bstr(const char * buf); +extern bstring bBase64Encode(const_bstring b); +extern bstring bBase64DecodeEx(const_bstring b, int * boolTruncError); +extern struct bStream * bsUuDecode(struct bStream * sInp, int * badlines); +extern bstring bUuDecodeEx(const_bstring src, int * badlines); +extern bstring bUuEncode(const_bstring src); +extern bstring bYEncode(const_bstring src); +extern bstring bYDecode(const_bstring src); + +/* Writable stream */ +typedef int (* bNwrite) (const void * buf, size_t elsize, size_t nelem, void * parm); + +struct bwriteStream * bwsOpen(bNwrite writeFn, void * parm); +int bwsWriteBstr(struct bwriteStream * stream, const_bstring b); +int bwsWriteBlk(struct bwriteStream * stream, void * blk, int len); +int bwsWriteFlush(struct bwriteStream * stream); +int bwsIsEOF(const struct bwriteStream * stream); +int bwsBuffLength(struct bwriteStream * stream, int sz); +void * bwsClose(struct bwriteStream * stream); + +/* Security functions */ +#define bSecureDestroy(b) { \ +bstring bstr__tmp = (b); \ + if (bstr__tmp && bstr__tmp->mlen > 0 && bstr__tmp->data) { \ + (void) memset (bstr__tmp->data, 0, (size_t) bstr__tmp->mlen); \ + bdestroy (bstr__tmp); \ + } \ +} +#define bSecureWriteProtect(t) { \ + if ((t).mlen >= 0) { \ + if ((t).mlen > (t).slen)) { \ + (void) memset ((t).data + (t).slen, 0, (size_t) (t).mlen - (t).slen); \ + } \ + (t).mlen = -1; \ + } \ +} +extern bstring bSecureInput(int maxlen, int termchar, + bNgetc vgetchar, void * vgcCtx); + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/third_party/HLSLcc/src/cbstring/bstrlib.c b/third_party/HLSLcc/src/cbstring/bstrlib.c new file mode 100644 index 0000000..e1a8590 --- /dev/null +++ b/third_party/HLSLcc/src/cbstring/bstrlib.c @@ -0,0 +1,3280 @@ +/* + * This source file is part of the bstring string library. This code was + * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause + * BSD open source license or GPL v2.0. Refer to the accompanying documentation + * for details on usage and license. + */ + +/* + * bstrlib.c + * + * This file is the core module for implementing the bstring functions. + */ + +#include <stdio.h> +#include <stddef.h> +#include <stdarg.h> +#include <stdlib.h> +#include <limits.h> +#include <ctype.h> +#include "bstrlib.h" + +/* Optionally include a mechanism for debugging memory */ + +#if defined(MEMORY_DEBUG) || defined(BSTRLIB_MEMORY_DEBUG) +#include "memdbg.h" +#endif + +#ifndef bstr__alloc +#define bstr__alloc(x) malloc (x) +#endif + +#ifndef bstr__free +#define bstr__free(p) free (p) +#endif + +#ifndef bstr__realloc +#define bstr__realloc(p, x) realloc ((p), (x)) +#endif + +#ifndef bstr__memcpy +#define bstr__memcpy(d, s, l) memcpy ((d), (s), (l)) +#endif + +#ifndef bstr__memmove +#define bstr__memmove(d, s, l) memmove ((d), (s), (l)) +#endif + +#ifndef bstr__memset +#define bstr__memset(d, c, l) memset ((d), (c), (l)) +#endif + +#ifndef bstr__memcmp +#define bstr__memcmp(d, c, l) memcmp ((d), (c), (l)) +#endif + +#ifndef bstr__memchr +#define bstr__memchr(s, c, l) memchr ((s), (c), (l)) +#endif + +/* Just a length safe wrapper for memmove.
*/ + +#define bBlockCopy(D, S, L) { if ((L) > 0) bstr__memmove ((D),(S),(L)); } + +/* Compute the snapped size for a given requested size. By snapping to powers + of 2 like this, repeated reallocations are avoided. */ +static int snapUpSize(int i) +{ + if (i < 8) + { + i = 8; + } + else + { + unsigned int j; + j = (unsigned int)i; + + j |= (j >> 1); + j |= (j >> 2); + j |= (j >> 4); + j |= (j >> 8); /* Ok, since int >= 16 bits */ +#if (UINT_MAX != 0xffff) + j |= (j >> 16); /* For 32 bit int systems */ +#if (UINT_MAX > 0xffffffffUL) + j |= (j >> 32); /* For 64 bit int systems */ +#endif +#endif + /* Least power of two greater than i */ + j++; + if ((int)j >= i) i = (int)j; + } + return i; +} + +/* int balloc (bstring b, int len) + * + * Increase the size of the memory backing the bstring b to at least len. + */ +int balloc(bstring b, int olen) +{ + int len; + if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen <= 0 || + b->mlen < b->slen || olen <= 0) + { + return BSTR_ERR; + } + + if (olen >= b->mlen) + { + unsigned char * x; + + if ((len = snapUpSize(olen)) <= b->mlen) return BSTR_OK; + + /* Assume probability of a non-moving realloc is 0.125 */ + if (7 * b->mlen < 8 * b->slen) + { + /* If slen is close to mlen in size then use realloc to reduce + the memory defragmentation */ + + reallocStrategy:; + + x = (unsigned char *)bstr__realloc(b->data, (size_t)len); + if (x == NULL) + { + /* Since we failed, try allocating the tighest possible + allocation */ + + if (NULL == (x = (unsigned char *)bstr__realloc(b->data, (size_t)(len = olen)))) + { + return BSTR_ERR; + } + } + } + else + { + /* If slen is not close to mlen then avoid the penalty of copying + the extra bytes that are allocated, but not considered part of + the string */ + + if (NULL == (x = (unsigned char *)bstr__alloc((size_t)len))) + { + /* Perhaps there is no available memory for the two + allocations to be in memory at once */ + + goto reallocStrategy; + } + else + { + if (b->slen) bstr__memcpy((char *)x, (char *)b->data, (size_t)b->slen); + bstr__free(b->data); + } + } + b->data = x; + b->mlen = len; + b->data[b->slen] = (unsigned char)'\0'; + } + + return BSTR_OK; +} + +/* int ballocmin (bstring b, int len) + * + * Set the size of the memory backing the bstring b to len or b->slen+1, + * whichever is larger. Note that repeated use of this function can degrade + * performance. + */ +int ballocmin(bstring b, int len) +{ + unsigned char * s; + + if (b == NULL || b->data == NULL || (b->slen + 1) < 0 || b->mlen <= 0 || + b->mlen < b->slen || len <= 0) + { + return BSTR_ERR; + } + + if (len < b->slen + 1) len = b->slen + 1; + + if (len != b->mlen) + { + s = (unsigned char *)bstr__realloc(b->data, (size_t)len); + if (NULL == s) return BSTR_ERR; + s[b->slen] = (unsigned char)'\0'; + b->data = s; + b->mlen = len; + } + + return BSTR_OK; +} + +/* bstring bfromcstr (const char * str) + * + * Create a bstring which contains the contents of the '\0' terminated char * + * buffer str. 
+ */ +bstring bfromcstr(const char * str) +{ + bstring b; + int i; + size_t j; + + if (str == NULL) return NULL; + j = (strlen)(str); + i = snapUpSize((int)(j + (2 - (j != 0)))); + if (i <= (int)j) return NULL; + + b = (bstring)bstr__alloc(sizeof(struct tagbstring)); + if (NULL == b) return NULL; + b->slen = (int)j; + if (NULL == (b->data = (unsigned char *)bstr__alloc(b->mlen = i))) + { + bstr__free(b); + return NULL; + } + + bstr__memcpy(b->data, str, j + 1); + return b; +} + +/* bstring bfromcstralloc (int mlen, const char * str) + * + * Create a bstring which contains the contents of the '\0' terminated char * + * buffer str. The memory buffer backing the string is at least len + * characters in length. + */ +bstring bfromcstralloc(int mlen, const char * str) +{ + bstring b; + int i; + size_t j; + + if (str == NULL) return NULL; + j = (strlen)(str); + i = snapUpSize((int)(j + (2 - (j != 0)))); + if (i <= (int)j) return NULL; + + b = (bstring)bstr__alloc(sizeof(struct tagbstring)); + if (b == NULL) return NULL; + b->slen = (int)j; + if (i < mlen) i = mlen; + + if (NULL == (b->data = (unsigned char *)bstr__alloc(b->mlen = i))) + { + bstr__free(b); + return NULL; + } + + bstr__memcpy(b->data, str, j + 1); + return b; +} + +/* bstring blk2bstr (const void * blk, int len) + * + * Create a bstring which contains the content of the block blk of length + * len. + */ +bstring blk2bstr(const void * blk, int len) +{ + bstring b; + int i; + + if (blk == NULL || len < 0) return NULL; + b = (bstring)bstr__alloc(sizeof(struct tagbstring)); + if (b == NULL) return NULL; + b->slen = len; + + i = len + (2 - (len != 0)); + i = snapUpSize(i); + + b->mlen = i; + + b->data = (unsigned char *)bstr__alloc((size_t)b->mlen); + if (b->data == NULL) + { + bstr__free(b); + return NULL; + } + + if (len > 0) bstr__memcpy(b->data, blk, (size_t)len); + b->data[len] = (unsigned char)'\0'; + + return b; +} + +/* char * bstr2cstr (const_bstring s, char z) + * + * Create a '\0' terminated char * buffer which is equal to the contents of + * the bstring s, except that any contained '\0' characters are converted + * to the character in z. This returned value should be freed with a + * bcstrfree () call, by the calling application. + */ +char * bstr2cstr(const_bstring b, char z) +{ + int i, l; + char * r; + + if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; + l = b->slen; + r = (char *)bstr__alloc((size_t)(l + 1)); + if (r == NULL) return r; + + for (i = 0; i < l; i++) + { + r[i] = (char)((b->data[i] == '\0') ? z : (char)(b->data[i])); + } + + r[l] = (unsigned char)'\0'; + + return r; +} + +/* int bcstrfree (char * s) + * + * Frees a C-string generated by bstr2cstr (). This is normally unnecessary + * since it just wraps a call to bstr__free (), however, if bstr__alloc () + * and bstr__free () have been redefined as a macros within the bstrlib + * module (via defining them in memdbg.h after defining + * BSTRLIB_MEMORY_DEBUG) with some difference in behaviour from the std + * library functions, then this allows a correct way of freeing the memory + * that allows higher level code to be independent from these macro + * redefinitions. + */ +int bcstrfree(char * s) +{ + if (s) + { + bstr__free(s); + return BSTR_OK; + } + return BSTR_ERR; +} + +/* int bconcat (bstring b0, const_bstring b1) + * + * Concatenate the bstring b1 to the bstring b0. 
+ */ +int bconcat(bstring b0, const_bstring b1) +{ + int len, d; + bstring aux = (bstring)b1; + + if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL) return BSTR_ERR; + + d = b0->slen; + len = b1->slen; + if ((d | (b0->mlen - d) | len | (d + len)) < 0) return BSTR_ERR; + + if (b0->mlen <= d + len + 1) + { + ptrdiff_t pd = b1->data - b0->data; + if (0 <= pd && pd < b0->mlen) + { + if (NULL == (aux = bstrcpy(b1))) return BSTR_ERR; + } + if (balloc(b0, d + len + 1) != BSTR_OK) + { + if (aux != b1) bdestroy(aux); + return BSTR_ERR; + } + } + + bBlockCopy(&b0->data[d], &aux->data[0], (size_t)len); + b0->data[d + len] = (unsigned char)'\0'; + b0->slen = d + len; + if (aux != b1) bdestroy(aux); + return BSTR_OK; +} + +/* int bconchar (bstring b, char c) +/ * + * Concatenate the single character c to the bstring b. + */ +int bconchar(bstring b, char c) +{ + int d; + + if (b == NULL) return BSTR_ERR; + d = b->slen; + if ((d | (b->mlen - d)) < 0 || balloc(b, d + 2) != BSTR_OK) return BSTR_ERR; + b->data[d] = (unsigned char)c; + b->data[d + 1] = (unsigned char)'\0'; + b->slen++; + return BSTR_OK; +} + +/* int bcatcstr (bstring b, const char * s) + * + * Concatenate a char * string to a bstring. + */ +int bcatcstr(bstring b, const char * s) +{ + char * d; + int i, l; + + if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen < b->slen + || b->mlen <= 0 || s == NULL) return BSTR_ERR; + + /* Optimistically concatenate directly */ + l = b->mlen - b->slen; + d = (char *)&b->data[b->slen]; + for (i = 0; i < l; i++) + { + if ((*d++ = *s++) == '\0') + { + b->slen += i; + return BSTR_OK; + } + } + b->slen += i; + + /* Need to explicitely resize and concatenate tail */ + return bcatblk(b, (const void *)s, (int)strlen(s)); +} + +/* int bcatblk (bstring b, const void * s, int len) + * + * Concatenate a fixed length buffer to a bstring. + */ +int bcatblk(bstring b, const void * s, int len) +{ + int nl; + + if (b == NULL || b->data == NULL || b->slen < 0 || b->mlen < b->slen + || b->mlen <= 0 || s == NULL || len < 0) return BSTR_ERR; + + if (0 > (nl = b->slen + len)) return BSTR_ERR; /* Overflow? */ + if (b->mlen <= nl && 0 > balloc(b, nl + 1)) return BSTR_ERR; + + bBlockCopy(&b->data[b->slen], s, (size_t)len); + b->slen = nl; + b->data[nl] = (unsigned char)'\0'; + return BSTR_OK; +} + +/* bstring bstrcpy (const_bstring b) + * + * Create a copy of the bstring b. + */ +bstring bstrcpy(const_bstring b) +{ + bstring b0; + int i, j; + + /* Attempted to copy an invalid string? */ + if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; + + b0 = (bstring)bstr__alloc(sizeof(struct tagbstring)); + if (b0 == NULL) + { + /* Unable to allocate memory for string header */ + return NULL; + } + + i = b->slen; + j = snapUpSize(i + 1); + + b0->data = (unsigned char *)bstr__alloc(j); + if (b0->data == NULL) + { + j = i + 1; + b0->data = (unsigned char *)bstr__alloc(j); + if (b0->data == NULL) + { + /* Unable to allocate memory for string data */ + bstr__free(b0); + return NULL; + } + } + + b0->mlen = j; + b0->slen = i; + + if (i) bstr__memcpy((char *)b0->data, (char *)b->data, i); + b0->data[b0->slen] = (unsigned char)'\0'; + + return b0; +} + +/* int bassign (bstring a, const_bstring b) + * + * Overwrite the string a with the contents of string b. 
+ */ +int bassign(bstring a, const_bstring b) +{ + if (b == NULL || b->data == NULL || b->slen < 0) + return BSTR_ERR; + if (b->slen != 0) + { + if (balloc(a, b->slen) != BSTR_OK) return BSTR_ERR; + bstr__memmove(a->data, b->data, b->slen); + } + else + { + if (a == NULL || a->data == NULL || a->mlen < a->slen || + a->slen < 0 || a->mlen == 0) + return BSTR_ERR; + } + a->data[b->slen] = (unsigned char)'\0'; + a->slen = b->slen; + return BSTR_OK; +} + +/* int bassignmidstr (bstring a, const_bstring b, int left, int len) + * + * Overwrite the string a with the middle of contents of string b + * starting from position left and running for a length len. left and + * len are clamped to the ends of b as with the function bmidstr. + */ +int bassignmidstr(bstring a, const_bstring b, int left, int len) +{ + if (b == NULL || b->data == NULL || b->slen < 0) + return BSTR_ERR; + + if (left < 0) + { + len += left; + left = 0; + } + + if (len > b->slen - left) len = b->slen - left; + + if (a == NULL || a->data == NULL || a->mlen < a->slen || + a->slen < 0 || a->mlen == 0) + return BSTR_ERR; + + if (len > 0) + { + if (balloc(a, len) != BSTR_OK) return BSTR_ERR; + bstr__memmove(a->data, b->data + left, len); + a->slen = len; + } + else + { + a->slen = 0; + } + a->data[a->slen] = (unsigned char)'\0'; + return BSTR_OK; +} + +/* int bassigncstr (bstring a, const char * str) + * + * Overwrite the string a with the contents of char * string str. Note that + * the bstring a must be a well defined and writable bstring. If an error + * occurs BSTR_ERR is returned however a may be partially overwritten. + */ +int bassigncstr(bstring a, const char * str) +{ + int i; + size_t len; + if (a == NULL || a->data == NULL || a->mlen < a->slen || + a->slen < 0 || a->mlen == 0 || NULL == str) + return BSTR_ERR; + + for (i = 0; i < a->mlen; i++) + { + if ('\0' == (a->data[i] = str[i])) + { + a->slen = i; + return BSTR_OK; + } + } + + a->slen = i; + len = strlen(str + i); + if (len > INT_MAX || i + len + 1 > INT_MAX || + 0 > balloc(a, (int)(i + len + 1))) return BSTR_ERR; + bBlockCopy(a->data + i, str + i, (size_t)len + 1); + a->slen += (int)len; + return BSTR_OK; +} + +/* int bassignblk (bstring a, const void * s, int len) + * + * Overwrite the string a with the contents of the block (s, len). Note that + * the bstring a must be a well defined and writable bstring. If an error + * occurs BSTR_ERR is returned and a is not overwritten. + */ +int bassignblk(bstring a, const void * s, int len) +{ + if (a == NULL || a->data == NULL || a->mlen < a->slen || + a->slen < 0 || a->mlen == 0 || NULL == s || len + 1 < 1) + return BSTR_ERR; + if (len + 1 > a->mlen && 0 > balloc(a, len + 1)) return BSTR_ERR; + bBlockCopy(a->data, s, (size_t)len); + a->data[len] = (unsigned char)'\0'; + a->slen = len; + return BSTR_OK; +} + +/* int btrunc (bstring b, int n) + * + * Truncate the bstring to at most n characters. + */ +int btrunc(bstring b, int n) +{ + if (n < 0 || b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + if (b->slen > n) + { + b->slen = n; + b->data[n] = (unsigned char)'\0'; + } + return BSTR_OK; +} + +#define upcase(c) (toupper ((unsigned char) c)) +#define downcase(c) (tolower ((unsigned char) c)) +#define wspace(c) (isspace ((unsigned char) c)) + +/* int btoupper (bstring b) + * + * Convert contents of bstring to upper case. 
+ */ +int btoupper(bstring b) +{ + int i, len; + if (b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + for (i = 0, len = b->slen; i < len; i++) + { + b->data[i] = (unsigned char)upcase(b->data[i]); + } + return BSTR_OK; +} + +/* int btolower (bstring b) + * + * Convert contents of bstring to lower case. + */ +int btolower(bstring b) +{ + int i, len; + if (b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + for (i = 0, len = b->slen; i < len; i++) + { + b->data[i] = (unsigned char)downcase(b->data[i]); + } + return BSTR_OK; +} + +/* int bstricmp (const_bstring b0, const_bstring b1) + * + * Compare two strings without differentiating between case. The return + * value is the difference of the values of the characters where the two + * strings first differ after lower case transformation, otherwise 0 is + * returned indicating that the strings are equal. If the lengths are + * different, then a difference from 0 is given, but if the first extra + * character is '\0', then it is taken to be the value UCHAR_MAX+1. + */ +int bstricmp(const_bstring b0, const_bstring b1) +{ + int i, v, n; + + if (bdata(b0) == NULL || b0->slen < 0 || + bdata(b1) == NULL || b1->slen < 0) return SHRT_MIN; + if ((n = b0->slen) > b1->slen) n = b1->slen; + else if (b0->slen == b1->slen && b0->data == b1->data) return BSTR_OK; + + for (i = 0; i < n; i++) + { + v = (char)downcase(b0->data[i]) + - (char)downcase(b1->data[i]); + if (0 != v) return v; + } + + if (b0->slen > n) + { + v = (char)downcase(b0->data[n]); + if (v) return v; + return UCHAR_MAX + 1; + } + if (b1->slen > n) + { + v = -(char)downcase(b1->data[n]); + if (v) return v; + return -(int)(UCHAR_MAX + 1); + } + return BSTR_OK; +} + +/* int bstrnicmp (const_bstring b0, const_bstring b1, int n) + * + * Compare two strings without differentiating between case for at most n + * characters. If the position where the two strings first differ is + * before the nth position, the return value is the difference of the values + * of the characters, otherwise 0 is returned. If the lengths are different + * and less than n characters, then a difference from 0 is given, but if the + * first extra character is '\0', then it is taken to be the value + * UCHAR_MAX+1. + */ +int bstrnicmp(const_bstring b0, const_bstring b1, int n) +{ + int i, v, m; + + if (bdata(b0) == NULL || b0->slen < 0 || + bdata(b1) == NULL || b1->slen < 0 || n < 0) return SHRT_MIN; + m = n; + if (m > b0->slen) m = b0->slen; + if (m > b1->slen) m = b1->slen; + + if (b0->data != b1->data) + { + for (i = 0; i < m; i++) + { + v = (char)downcase(b0->data[i]); + v -= (char)downcase(b1->data[i]); + if (v != 0) return b0->data[i] - b1->data[i]; + } + } + + if (n == m || b0->slen == b1->slen) return BSTR_OK; + + if (b0->slen > m) + { + v = (char)downcase(b0->data[m]); + if (v) return v; + return UCHAR_MAX + 1; + } + + v = -(char)downcase(b1->data[m]); + if (v) return v; + return -(int)(UCHAR_MAX + 1); +} + +/* int biseqcaseless (const_bstring b0, const_bstring b1) + * + * Compare two strings for equality without differentiating between case. + * If the strings differ other than in case, 0 is returned, if the strings + * are the same, 1 is returned, if there is an error, -1 is returned. If + * the length of the strings are different, this function is O(1). '\0' + * termination characters are not treated in any special way. 
+ */ +int biseqcaseless(const_bstring b0, const_bstring b1) +{ + int i, n; + + if (bdata(b0) == NULL || b0->slen < 0 || + bdata(b1) == NULL || b1->slen < 0) return BSTR_ERR; + if (b0->slen != b1->slen) return BSTR_OK; + if (b0->data == b1->data || b0->slen == 0) return 1; + for (i = 0, n = b0->slen; i < n; i++) + { + if (b0->data[i] != b1->data[i]) + { + unsigned char c = (unsigned char)downcase(b0->data[i]); + if (c != (unsigned char)downcase(b1->data[i])) return 0; + } + } + return 1; +} + +/* int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len) + * + * Compare beginning of string b0 with a block of memory of length len + * without differentiating between case for equality. If the beginning of b0 + * differs from the memory block other than in case (or if b0 is too short), + * 0 is returned, if the strings are the same, 1 is returned, if there is an + * error, -1 is returned. '\0' characters are not treated in any special + * way. + */ +int bisstemeqcaselessblk(const_bstring b0, const void * blk, int len) +{ + int i; + + if (bdata(b0) == NULL || b0->slen < 0 || NULL == blk || len < 0) + return BSTR_ERR; + if (b0->slen < len) return BSTR_OK; + if (b0->data == (const unsigned char *)blk || len == 0) return 1; + + for (i = 0; i < len; i++) + { + if (b0->data[i] != ((const unsigned char *)blk)[i]) + { + if (downcase(b0->data[i]) != + downcase(((const unsigned char *)blk)[i])) return 0; + } + } + return 1; +} + +/* + * int bltrimws (bstring b) + * + * Delete whitespace contiguous from the left end of the string. + */ +int bltrimws(bstring b) +{ + int i, len; + + if (b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + + for (len = b->slen, i = 0; i < len; i++) + { + if (!wspace(b->data[i])) + { + return bdelete(b, 0, i); + } + } + + b->data[0] = (unsigned char)'\0'; + b->slen = 0; + return BSTR_OK; +} + +/* + * int brtrimws (bstring b) + * + * Delete whitespace contiguous from the right end of the string. + */ +int brtrimws(bstring b) +{ + int i; + + if (b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + + for (i = b->slen - 1; i >= 0; i--) + { + if (!wspace(b->data[i])) + { + if (b->mlen > i) b->data[i + 1] = (unsigned char)'\0'; + b->slen = i + 1; + return BSTR_OK; + } + } + + b->data[0] = (unsigned char)'\0'; + b->slen = 0; + return BSTR_OK; +} + +/* + * int btrimws (bstring b) + * + * Delete whitespace contiguous from both ends of the string. + */ +int btrimws(bstring b) +{ + int i, j; + + if (b == NULL || b->data == NULL || b->mlen < b->slen || + b->slen < 0 || b->mlen <= 0) return BSTR_ERR; + + for (i = b->slen - 1; i >= 0; i--) + { + if (!wspace(b->data[i])) + { + if (b->mlen > i) b->data[i + 1] = (unsigned char)'\0'; + b->slen = i + 1; + for (j = 0; wspace(b->data[j]); j++) + { + } + return bdelete(b, 0, j); + } + } + + b->data[0] = (unsigned char)'\0'; + b->slen = 0; + return BSTR_OK; +} + +/* int biseq (const_bstring b0, const_bstring b1) + * + * Compare the string b0 and b1. If the strings differ, 0 is returned, if + * the strings are the same, 1 is returned, if there is an error, -1 is + * returned. If the length of the strings are different, this function is + * O(1). '\0' termination characters are not treated in any special way. 
+ */ +int biseq(const_bstring b0, const_bstring b1) +{ + if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL || + b0->slen < 0 || b1->slen < 0) return BSTR_ERR; + if (b0->slen != b1->slen) return BSTR_OK; + if (b0->data == b1->data || b0->slen == 0) return 1; + return !bstr__memcmp(b0->data, b1->data, b0->slen); +} + +/* int bisstemeqblk (const_bstring b0, const void * blk, int len) + * + * Compare beginning of string b0 with a block of memory of length len for + * equality. If the beginning of b0 differs from the memory block (or if b0 + * is too short), 0 is returned, if the strings are the same, 1 is returned, + * if there is an error, -1 is returned. '\0' characters are not treated in + * any special way. + */ +int bisstemeqblk(const_bstring b0, const void * blk, int len) +{ + int i; + + if (bdata(b0) == NULL || b0->slen < 0 || NULL == blk || len < 0) + return BSTR_ERR; + if (b0->slen < len) return BSTR_OK; + if (b0->data == (const unsigned char *)blk || len == 0) return 1; + + for (i = 0; i < len; i++) + { + if (b0->data[i] != ((const unsigned char *)blk)[i]) return BSTR_OK; + } + return 1; +} + +/* int biseqcstr (const_bstring b, const char *s) + * + * Compare the bstring b and char * string s. The C string s must be '\0' + * terminated at exactly the length of the bstring b, and the contents + * between the two must be identical with the bstring b with no '\0' + * characters for the two contents to be considered equal. This is + * equivalent to the condition that their current contents will be always be + * equal when comparing them in the same format after converting one or the + * other. If the strings are equal 1 is returned, if they are unequal 0 is + * returned and if there is a detectable error BSTR_ERR is returned. + */ +int biseqcstr(const_bstring b, const char * s) +{ + int i; + if (b == NULL || s == NULL || b->data == NULL || b->slen < 0) return BSTR_ERR; + for (i = 0; i < b->slen; i++) + { + if (s[i] == '\0' || b->data[i] != (unsigned char)s[i]) return BSTR_OK; + } + return s[i] == '\0'; +} + +/* int biseqcstrcaseless (const_bstring b, const char *s) + * + * Compare the bstring b and char * string s. The C string s must be '\0' + * terminated at exactly the length of the bstring b, and the contents + * between the two must be identical except for case with the bstring b with + * no '\0' characters for the two contents to be considered equal. This is + * equivalent to the condition that their current contents will be always be + * equal ignoring case when comparing them in the same format after + * converting one or the other. If the strings are equal, except for case, + * 1 is returned, if they are unequal regardless of case 0 is returned and + * if there is a detectable error BSTR_ERR is returned. + */ +int biseqcstrcaseless(const_bstring b, const char * s) +{ + int i; + if (b == NULL || s == NULL || b->data == NULL || b->slen < 0) return BSTR_ERR; + for (i = 0; i < b->slen; i++) + { + if (s[i] == '\0' || + (b->data[i] != (unsigned char)s[i] && + downcase(b->data[i]) != (unsigned char)downcase(s[i]))) + return BSTR_OK; + } + return s[i] == '\0'; +} + +/* int bstrcmp (const_bstring b0, const_bstring b1) + * + * Compare the string b0 and b1. If there is an error, SHRT_MIN is returned, + * otherwise a value less than or greater than zero, indicating that the + * string pointed to by b0 is lexicographically less than or greater than + * the string pointed to by b1 is returned. 
If the the string lengths are + * unequal but the characters up until the length of the shorter are equal + * then a value less than, or greater than zero, indicating that the string + * pointed to by b0 is shorter or longer than the string pointed to by b1 is + * returned. 0 is returned if and only if the two strings are the same. If + * the length of the strings are different, this function is O(n). Like its + * standard C library counter part strcmp, the comparison does not proceed + * past any '\0' termination characters encountered. + */ +int bstrcmp(const_bstring b0, const_bstring b1) +{ + int i, v, n; + + if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL || + b0->slen < 0 || b1->slen < 0) return SHRT_MIN; + n = b0->slen; if (n > b1->slen) n = b1->slen; + if (b0->slen == b1->slen && (b0->data == b1->data || b0->slen == 0)) + return BSTR_OK; + + for (i = 0; i < n; i++) + { + v = ((char)b0->data[i]) - ((char)b1->data[i]); + if (v != 0) return v; + if (b0->data[i] == (unsigned char)'\0') return BSTR_OK; + } + + if (b0->slen > n) return 1; + if (b1->slen > n) return -1; + return BSTR_OK; +} + +/* int bstrncmp (const_bstring b0, const_bstring b1, int n) + * + * Compare the string b0 and b1 for at most n characters. If there is an + * error, SHRT_MIN is returned, otherwise a value is returned as if b0 and + * b1 were first truncated to at most n characters then bstrcmp was called + * with these new strings are paremeters. If the length of the strings are + * different, this function is O(n). Like its standard C library counter + * part strcmp, the comparison does not proceed past any '\0' termination + * characters encountered. + */ +int bstrncmp(const_bstring b0, const_bstring b1, int n) +{ + int i, v, m; + + if (b0 == NULL || b1 == NULL || b0->data == NULL || b1->data == NULL || + b0->slen < 0 || b1->slen < 0) return SHRT_MIN; + m = n; + if (m > b0->slen) m = b0->slen; + if (m > b1->slen) m = b1->slen; + + if (b0->data != b1->data) + { + for (i = 0; i < m; i++) + { + v = ((char)b0->data[i]) - ((char)b1->data[i]); + if (v != 0) return v; + if (b0->data[i] == (unsigned char)'\0') return BSTR_OK; + } + } + + if (n == m || b0->slen == b1->slen) return BSTR_OK; + + if (b0->slen > m) return 1; + return -1; +} + +/* bstring bmidstr (const_bstring b, int left, int len) + * + * Create a bstring which is the substring of b starting from position left + * and running for a length len (clamped by the end of the bstring b.) If + * b is detectably invalid, then NULL is returned. The section described + * by (left, len) is clamped to the boundaries of b. + */ +bstring bmidstr(const_bstring b, int left, int len) +{ + if (b == NULL || b->slen < 0 || b->data == NULL) return NULL; + + if (left < 0) + { + len += left; + left = 0; + } + + if (len > b->slen - left) len = b->slen - left; + + if (len <= 0) return bfromcstr(""); + return blk2bstr(b->data + left, len); +} + +/* int bdelete (bstring b, int pos, int len) + * + * Removes characters from pos to pos+len-1 inclusive and shifts the tail of + * the bstring starting from pos+len to pos. len must be positive for this + * call to have any effect. The section of the string described by (pos, + * len) is clamped to boundaries of the bstring b. 
+ */ +int bdelete(bstring b, int pos, int len) +{ + /* Clamp to left side of bstring */ + if (pos < 0) + { + len += pos; + pos = 0; + } + + if (len < 0 || b == NULL || b->data == NULL || b->slen < 0 || + b->mlen < b->slen || b->mlen <= 0) + return BSTR_ERR; + if (len > 0 && pos < b->slen) + { + if (pos + len >= b->slen) + { + b->slen = pos; + } + else + { + bBlockCopy((char *)(b->data + pos), + (char *)(b->data + pos + len), + b->slen - (pos + len)); + b->slen -= len; + } + b->data[b->slen] = (unsigned char)'\0'; + } + return BSTR_OK; +} + +/* int bdestroy (bstring b) + * + * Free up the bstring. Note that if b is detectably invalid or not writable + * then no action is performed and BSTR_ERR is returned. Like a freed memory + * allocation, dereferences, writes or any other action on b after it has + * been bdestroyed is undefined. + */ +int bdestroy(bstring b) +{ + if (b == NULL || b->slen < 0 || b->mlen <= 0 || b->mlen < b->slen || + b->data == NULL) + return BSTR_ERR; + + bstr__free(b->data); + + /* In case there is any stale usage, there is one more chance to + notice this error. */ + + b->slen = -1; + b->mlen = -__LINE__; + b->data = NULL; + + bstr__free(b); + return BSTR_OK; +} + +/* int binstr (const_bstring b1, int pos, const_bstring b2) + * + * Search for the bstring b2 in b1 starting from position pos, and searching + * forward. If it is found then return with the first position where it is + * found, otherwise return BSTR_ERR. Note that this is just a brute force + * string searcher that does not attempt clever things like the Boyer-Moore + * search algorithm. Because of this there are many degenerate cases where + * this can take much longer than it needs to. + */ +int binstr(const_bstring b1, int pos, const_bstring b2) +{ + int j, ii, ll, lf; + unsigned char * d0; + unsigned char c0; + register unsigned char * d1; + register unsigned char c1; + register int i; + + if (b1 == NULL || b1->data == NULL || b1->slen < 0 || + b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; + if (b1->slen == pos) return (b2->slen == 0) ? pos : BSTR_ERR; + if (b1->slen < pos || pos < 0) return BSTR_ERR; + if (b2->slen == 0) return pos; + + /* No space to find such a string? */ + if ((lf = b1->slen - b2->slen + 1) <= pos) return BSTR_ERR; + + /* An obvious alias case */ + if (b1->data == b2->data && pos == 0) return 0; + + i = pos; + + d0 = b2->data; + d1 = b1->data; + ll = b2->slen; + + /* Peel off the b2->slen == 1 case */ + c0 = d0[0]; + if (1 == ll) + { + for (; i < lf; i++) + if (c0 == d1[i]) return i; + return BSTR_ERR; + } + + c1 = c0; + j = 0; + lf = b1->slen - 1; + + ii = -1; + if (i < lf) + do + { + /* Unrolled current character test */ + if (c1 != d1[i]) + { + if (c1 != d1[1 + i]) + { + i += 2; + continue; + } + i++; + } + + /* Take note if this is the start of a potential match */ + if (0 == j) ii = i; + + /* Shift the test character down by one */ + j++; + i++; + + /* If this isn't past the last character continue */ + if (j < ll) + { + c1 = d0[j]; + continue; + } + + N0:; + + /* If no characters mismatched, then we matched */ + if (i == ii + j) return ii; + + /* Shift back to the beginning */ + i -= j; + j = 0; + c1 = c0; + } + while (i < lf); + + /* Deal with last case if unrolling caused a misalignment */ + if (i == lf && ll == j + 1 && c1 == d1[i]) goto N0; + + return BSTR_ERR; +} + +/* int binstrr (const_bstring b1, int pos, const_bstring b2) + * + * Search for the bstring b2 in b1 starting from position pos, and searching + * backward. 
If it is found then return with the first position where it is + * found, otherwise return BSTR_ERR. Note that this is just a brute force + * string searcher that does not attempt clever things like the Boyer-Moore + * search algorithm. Because of this there are many degenerate cases where + * this can take much longer than it needs to. + */ +int binstrr(const_bstring b1, int pos, const_bstring b2) +{ + int j, i, l; + unsigned char * d0, * d1; + + if (b1 == NULL || b1->data == NULL || b1->slen < 0 || + b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; + if (b1->slen == pos && b2->slen == 0) return pos; + if (b1->slen < pos || pos < 0) return BSTR_ERR; + if (b2->slen == 0) return pos; + + /* Obvious alias case */ + if (b1->data == b2->data && pos == 0 && b2->slen <= b1->slen) return 0; + + i = pos; + if ((l = b1->slen - b2->slen) < 0) return BSTR_ERR; + + /* If no space to find such a string then snap back */ + if (l + 1 <= i) i = l; + j = 0; + + d0 = b2->data; + d1 = b1->data; + l = b2->slen; + + for (;;) + { + if (d0[j] == d1[i + j]) + { + j++; + if (j >= l) return i; + } + else + { + i--; + if (i < 0) break; + j = 0; + } + } + + return BSTR_ERR; +} + +/* int binstrcaseless (const_bstring b1, int pos, const_bstring b2) + * + * Search for the bstring b2 in b1 starting from position pos, and searching + * forward but without regard to case. If it is found then return with the + * first position where it is found, otherwise return BSTR_ERR. Note that + * this is just a brute force string searcher that does not attempt clever + * things like the Boyer-Moore search algorithm. Because of this there are + * many degenerate cases where this can take much longer than it needs to. + */ +int binstrcaseless(const_bstring b1, int pos, const_bstring b2) +{ + int j, i, l, ll; + unsigned char * d0, * d1; + + if (b1 == NULL || b1->data == NULL || b1->slen < 0 || + b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; + if (b1->slen == pos) return (b2->slen == 0) ? pos : BSTR_ERR; + if (b1->slen < pos || pos < 0) return BSTR_ERR; + if (b2->slen == 0) return pos; + + l = b1->slen - b2->slen + 1; + + /* No space to find such a string? */ + if (l <= pos) return BSTR_ERR; + + /* An obvious alias case */ + if (b1->data == b2->data && pos == 0) return BSTR_OK; + + i = pos; + j = 0; + + d0 = b2->data; + d1 = b1->data; + ll = b2->slen; + + for (;;) + { + if (d0[j] == d1[i + j] || downcase(d0[j]) == downcase(d1[i + j])) + { + j++; + if (j >= ll) return i; + } + else + { + i++; + if (i >= l) break; + j = 0; + } + } + + return BSTR_ERR; +} + +/* int binstrrcaseless (const_bstring b1, int pos, const_bstring b2) + * + * Search for the bstring b2 in b1 starting from position pos, and searching + * backward but without regard to case. If it is found then return with the + * first position where it is found, otherwise return BSTR_ERR. Note that + * this is just a brute force string searcher that does not attempt clever + * things like the Boyer-Moore search algorithm. Because of this there are + * many degenerate cases where this can take much longer than it needs to. 
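+ *
+ * Usage sketch (editorial illustration; the strings below are arbitrary):
+ *
+ *   bstring hay = bfromcstr ("abcAB");
+ *   bstring nee = bfromcstr ("ab");
+ *   int p = binstrrcaseless (hay, blength (hay), nee);   (p == 3)
+ *   bdestroy (nee); bdestroy (hay);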
+ */ +int binstrrcaseless(const_bstring b1, int pos, const_bstring b2) +{ + int j, i, l; + unsigned char * d0, * d1; + + if (b1 == NULL || b1->data == NULL || b1->slen < 0 || + b2 == NULL || b2->data == NULL || b2->slen < 0) return BSTR_ERR; + if (b1->slen == pos && b2->slen == 0) return pos; + if (b1->slen < pos || pos < 0) return BSTR_ERR; + if (b2->slen == 0) return pos; + + /* Obvious alias case */ + if (b1->data == b2->data && pos == 0 && b2->slen <= b1->slen) return BSTR_OK; + + i = pos; + if ((l = b1->slen - b2->slen) < 0) return BSTR_ERR; + + /* If no space to find such a string then snap back */ + if (l + 1 <= i) i = l; + j = 0; + + d0 = b2->data; + d1 = b1->data; + l = b2->slen; + + for (;;) + { + if (d0[j] == d1[i + j] || downcase(d0[j]) == downcase(d1[i + j])) + { + j++; + if (j >= l) return i; + } + else + { + i--; + if (i < 0) break; + j = 0; + } + } + + return BSTR_ERR; +} + +/* int bstrchrp (const_bstring b, int c, int pos) + * + * Search for the character c in b forwards from the position pos + * (inclusive). + */ +int bstrchrp(const_bstring b, int c, int pos) +{ + unsigned char * p; + + if (b == NULL || b->data == NULL || b->slen <= pos || pos < 0) return BSTR_ERR; + p = (unsigned char *)bstr__memchr((b->data + pos), (unsigned char)c, (b->slen - pos)); + if (p) return (int)(p - b->data); + return BSTR_ERR; +} + +/* int bstrrchrp (const_bstring b, int c, int pos) + * + * Search for the character c in b backwards from the position pos in string + * (inclusive). + */ +int bstrrchrp(const_bstring b, int c, int pos) +{ + int i; + + if (b == NULL || b->data == NULL || b->slen <= pos || pos < 0) return BSTR_ERR; + for (i = pos; i >= 0; i--) + { + if (b->data[i] == (unsigned char)c) return i; + } + return BSTR_ERR; +} + +#if !defined(BSTRLIB_AGGRESSIVE_MEMORY_FOR_SPEED_TRADEOFF) +#define LONG_LOG_BITS_QTY (3) +#define LONG_BITS_QTY (1 << LONG_LOG_BITS_QTY) +#define LONG_TYPE unsigned char + +#define CFCLEN ((1 << CHAR_BIT) / LONG_BITS_QTY) +struct charField { LONG_TYPE content[CFCLEN]; }; +#define testInCharField(cf, c) ((cf)->content[(c) >> LONG_LOG_BITS_QTY] & (((long)1) << ((c) & (LONG_BITS_QTY-1)))) +#define setInCharField(cf, idx) {\ + unsigned int c = (unsigned int) (idx); \ + (cf)->content[c >> LONG_LOG_BITS_QTY] |= (LONG_TYPE) (1ul << (c & (LONG_BITS_QTY-1))); \ +} + +#else + +#define CFCLEN (1 << CHAR_BIT) +struct charField { unsigned char content[CFCLEN]; }; +#define testInCharField(cf, c) ((cf)->content[(unsigned char) (c)]) +#define setInCharField(cf, idx) (cf)->content[(unsigned int) (idx)] = ~0 + +#endif + +/* Convert a bstring to charField */ +static int buildCharField(struct charField * cf, const_bstring b) +{ + int i; + if (b == NULL || b->data == NULL || b->slen <= 0) return BSTR_ERR; + memset((void *)cf->content, 0, sizeof(struct charField)); + for (i = 0; i < b->slen; i++) + { + setInCharField(cf, b->data[i]); + } + return BSTR_OK; +} + +static void invertCharField(struct charField * cf) +{ + int i; + for (i = 0; i < CFCLEN; i++) + cf->content[i] = ~cf->content[i]; +} + +/* Inner engine for binchr */ +static int binchrCF(const unsigned char * data, int len, int pos, const struct charField * cf) +{ + int i; + for (i = pos; i < len; i++) + { + unsigned char c = (unsigned char)data[i]; + if (testInCharField(cf, c)) return i; + } + return BSTR_ERR; +} + +/* int binchr (const_bstring b0, int pos, const_bstring b1); + * + * Search for the first position in b0 starting from pos or after, in which + * one of the characters in b1 is found and return it. 
If such a position + * does not exist in b0, then BSTR_ERR is returned. + */ +int binchr(const_bstring b0, int pos, const_bstring b1) +{ + struct charField chrs; + if (pos < 0 || b0 == NULL || b0->data == NULL || + b0->slen <= pos) return BSTR_ERR; + if (1 == b1->slen) return bstrchrp(b0, b1->data[0], pos); + if (0 > buildCharField(&chrs, b1)) return BSTR_ERR; + return binchrCF(b0->data, b0->slen, pos, &chrs); +} + +/* Inner engine for binchrr */ +static int binchrrCF(const unsigned char * data, int pos, const struct charField * cf) +{ + int i; + for (i = pos; i >= 0; i--) + { + unsigned int c = (unsigned int)data[i]; + if (testInCharField(cf, c)) return i; + } + return BSTR_ERR; +} + +/* int binchrr (const_bstring b0, int pos, const_bstring b1); + * + * Search for the last position in b0 no greater than pos, in which one of + * the characters in b1 is found and return it. If such a position does not + * exist in b0, then BSTR_ERR is returned. + */ +int binchrr(const_bstring b0, int pos, const_bstring b1) +{ + struct charField chrs; + if (pos < 0 || b0 == NULL || b0->data == NULL || b1 == NULL || + b0->slen < pos) return BSTR_ERR; + if (pos == b0->slen) pos--; + if (1 == b1->slen) return bstrrchrp(b0, b1->data[0], pos); + if (0 > buildCharField(&chrs, b1)) return BSTR_ERR; + return binchrrCF(b0->data, pos, &chrs); +} + +/* int bninchr (const_bstring b0, int pos, const_bstring b1); + * + * Search for the first position in b0 starting from pos or after, in which + * none of the characters in b1 is found and return it. If such a position + * does not exist in b0, then BSTR_ERR is returned. + */ +int bninchr(const_bstring b0, int pos, const_bstring b1) +{ + struct charField chrs; + if (pos < 0 || b0 == NULL || b0->data == NULL || + b0->slen <= pos) return BSTR_ERR; + if (buildCharField(&chrs, b1) < 0) return BSTR_ERR; + invertCharField(&chrs); + return binchrCF(b0->data, b0->slen, pos, &chrs); +} + +/* int bninchrr (const_bstring b0, int pos, const_bstring b1); + * + * Search for the last position in b0 no greater than pos, in which none of + * the characters in b1 is found and return it. If such a position does not + * exist in b0, then BSTR_ERR is returned. + */ +int bninchrr(const_bstring b0, int pos, const_bstring b1) +{ + struct charField chrs; + if (pos < 0 || b0 == NULL || b0->data == NULL || + b0->slen < pos) return BSTR_ERR; + if (pos == b0->slen) pos--; + if (buildCharField(&chrs, b1) < 0) return BSTR_ERR; + invertCharField(&chrs); + return binchrrCF(b0->data, pos, &chrs); +} + +/* int bsetstr (bstring b0, int pos, bstring b1, unsigned char fill) + * + * Overwrite the string b0 starting at position pos with the string b1. If + * the position pos is past the end of b0, then the character "fill" is + * appended as necessary to make up the gap between the end of b0 and pos. + * If b1 is NULL, it behaves as if it were a 0-length string. 
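+ *
+ * Usage sketch (editorial illustration; values are arbitrary, bsStatic is
+ * the static tagbstring initializer from bstrlib.h):
+ *
+ *   bstring b = bfromcstr ("abc");
+ *   struct tagbstring t = bsStatic ("XY");
+ *   bsetstr (b, 5, &t, '-');         (b is now "abc--XY")
+ *   bdestroy (b);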
+ */ +int bsetstr(bstring b0, int pos, const_bstring b1, unsigned char fill) +{ + int d, newlen; + ptrdiff_t pd; + bstring aux = (bstring)b1; + + if (pos < 0 || b0 == NULL || b0->slen < 0 || NULL == b0->data || + b0->mlen < b0->slen || b0->mlen <= 0) return BSTR_ERR; + if (b1 != NULL && (b1->slen < 0 || b1->data == NULL)) return BSTR_ERR; + + d = pos; + + /* Aliasing case */ + if (NULL != aux) + { + if ((pd = (ptrdiff_t)(b1->data - b0->data)) >= 0 && pd < (ptrdiff_t)b0->mlen) + { + if (NULL == (aux = bstrcpy(b1))) return BSTR_ERR; + } + d += aux->slen; + } + + /* Increase memory size if necessary */ + if (balloc(b0, d + 1) != BSTR_OK) + { + if (aux != b1) bdestroy(aux); + return BSTR_ERR; + } + + newlen = b0->slen; + + /* Fill in "fill" character as necessary */ + if (pos > newlen) + { + bstr__memset(b0->data + b0->slen, (int)fill, (size_t)(pos - b0->slen)); + newlen = pos; + } + + /* Copy b1 to position pos in b0. */ + if (aux != NULL) + { + bBlockCopy((char *)(b0->data + pos), (char *)aux->data, aux->slen); + if (aux != b1) bdestroy(aux); + } + + /* Indicate the potentially increased size of b0 */ + if (d > newlen) newlen = d; + + b0->slen = newlen; + b0->data[newlen] = (unsigned char)'\0'; + + return BSTR_OK; +} + +/* int binsert (bstring b1, int pos, bstring b2, unsigned char fill) + * + * Inserts the string b2 into b1 at position pos. If the position pos is + * past the end of b1, then the character "fill" is appended as necessary to + * make up the gap between the end of b1 and pos. Unlike bsetstr, binsert + * does not allow b2 to be NULL. + */ +int binsert(bstring b1, int pos, const_bstring b2, unsigned char fill) +{ + int d, l; + ptrdiff_t pd; + bstring aux = (bstring)b2; + + if (pos < 0 || b1 == NULL || b2 == NULL || b1->slen < 0 || + b2->slen < 0 || b1->mlen < b1->slen || b1->mlen <= 0) return BSTR_ERR; + + /* Aliasing case */ + if ((pd = (ptrdiff_t)(b2->data - b1->data)) >= 0 && pd < (ptrdiff_t)b1->mlen) + { + if (NULL == (aux = bstrcpy(b2))) return BSTR_ERR; + } + + /* Compute the two possible end pointers */ + d = b1->slen + aux->slen; + l = pos + aux->slen; + if ((d | l) < 0) return BSTR_ERR; + + if (l > d) + { + /* Inserting past the end of the string */ + if (balloc(b1, l + 1) != BSTR_OK) + { + if (aux != b2) bdestroy(aux); + return BSTR_ERR; + } + bstr__memset(b1->data + b1->slen, (int)fill, (size_t)(pos - b1->slen)); + b1->slen = l; + } + else + { + /* Inserting in the middle of the string */ + if (balloc(b1, d + 1) != BSTR_OK) + { + if (aux != b2) bdestroy(aux); + return BSTR_ERR; + } + bBlockCopy(b1->data + l, b1->data + pos, d - l); + b1->slen = d; + } + bBlockCopy(b1->data + pos, aux->data, aux->slen); + b1->data[b1->slen] = (unsigned char)'\0'; + if (aux != b2) bdestroy(aux); + return BSTR_OK; +} + +/* int breplace (bstring b1, int pos, int len, bstring b2, + * unsigned char fill) + * + * Replace a section of a string from pos for a length len with the string b2. + * fill is used is pos > b1->slen. + */ +int breplace(bstring b1, int pos, int len, const_bstring b2, + unsigned char fill) +{ + int pl, ret; + ptrdiff_t pd; + bstring aux = (bstring)b2; + + if (pos < 0 || len < 0 || (pl = pos + len) < 0 || b1 == NULL || + b2 == NULL || b1->data == NULL || b2->data == NULL || + b1->slen < 0 || b2->slen < 0 || b1->mlen < b1->slen || + b1->mlen <= 0) return BSTR_ERR; + + /* Straddles the end? 
*/ + if (pl >= b1->slen) + { + if ((ret = bsetstr(b1, pos, b2, fill)) < 0) return ret; + if (pos + b2->slen < b1->slen) + { + b1->slen = pos + b2->slen; + b1->data[b1->slen] = (unsigned char)'\0'; + } + return ret; + } + + /* Aliasing case */ + if ((pd = (ptrdiff_t)(b2->data - b1->data)) >= 0 && pd < (ptrdiff_t)b1->slen) + { + if (NULL == (aux = bstrcpy(b2))) return BSTR_ERR; + } + + if (aux->slen > len) + { + if (balloc(b1, b1->slen + aux->slen - len) != BSTR_OK) + { + if (aux != b2) bdestroy(aux); + return BSTR_ERR; + } + } + + if (aux->slen != len) bstr__memmove(b1->data + pos + aux->slen, b1->data + pos + len, b1->slen - (pos + len)); + bstr__memcpy(b1->data + pos, aux->data, aux->slen); + b1->slen += aux->slen - len; + b1->data[b1->slen] = (unsigned char)'\0'; + if (aux != b2) bdestroy(aux); + return BSTR_OK; +} + +/* + * findreplaceengine is used to implement bfindreplace and + * bfindreplacecaseless. It works by breaking the three cases of + * expansion, reduction and replacement, and solving each of these + * in the most efficient way possible. + */ + +typedef int (*instr_fnptr) (const_bstring s1, int pos, const_bstring s2); + +#define INITIAL_STATIC_FIND_INDEX_COUNT 32 + +static int findreplaceengine(bstring b, const_bstring find, const_bstring repl, int pos, instr_fnptr instr) +{ + int i, ret, slen, mlen, delta, acc; + int * d; + int static_d[INITIAL_STATIC_FIND_INDEX_COUNT + 1]; /* This +1 is unnecessary, but it shuts up LINT. */ + ptrdiff_t pd; + bstring auxf = (bstring)find; + bstring auxr = (bstring)repl; + + if (b == NULL || b->data == NULL || find == NULL || + find->data == NULL || repl == NULL || repl->data == NULL || + pos < 0 || find->slen <= 0 || b->mlen < 0 || b->slen > b->mlen || + b->mlen <= 0 || b->slen < 0 || repl->slen < 0) return BSTR_ERR; + if (pos > b->slen - find->slen) return BSTR_OK; + + /* Alias with find string */ + pd = (ptrdiff_t)(find->data - b->data); + if ((ptrdiff_t)(pos - find->slen) < pd && pd < (ptrdiff_t)b->slen) + { + if (NULL == (auxf = bstrcpy(find))) return BSTR_ERR; + } + + /* Alias with repl string */ + pd = (ptrdiff_t)(repl->data - b->data); + if ((ptrdiff_t)(pos - repl->slen) < pd && pd < (ptrdiff_t)b->slen) + { + if (NULL == (auxr = bstrcpy(repl))) + { + if (auxf != find) bdestroy(auxf); + return BSTR_ERR; + } + } + + delta = auxf->slen - auxr->slen; + + /* in-place replacement since find and replace strings are of equal + length */ + if (delta == 0) + { + while ((pos = instr(b, pos, auxf)) >= 0) + { + bstr__memcpy(b->data + pos, auxr->data, auxr->slen); + pos += auxf->slen; + } + if (auxf != find) bdestroy(auxf); + if (auxr != repl) bdestroy(auxr); + return BSTR_OK; + } + + /* shrinking replacement since auxf->slen > auxr->slen */ + if (delta > 0) + { + acc = 0; + + while ((i = instr(b, pos, auxf)) >= 0) + { + if (acc && i > pos) + bstr__memmove(b->data + pos - acc, b->data + pos, i - pos); + if (auxr->slen) + bstr__memcpy(b->data + i - acc, auxr->data, auxr->slen); + acc += delta; + pos = i + auxf->slen; + } + + if (acc) + { + i = b->slen; + if (i > pos) + bstr__memmove(b->data + pos - acc, b->data + pos, i - pos); + b->slen -= acc; + b->data[b->slen] = (unsigned char)'\0'; + } + + if (auxf != find) bdestroy(auxf); + if (auxr != repl) bdestroy(auxr); + return BSTR_OK; + } + + /* expanding replacement since find->slen < repl->slen. Its a lot + more complicated. 
This works by first finding all the matches and + storing them to a growable array, then doing at most one resize of + the destination bstring and then performing the direct memory transfers + of the string segment pieces to form the final result. The growable + array of matches uses a deferred doubling reallocing strategy. What + this means is that it starts as a reasonably fixed sized auto array in + the hopes that many if not most cases will never need to grow this + array. But it switches as soon as the bounds of the array will be + exceeded. An extra find result is always appended to this array that + corresponds to the end of the destination string, so slen is checked + against mlen - 1 rather than mlen before resizing. + */ + + mlen = INITIAL_STATIC_FIND_INDEX_COUNT; + d = (int *)static_d; /* Avoid malloc for trivial/initial cases */ + acc = slen = 0; + + while ((pos = instr(b, pos, auxf)) >= 0) + { + if (slen >= mlen - 1) + { + int sl, *t; + + mlen += mlen; + sl = sizeof(int *) * mlen; + if (static_d == d) d = NULL; /* static_d cannot be realloced */ + if (mlen <= 0 || sl < mlen || NULL == (t = (int *)bstr__realloc(d, sl))) + { + ret = BSTR_ERR; + goto done; + } + if (NULL == d) bstr__memcpy(t, static_d, sizeof(static_d)); + d = t; + } + d[slen] = pos; + slen++; + acc -= delta; + pos += auxf->slen; + if (pos < 0 || acc < 0) + { + ret = BSTR_ERR; + goto done; + } + } + + /* slen <= INITIAL_STATIC_INDEX_COUNT-1 or mlen-1 here. */ + d[slen] = b->slen; + + if (BSTR_OK == (ret = balloc(b, b->slen + acc + 1))) + { + b->slen += acc; + for (i = slen - 1; i >= 0; i--) + { + int s, l; + s = d[i] + auxf->slen; + l = d[i + 1] - s; /* d[slen] may be accessed here. */ + if (l) + { + bstr__memmove(b->data + s + acc, b->data + s, l); + } + if (auxr->slen) + { + bstr__memmove(b->data + s + acc - auxr->slen, + auxr->data, auxr->slen); + } + acc += delta; + } + b->data[b->slen] = (unsigned char)'\0'; + } + +done:; + if (static_d == d) d = NULL; + bstr__free(d); + if (auxf != find) bdestroy(auxf); + if (auxr != repl) bdestroy(auxr); + return ret; +} + +/* int bfindreplace (bstring b, const_bstring find, const_bstring repl, + * int pos) + * + * Replace all occurrences of a find string with a replace string after a + * given point in a bstring. + */ +int bfindreplace(bstring b, const_bstring find, const_bstring repl, int pos) +{ + return findreplaceengine(b, find, repl, pos, binstr); +} + +/* int bfindreplacecaseless (bstring b, const_bstring find, const_bstring repl, + * int pos) + * + * Replace all occurrences of a find string, ignoring case, with a replace + * string after a given point in a bstring. + */ +int bfindreplacecaseless(bstring b, const_bstring find, const_bstring repl, int pos) +{ + return findreplaceengine(b, find, repl, pos, binstrcaseless); +} + +/* int binsertch (bstring b, int pos, int len, unsigned char fill) + * + * Inserts the character fill repeatedly into b at position pos for a + * length len. If the position pos is past the end of b, then the + * character "fill" is appended as necessary to make up the gap between the + * end of b and the position pos + len. 
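+ *
+ * Usage sketch (editorial illustration; values are arbitrary):
+ *
+ *   bstring b = bfromcstr ("ab");
+ *   binsertch (b, 1, 3, 'x');        (b is now "axxxb")
+ *   bdestroy (b);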
+ */ +int binsertch(bstring b, int pos, int len, unsigned char fill) +{ + int d, l, i; + + if (pos < 0 || b == NULL || b->slen < 0 || b->mlen < b->slen || + b->mlen <= 0 || len < 0) return BSTR_ERR; + + /* Compute the two possible end pointers */ + d = b->slen + len; + l = pos + len; + if ((d | l) < 0) return BSTR_ERR; + + if (l > d) + { + /* Inserting past the end of the string */ + if (balloc(b, l + 1) != BSTR_OK) return BSTR_ERR; + pos = b->slen; + b->slen = l; + } + else + { + /* Inserting in the middle of the string */ + if (balloc(b, d + 1) != BSTR_OK) return BSTR_ERR; + for (i = d - 1; i >= l; i--) + { + b->data[i] = b->data[i - len]; + } + b->slen = d; + } + + for (i = pos; i < l; i++) + b->data[i] = fill; + b->data[b->slen] = (unsigned char)'\0'; + return BSTR_OK; +} + +/* int bpattern (bstring b, int len) + * + * Replicate the bstring, b in place, end to end repeatedly until it + * surpasses len characters, then chop the result to exactly len characters. + * This function operates in-place. The function will return with BSTR_ERR + * if b is NULL or of length 0, otherwise BSTR_OK is returned. + */ +int bpattern(bstring b, int len) +{ + int i, d; + + d = blength(b); + if (d <= 0 || len < 0 || balloc(b, len + 1) != BSTR_OK) return BSTR_ERR; + if (len > 0) + { + if (d == 1) return bsetstr(b, len, NULL, b->data[0]); + for (i = d; i < len; i++) + b->data[i] = b->data[i - d]; + } + b->data[len] = (unsigned char)'\0'; + b->slen = len; + return BSTR_OK; +} + +#define BS_BUFF_SZ (1024) + +/* int breada (bstring b, bNread readPtr, void * parm) + * + * Use a finite buffer fread-like function readPtr to concatenate to the + * bstring b the entire contents of file-like source data in a roughly + * efficient way. + */ +int breada(bstring b, bNread readPtr, void * parm) +{ + int i, l, n; + + if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen || + b->mlen <= 0 || readPtr == NULL) return BSTR_ERR; + + i = b->slen; + for (n = i + 16;; n += ((n < BS_BUFF_SZ) ? n : BS_BUFF_SZ)) + { + if (BSTR_OK != balloc(b, n + 1)) return BSTR_ERR; + l = (int)readPtr((void *)(b->data + i), 1, n - i, parm); + i += l; + b->slen = i; + if (i < n) break; + } + + b->data[i] = (unsigned char)'\0'; + return BSTR_OK; +} + +/* bstring bread (bNread readPtr, void * parm) + * + * Use a finite buffer fread-like function readPtr to create a bstring + * filled with the entire contents of file-like source data in a roughly + * efficient way. + */ +bstring bread(bNread readPtr, void * parm) +{ + bstring buff; + + if (0 > breada(buff = bfromcstr(""), readPtr, parm)) + { + bdestroy(buff); + return NULL; + } + return buff; +} + +/* int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator) + * + * Use an fgetc-like single character stream reading function (getcPtr) to + * obtain a sequence of characters which are concatenated to the end of the + * bstring b. The stream read is terminated by the passed in terminator + * parameter. + * + * If getcPtr returns with a negative number, or the terminator character + * (which is appended) is read, then the stream reading is halted and the + * function returns with a partial result in b. If there is an empty partial + * result, 1 is returned. If no characters are read, or there is some other + * detectable error, BSTR_ERR is returned. 
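+ *
+ * Usage sketch (editorial illustration; fp is assumed to be a FILE *
+ * opened for reading, and the (bNgetc) fgetc cast follows the style of the
+ * library's own fgetc-based examples):
+ *
+ *   int nlines = 0;
+ *   bstring line = bfromcstr ("");
+ *   while (0 == bassigngets (line, (bNgetc) fgetc, fp, '\n')) nlines++;
+ *   bdestroy (line);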
+ */ +int bassigngets(bstring b, bNgetc getcPtr, void * parm, char terminator) +{ + int c, d, e; + + if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen || + b->mlen <= 0 || getcPtr == NULL) return BSTR_ERR; + d = 0; + e = b->mlen - 2; + + while ((c = getcPtr(parm)) >= 0) + { + if (d > e) + { + b->slen = d; + if (balloc(b, d + 2) != BSTR_OK) return BSTR_ERR; + e = b->mlen - 2; + } + b->data[d] = (unsigned char)c; + d++; + if (c == terminator) break; + } + + b->data[d] = (unsigned char)'\0'; + b->slen = d; + + return d == 0 && c < 0; +} + +/* int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator) + * + * Use an fgetc-like single character stream reading function (getcPtr) to + * obtain a sequence of characters which are concatenated to the end of the + * bstring b. The stream read is terminated by the passed in terminator + * parameter. + * + * If getcPtr returns with a negative number, or the terminator character + * (which is appended) is read, then the stream reading is halted and the + * function returns with a partial result concatentated to b. If there is + * an empty partial result, 1 is returned. If no characters are read, or + * there is some other detectable error, BSTR_ERR is returned. + */ +int bgetsa(bstring b, bNgetc getcPtr, void * parm, char terminator) +{ + int c, d, e; + + if (b == NULL || b->mlen <= 0 || b->slen < 0 || b->mlen < b->slen || + b->mlen <= 0 || getcPtr == NULL) return BSTR_ERR; + d = b->slen; + e = b->mlen - 2; + + while ((c = getcPtr(parm)) >= 0) + { + if (d > e) + { + b->slen = d; + if (balloc(b, d + 2) != BSTR_OK) return BSTR_ERR; + e = b->mlen - 2; + } + b->data[d] = (unsigned char)c; + d++; + if (c == terminator) break; + } + + b->data[d] = (unsigned char)'\0'; + b->slen = d; + + return d == 0 && c < 0; +} + +/* bstring bgets (bNgetc getcPtr, void * parm, char terminator) + * + * Use an fgetc-like single character stream reading function (getcPtr) to + * obtain a sequence of characters which are concatenated into a bstring. + * The stream read is terminated by the passed in terminator function. + * + * If getcPtr returns with a negative number, or the terminator character + * (which is appended) is read, then the stream reading is halted and the + * result obtained thus far is returned. If no characters are read, or + * there is some other detectable error, NULL is returned. + */ +bstring bgets(bNgetc getcPtr, void * parm, char terminator) +{ + bstring buff; + + if (0 > bgetsa(buff = bfromcstr(""), getcPtr, parm, terminator) || 0 >= buff->slen) + { + bdestroy(buff); + buff = NULL; + } + return buff; +} + +struct bStream +{ + bstring buff; /* Buffer for over-reads */ + void * parm; /* The stream handle for core stream */ + bNread readFnPtr; /* fread compatible fnptr for core stream */ + int isEOF; /* track file's EOF state */ + int maxBuffSz; +}; + +/* struct bStream * bsopen (bNread readPtr, void * parm) + * + * Wrap a given open stream (described by a fread compatible function + * pointer and stream handle) into an open bStream suitable for the bstring + * library streaming functions. 
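+ *
+ * Usage sketch (editorial illustration; fp is assumed to be a FILE *
+ * opened for reading, and the (bNread) fread cast follows the style of the
+ * library's own fread-based examples):
+ *
+ *   struct bStream * s = bsopen ((bNread) fread, fp);
+ *   bstring line = bfromcstr ("");
+ *   int lineCount = 0;
+ *   while (BSTR_OK == bsreadln (line, s, '\n')) lineCount++;
+ *   bsclose (s);
+ *   bdestroy (line);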
+ */ +struct bStream * bsopen(bNread readPtr, void * parm) +{ + struct bStream * s; + + if (readPtr == NULL) return NULL; + s = (struct bStream *)bstr__alloc(sizeof(struct bStream)); + if (s == NULL) return NULL; + s->parm = parm; + s->buff = bfromcstr(""); + s->readFnPtr = readPtr; + s->maxBuffSz = BS_BUFF_SZ; + s->isEOF = 0; + return s; +} + +/* int bsbufflength (struct bStream * s, int sz) + * + * Set the length of the buffer used by the bStream. If sz is zero, the + * length is not set. This function returns with the previous length. + */ +int bsbufflength(struct bStream * s, int sz) +{ + int oldSz; + if (s == NULL || sz < 0) return BSTR_ERR; + oldSz = s->maxBuffSz; + if (sz > 0) s->maxBuffSz = sz; + return oldSz; +} + +int bseof(const struct bStream * s) +{ + if (s == NULL || s->readFnPtr == NULL) return BSTR_ERR; + return s->isEOF && (s->buff->slen == 0); +} + +/* void * bsclose (struct bStream * s) + * + * Close the bStream, and return the handle to the stream that was originally + * used to open the given stream. + */ +void * bsclose(struct bStream * s) +{ + void * parm; + if (s == NULL) return NULL; + s->readFnPtr = NULL; + if (s->buff) bdestroy(s->buff); + s->buff = NULL; + parm = s->parm; + s->parm = NULL; + s->isEOF = 1; + bstr__free(s); + return parm; +} + +/* int bsreadlna (bstring r, struct bStream * s, char terminator) + * + * Read a bstring terminated by the terminator character or the end of the + * stream from the bStream (s) and return it into the parameter r. This + * function may read additional characters from the core stream that are not + * returned, but will be retained for subsequent read operations. + */ +int bsreadlna(bstring r, struct bStream * s, char terminator) +{ + int i, l, ret, rlo; + char * b; + struct tagbstring x; + + if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 || + r->slen < 0 || r->mlen < r->slen) return BSTR_ERR; + l = s->buff->slen; + if (BSTR_OK != balloc(s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + b = (char *)s->buff->data; + x.data = (unsigned char *)b; + + /* First check if the current buffer holds the terminator */ + b[l] = terminator; /* Set sentinel */ + for (i = 0; b[i] != terminator; i++) + ; + if (i < l) + { + x.slen = i + 1; + ret = bconcat(r, &x); + s->buff->slen = l; + if (BSTR_OK == ret) bdelete(s->buff, 0, i + 1); + return BSTR_OK; + } + + rlo = r->slen; + + /* If not then just concatenate the entire buffer to the output */ + x.slen = l; + if (BSTR_OK != bconcat(r, &x)) return BSTR_ERR; + + /* Perform direct in-place reads into the destination to allow for + the minimum of data-copies */ + for (;;) + { + if (BSTR_OK != balloc(r, r->slen + s->maxBuffSz + 1)) return BSTR_ERR; + b = (char *)(r->data + r->slen); + l = (int)s->readFnPtr(b, 1, s->maxBuffSz, s->parm); + if (l <= 0) + { + r->data[r->slen] = (unsigned char)'\0'; + s->buff->slen = 0; + s->isEOF = 1; + /* If nothing was read return with an error message */ + return BSTR_ERR & -(r->slen == rlo); + } + b[l] = terminator; /* Set sentinel */ + for (i = 0; b[i] != terminator; i++) + ; + if (i < l) break; + r->slen += l; + } + + /* Terminator found, push over-read back to buffer */ + i++; + r->slen += i; + s->buff->slen = l - i; + bstr__memcpy(s->buff->data, b + i, l - i); + r->data[r->slen] = (unsigned char)'\0'; + return BSTR_OK; +} + +/* int bsreadlnsa (bstring r, struct bStream * s, bstring term) + * + * Read a bstring terminated by any character in the term string or the end + * of the stream from the bStream (s) and return it into the parameter r. 
+ * This function may read additional characters from the core stream that + * are not returned, but will be retained for subsequent read operations. + */ +int bsreadlnsa(bstring r, struct bStream * s, const_bstring term) +{ + int i, l, ret, rlo; + unsigned char * b; + struct tagbstring x; + struct charField cf; + + if (s == NULL || s->buff == NULL || r == NULL || term == NULL || + term->data == NULL || r->mlen <= 0 || r->slen < 0 || + r->mlen < r->slen) return BSTR_ERR; + if (term->slen == 1) return bsreadlna(r, s, term->data[0]); + if (term->slen < 1 || buildCharField(&cf, term)) return BSTR_ERR; + + l = s->buff->slen; + if (BSTR_OK != balloc(s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + b = (unsigned char *)s->buff->data; + x.data = b; + + /* First check if the current buffer holds the terminator */ + b[l] = term->data[0]; /* Set sentinel */ + for (i = 0; !testInCharField(&cf, b[i]); i++) + ; + if (i < l) + { + x.slen = i + 1; + ret = bconcat(r, &x); + s->buff->slen = l; + if (BSTR_OK == ret) bdelete(s->buff, 0, i + 1); + return BSTR_OK; + } + + rlo = r->slen; + + /* If not then just concatenate the entire buffer to the output */ + x.slen = l; + if (BSTR_OK != bconcat(r, &x)) return BSTR_ERR; + + /* Perform direct in-place reads into the destination to allow for + the minimum of data-copies */ + for (;;) + { + if (BSTR_OK != balloc(r, r->slen + s->maxBuffSz + 1)) return BSTR_ERR; + b = (unsigned char *)(r->data + r->slen); + l = (int)s->readFnPtr(b, 1, s->maxBuffSz, s->parm); + if (l <= 0) + { + r->data[r->slen] = (unsigned char)'\0'; + s->buff->slen = 0; + s->isEOF = 1; + /* If nothing was read return with an error message */ + return BSTR_ERR & -(r->slen == rlo); + } + + b[l] = term->data[0]; /* Set sentinel */ + for (i = 0; !testInCharField(&cf, b[i]); i++) + ; + if (i < l) break; + r->slen += l; + } + + /* Terminator found, push over-read back to buffer */ + i++; + r->slen += i; + s->buff->slen = l - i; + bstr__memcpy(s->buff->data, b + i, l - i); + r->data[r->slen] = (unsigned char)'\0'; + return BSTR_OK; +} + +/* int bsreada (bstring r, struct bStream * s, int n) + * + * Read a bstring of length n (or, if it is fewer, as many bytes as is + * remaining) from the bStream. This function may read additional + * characters from the core stream that are not returned, but will be + * retained for subsequent read operations. This function will not read + * additional characters from the core stream beyond virtual stream pointer. 
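+ *
+ * Usage sketch (editorial illustration; s is assumed to be a bStream
+ * obtained from bsopen):
+ *
+ *   bstring whole = bfromcstr ("");
+ *   while (BSTR_OK == bsreada (whole, s, 4096)) ;
+ *   (whole now holds the remaining stream contents, appended 4096 bytes
+ *   at a time)
+ *   bdestroy (whole);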
+ */ +int bsreada(bstring r, struct bStream * s, int n) +{ + int l, ret, orslen; + char * b; + struct tagbstring x; + + if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 + || r->slen < 0 || r->mlen < r->slen || n <= 0) return BSTR_ERR; + + n += r->slen; + if (n <= 0) return BSTR_ERR; + + l = s->buff->slen; + + orslen = r->slen; + + if (0 == l) + { + if (s->isEOF) return BSTR_ERR; + if (r->mlen > n) + { + l = (int)s->readFnPtr(r->data + r->slen, 1, n - r->slen, s->parm); + if (0 >= l || l > n - r->slen) + { + s->isEOF = 1; + return BSTR_ERR; + } + r->slen += l; + r->data[r->slen] = (unsigned char)'\0'; + return 0; + } + } + + if (BSTR_OK != balloc(s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + b = (char *)s->buff->data; + x.data = (unsigned char *)b; + + do + { + if (l + r->slen >= n) + { + x.slen = n - r->slen; + ret = bconcat(r, &x); + s->buff->slen = l; + if (BSTR_OK == ret) bdelete(s->buff, 0, x.slen); + return BSTR_ERR & -(r->slen == orslen); + } + + x.slen = l; + if (BSTR_OK != bconcat(r, &x)) break; + + l = n - r->slen; + if (l > s->maxBuffSz) l = s->maxBuffSz; + + l = (int)s->readFnPtr(b, 1, l, s->parm); + } + while (l > 0); + if (l < 0) l = 0; + if (l == 0) s->isEOF = 1; + s->buff->slen = l; + return BSTR_ERR & -(r->slen == orslen); +} + +/* int bsreadln (bstring r, struct bStream * s, char terminator) + * + * Read a bstring terminated by the terminator character or the end of the + * stream from the bStream (s) and return it into the parameter r. This + * function may read additional characters from the core stream that are not + * returned, but will be retained for subsequent read operations. + */ +int bsreadln(bstring r, struct bStream * s, char terminator) +{ + if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0) + return BSTR_ERR; + if (BSTR_OK != balloc(s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + r->slen = 0; + return bsreadlna(r, s, terminator); +} + +/* int bsreadlns (bstring r, struct bStream * s, bstring term) + * + * Read a bstring terminated by any character in the term string or the end + * of the stream from the bStream (s) and return it into the parameter r. + * This function may read additional characters from the core stream that + * are not returned, but will be retained for subsequent read operations. + */ +int bsreadlns(bstring r, struct bStream * s, const_bstring term) +{ + if (s == NULL || s->buff == NULL || r == NULL || term == NULL + || term->data == NULL || r->mlen <= 0) return BSTR_ERR; + if (term->slen == 1) return bsreadln(r, s, term->data[0]); + if (term->slen < 1) return BSTR_ERR; + if (BSTR_OK != balloc(s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + r->slen = 0; + return bsreadlnsa(r, s, term); +} + +/* int bsread (bstring r, struct bStream * s, int n) + * + * Read a bstring of length n (or, if it is fewer, as many bytes as is + * remaining) from the bStream. This function may read additional + * characters from the core stream that are not returned, but will be + * retained for subsequent read operations. This function will not read + * additional characters from the core stream beyond virtual stream pointer. + */ +int bsread(bstring r, struct bStream * s, int n) +{ + if (s == NULL || s->buff == NULL || r == NULL || r->mlen <= 0 + || n <= 0) return BSTR_ERR; + if (BSTR_OK != balloc(s->buff, s->maxBuffSz + 1)) return BSTR_ERR; + r->slen = 0; + return bsreada(r, s, n); +} + +/* int bsunread (struct bStream * s, const_bstring b) + * + * Insert a bstring into the bStream at the current position. 
These + * characters will be read prior to those that actually come from the core + * stream. + */ +int bsunread(struct bStream * s, const_bstring b) +{ + if (s == NULL || s->buff == NULL) return BSTR_ERR; + return binsert(s->buff, 0, b, (unsigned char)'?'); +} + +/* int bspeek (bstring r, const struct bStream * s) + * + * Return the currently buffered characters from the bStream that will be + * read prior to reads from the core stream. + */ +int bspeek(bstring r, const struct bStream * s) +{ + if (s == NULL || s->buff == NULL) return BSTR_ERR; + return bassign(r, s->buff); +} + +/* bstring bjoin (const struct bstrList * bl, const_bstring sep); + * + * Join the entries of a bstrList into one bstring by sequentially + * concatenating them with the sep string in between. If there is an error + * NULL is returned, otherwise a bstring with the correct result is returned. + */ +bstring bjoin(const struct bstrList * bl, const_bstring sep) +{ + bstring b; + int i, c, v; + + if (bl == NULL || bl->qty < 0) return NULL; + if (sep != NULL && (sep->slen < 0 || sep->data == NULL)) return NULL; + + for (i = 0, c = 1; i < bl->qty; i++) + { + v = bl->entry[i]->slen; + if (v < 0) return NULL; /* Invalid input */ + c += v; + if (c < 0) return NULL; /* Wrap around ?? */ + } + + if (sep != NULL) c += (bl->qty - 1) * sep->slen; + + b = (bstring)bstr__alloc(sizeof(struct tagbstring)); + if (NULL == b) return NULL; /* Out of memory */ + b->data = (unsigned char *)bstr__alloc(c); + if (b->data == NULL) + { + bstr__free(b); + return NULL; + } + + b->mlen = c; + b->slen = c - 1; + + for (i = 0, c = 0; i < bl->qty; i++) + { + if (i > 0 && sep != NULL) + { + bstr__memcpy(b->data + c, sep->data, sep->slen); + c += sep->slen; + } + v = bl->entry[i]->slen; + bstr__memcpy(b->data + c, bl->entry[i]->data, v); + c += v; + } + b->data[c] = (unsigned char)'\0'; + return b; +} + +#define BSSSC_BUFF_LEN (256) + +/* int bssplitscb (struct bStream * s, const_bstring splitStr, + * int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) + * + * Iterate the set of disjoint sequential substrings read from a stream + * divided by any of the characters in splitStr. An empty splitStr causes + * the whole stream to be iterated once. + * + * Note: At the point of calling the cb function, the bStream pointer is + * pointed exactly at the position right after having read the split + * character. The cb function can act on the stream by causing the bStream + * pointer to move, and bssplitscb will continue by starting the next split + * at the position of the pointer after the return from cb. + * + * However, if the cb causes the bStream s to be destroyed then the cb must + * return with a negative value, otherwise bssplitscb will continue in an + * undefined manner. 
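+ *
+ * Usage sketch (editorial illustration; s is assumed to be a bStream from
+ * bsopen, and countCb is a hypothetical callback):
+ *
+ *   static int countCb (void * parm, int ofs, const_bstring entry) {
+ *       (void) ofs; (void) entry;
+ *       *(int *) parm += 1;
+ *       return 0;
+ *   }
+ *
+ *   int fields = 0;
+ *   struct tagbstring comma = bsStatic (",");
+ *   bssplitscb (s, &comma, countCb, &fields);     (counts comma-separated
+ *                                                  fields read from s)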
+ */ +int bssplitscb(struct bStream * s, const_bstring splitStr, + int (* cb)(void * parm, int ofs, const_bstring entry), void * parm) +{ + struct charField chrs; + bstring buff; + int i, p, ret; + + if (cb == NULL || s == NULL || s->readFnPtr == NULL + || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; + + if (NULL == (buff = bfromcstr(""))) return BSTR_ERR; + + if (splitStr->slen == 0) + { + while (bsreada(buff, s, BSSSC_BUFF_LEN) >= 0) + ; + if ((ret = cb(parm, 0, buff)) > 0) + ret = 0; + } + else + { + buildCharField(&chrs, splitStr); + ret = p = i = 0; + for (;;) + { + if (i >= buff->slen) + { + bsreada(buff, s, BSSSC_BUFF_LEN); + if (i >= buff->slen) + { + if (0 < (ret = cb(parm, p, buff))) ret = 0; + break; + } + } + if (testInCharField(&chrs, buff->data[i])) + { + struct tagbstring t; + unsigned char c; + + blk2tbstr(t, buff->data + i + 1, buff->slen - (i + 1)); + if ((ret = bsunread(s, &t)) < 0) break; + buff->slen = i; + c = buff->data[i]; + buff->data[i] = (unsigned char)'\0'; + if ((ret = cb(parm, p, buff)) < 0) break; + buff->data[i] = c; + buff->slen = 0; + p += i + 1; + i = -1; + } + i++; + } + } + + bdestroy(buff); + return ret; +} + +/* int bssplitstrcb (struct bStream * s, const_bstring splitStr, + * int (* cb) (void * parm, int ofs, const_bstring entry), void * parm) + * + * Iterate the set of disjoint sequential substrings read from a stream + * divided by the entire substring splitStr. An empty splitStr causes + * each character of the stream to be iterated. + * + * Note: At the point of calling the cb function, the bStream pointer is + * pointed exactly at the position right after having read the split + * character. The cb function can act on the stream by causing the bStream + * pointer to move, and bssplitscb will continue by starting the next split + * at the position of the pointer after the return from cb. + * + * However, if the cb causes the bStream s to be destroyed then the cb must + * return with a negative value, otherwise bssplitscb will continue in an + * undefined manner. + */ +int bssplitstrcb(struct bStream * s, const_bstring splitStr, + int (* cb)(void * parm, int ofs, const_bstring entry), void * parm) +{ + bstring buff; + int i, p, ret; + + if (cb == NULL || s == NULL || s->readFnPtr == NULL + || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; + + if (splitStr->slen == 1) return bssplitscb(s, splitStr, cb, parm); + + if (NULL == (buff = bfromcstr(""))) return BSTR_ERR; + + if (splitStr->slen == 0) + { + for (i = 0; bsreada(buff, s, BSSSC_BUFF_LEN) >= 0; i++) + { + if ((ret = cb(parm, 0, buff)) < 0) + { + bdestroy(buff); + return ret; + } + buff->slen = 0; + } + return BSTR_OK; + } + else + { + ret = p = i = 0; + for (i = p = 0;;) + { + if ((ret = binstr(buff, 0, splitStr)) >= 0) + { + struct tagbstring t; + blk2tbstr(t, buff->data, ret); + i = ret + splitStr->slen; + if ((ret = cb(parm, p, &t)) < 0) break; + p += i; + bdelete(buff, 0, i); + } + else + { + bsreada(buff, s, BSSSC_BUFF_LEN); + if (bseof(s)) + { + if ((ret = cb(parm, p, buff)) > 0) ret = 0; + break; + } + } + } + } + + bdestroy(buff); + return ret; +} + +/* int bstrListCreate (void) + * + * Create a bstrList. 
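+ *
+ * Usage sketch (editorial illustration of manually filling a list):
+ *
+ *   struct bstrList * sl = bstrListCreate ();
+ *   if (sl && BSTR_OK == bstrListAlloc (sl, 8)) {
+ *       sl->entry[sl->qty++] = bfromcstr ("first");
+ *   }
+ *   bstrListDestroy (sl);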
+ */ +struct bstrList * bstrListCreate(void) +{ + struct bstrList * sl = (struct bstrList *)bstr__alloc(sizeof(struct bstrList)); + if (sl) + { + sl->entry = (bstring *)bstr__alloc(1 * sizeof(bstring)); + if (!sl->entry) + { + bstr__free(sl); + sl = NULL; + } + else + { + sl->qty = 0; + sl->mlen = 1; + } + } + return sl; +} + +/* int bstrListDestroy (struct bstrList * sl) + * + * Destroy a bstrList that has been created by bsplit, bsplits or bstrListCreate. + */ +int bstrListDestroy(struct bstrList * sl) +{ + int i; + if (sl == NULL || sl->qty < 0) return BSTR_ERR; + for (i = 0; i < sl->qty; i++) + { + if (sl->entry[i]) + { + bdestroy(sl->entry[i]); + sl->entry[i] = NULL; + } + } + sl->qty = -1; + sl->mlen = -1; + bstr__free(sl->entry); + sl->entry = NULL; + bstr__free(sl); + return BSTR_OK; +} + +/* int bstrListAlloc (struct bstrList * sl, int msz) + * + * Ensure that there is memory for at least msz number of entries for the + * list. + */ +int bstrListAlloc(struct bstrList * sl, int msz) +{ + bstring * l; + int smsz; + size_t nsz; + if (!sl || msz <= 0 || !sl->entry || sl->qty < 0 || sl->mlen <= 0 || sl->qty > sl->mlen) return BSTR_ERR; + if (sl->mlen >= msz) return BSTR_OK; + smsz = snapUpSize(msz); + nsz = ((size_t)smsz) * sizeof(bstring); + if (nsz < (size_t)smsz) return BSTR_ERR; + l = (bstring *)bstr__realloc(sl->entry, nsz); + if (!l) + { + smsz = msz; + nsz = ((size_t)smsz) * sizeof(bstring); + l = (bstring *)bstr__realloc(sl->entry, nsz); + if (!l) return BSTR_ERR; + } + sl->mlen = smsz; + sl->entry = l; + return BSTR_OK; +} + +/* int bstrListAllocMin (struct bstrList * sl, int msz) + * + * Try to allocate the minimum amount of memory for the list to include at + * least msz entries or sl->qty whichever is greater. + */ +int bstrListAllocMin(struct bstrList * sl, int msz) +{ + bstring * l; + size_t nsz; + if (!sl || msz <= 0 || !sl->entry || sl->qty < 0 || sl->mlen <= 0 || sl->qty > sl->mlen) return BSTR_ERR; + if (msz < sl->qty) msz = sl->qty; + if (sl->mlen == msz) return BSTR_OK; + nsz = ((size_t)msz) * sizeof(bstring); + if (nsz < (size_t)msz) return BSTR_ERR; + l = (bstring *)bstr__realloc(sl->entry, nsz); + if (!l) return BSTR_ERR; + sl->mlen = msz; + sl->entry = l; + return BSTR_OK; +} + +/* int bsplitcb (const_bstring str, unsigned char splitChar, int pos, + * int (* cb) (void * parm, int ofs, int len), void * parm) + * + * Iterate the set of disjoint sequential substrings over str divided by the + * character in splitChar. + * + * Note: Non-destructive modification of str from within the cb function + * while performing this split is not undefined. bsplitcb behaves in + * sequential lock step with calls to cb. I.e., after returning from a cb + * that return a non-negative integer, bsplitcb continues from the position + * 1 character after the last detected split character and it will halt + * immediately if the length of str falls below this point. However, if the + * cb function destroys str, then it *must* return with a negative value, + * otherwise bsplitcb will continue in an undefined manner. 
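+ *
+ * Usage sketch (editorial illustration; fieldCb is a hypothetical callback):
+ *
+ *   static int fieldCb (void * parm, int ofs, int len) {
+ *       (void) parm; (void) ofs; (void) len;     inspect or copy the field
+ *       return 0;
+ *   }
+ *
+ *   struct tagbstring csv = bsStatic ("a,b,,c");
+ *   bsplitcb (&csv, ',', 0, fieldCb, NULL);      (fieldCb is called 4 times)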
+ */ +int bsplitcb(const_bstring str, unsigned char splitChar, int pos, + int (* cb)(void * parm, int ofs, int len), void * parm) +{ + int i, p, ret; + + if (cb == NULL || str == NULL || pos < 0 || pos > str->slen) + return BSTR_ERR; + + p = pos; + do + { + for (i = p; i < str->slen; i++) + { + if (str->data[i] == splitChar) break; + } + if ((ret = cb(parm, p, i - p)) < 0) return ret; + p = i + 1; + } + while (p <= str->slen); + return BSTR_OK; +} + +/* int bsplitscb (const_bstring str, const_bstring splitStr, int pos, + * int (* cb) (void * parm, int ofs, int len), void * parm) + * + * Iterate the set of disjoint sequential substrings over str divided by any + * of the characters in splitStr. An empty splitStr causes the whole str to + * be iterated once. + * + * Note: Non-destructive modification of str from within the cb function + * while performing this split is not undefined. bsplitscb behaves in + * sequential lock step with calls to cb. I.e., after returning from a cb + * that return a non-negative integer, bsplitscb continues from the position + * 1 character after the last detected split character and it will halt + * immediately if the length of str falls below this point. However, if the + * cb function destroys str, then it *must* return with a negative value, + * otherwise bsplitscb will continue in an undefined manner. + */ +int bsplitscb(const_bstring str, const_bstring splitStr, int pos, + int (* cb)(void * parm, int ofs, int len), void * parm) +{ + struct charField chrs; + int i, p, ret; + + if (cb == NULL || str == NULL || pos < 0 || pos > str->slen + || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; + if (splitStr->slen == 0) + { + if ((ret = cb(parm, 0, str->slen)) > 0) ret = 0; + return ret; + } + + if (splitStr->slen == 1) + return bsplitcb(str, splitStr->data[0], pos, cb, parm); + + buildCharField(&chrs, splitStr); + + p = pos; + do + { + for (i = p; i < str->slen; i++) + { + if (testInCharField(&chrs, str->data[i])) break; + } + if ((ret = cb(parm, p, i - p)) < 0) return ret; + p = i + 1; + } + while (p <= str->slen); + return BSTR_OK; +} + +/* int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos, + * int (* cb) (void * parm, int ofs, int len), void * parm) + * + * Iterate the set of disjoint sequential substrings over str divided by the + * substring splitStr. An empty splitStr causes the whole str to be + * iterated once. + * + * Note: Non-destructive modification of str from within the cb function + * while performing this split is not undefined. bsplitstrcb behaves in + * sequential lock step with calls to cb. I.e., after returning from a cb + * that return a non-negative integer, bsplitscb continues from the position + * 1 character after the last detected split character and it will halt + * immediately if the length of str falls below this point. However, if the + * cb function destroys str, then it *must* return with a negative value, + * otherwise bsplitscb will continue in an undefined manner. 
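+ *
+ * Usage sketch (editorial illustration; fieldCb is the hypothetical
+ * callback from the bsplitcb example above):
+ *
+ *   struct tagbstring rec = bsStatic ("a::b::c");
+ *   struct tagbstring sep = bsStatic ("::");
+ *   bsplitstrcb (&rec, &sep, 0, fieldCb, NULL);  (fieldCb is called 3 times)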
+ */ +int bsplitstrcb(const_bstring str, const_bstring splitStr, int pos, + int (* cb)(void * parm, int ofs, int len), void * parm) +{ + int i, p, ret; + + if (cb == NULL || str == NULL || pos < 0 || pos > str->slen + || splitStr == NULL || splitStr->slen < 0) return BSTR_ERR; + + if (0 == splitStr->slen) + { + for (i = pos; i < str->slen; i++) + { + if ((ret = cb(parm, i, 1)) < 0) return ret; + } + return BSTR_OK; + } + + if (splitStr->slen == 1) + return bsplitcb(str, splitStr->data[0], pos, cb, parm); + + for (i = p = pos; i <= str->slen - splitStr->slen; i++) + { + if (0 == bstr__memcmp(splitStr->data, str->data + i, splitStr->slen)) + { + if ((ret = cb(parm, p, i - p)) < 0) return ret; + i += splitStr->slen; + p = i; + } + } + if ((ret = cb(parm, p, str->slen - p)) < 0) return ret; + return BSTR_OK; +} + +struct genBstrList +{ + bstring b; + struct bstrList * bl; +}; + +static int bscb(void * parm, int ofs, int len) +{ + struct genBstrList * g = (struct genBstrList *)parm; + if (g->bl->qty >= g->bl->mlen) + { + int mlen = g->bl->mlen * 2; + bstring * tbl; + + while (g->bl->qty >= mlen) + { + if (mlen < g->bl->mlen) return BSTR_ERR; + mlen += mlen; + } + + tbl = (bstring *)bstr__realloc(g->bl->entry, sizeof(bstring) * mlen); + if (tbl == NULL) return BSTR_ERR; + + g->bl->entry = tbl; + g->bl->mlen = mlen; + } + + g->bl->entry[g->bl->qty] = bmidstr(g->b, ofs, len); + g->bl->qty++; + return BSTR_OK; +} + +/* struct bstrList * bsplit (const_bstring str, unsigned char splitChar) + * + * Create an array of sequential substrings from str divided by the character + * splitChar. + */ +struct bstrList * bsplit(const_bstring str, unsigned char splitChar) +{ + struct genBstrList g; + + if (str == NULL || str->data == NULL || str->slen < 0) return NULL; + + g.bl = (struct bstrList *)bstr__alloc(sizeof(struct bstrList)); + if (g.bl == NULL) return NULL; + g.bl->mlen = 4; + g.bl->entry = (bstring *)bstr__alloc(g.bl->mlen * sizeof(bstring)); + if (NULL == g.bl->entry) + { + bstr__free(g.bl); + return NULL; + } + + g.b = (bstring)str; + g.bl->qty = 0; + if (bsplitcb(str, splitChar, 0, bscb, &g) < 0) + { + bstrListDestroy(g.bl); + return NULL; + } + return g.bl; +} + +/* struct bstrList * bsplitstr (const_bstring str, const_bstring splitStr) + * + * Create an array of sequential substrings from str divided by the entire + * substring splitStr. + */ +struct bstrList * bsplitstr(const_bstring str, const_bstring splitStr) +{ + struct genBstrList g; + + if (str == NULL || str->data == NULL || str->slen < 0) return NULL; + + g.bl = (struct bstrList *)bstr__alloc(sizeof(struct bstrList)); + if (g.bl == NULL) return NULL; + g.bl->mlen = 4; + g.bl->entry = (bstring *)bstr__alloc(g.bl->mlen * sizeof(bstring)); + if (NULL == g.bl->entry) + { + bstr__free(g.bl); + return NULL; + } + + g.b = (bstring)str; + g.bl->qty = 0; + if (bsplitstrcb(str, splitStr, 0, bscb, &g) < 0) + { + bstrListDestroy(g.bl); + return NULL; + } + return g.bl; +} + +/* struct bstrList * bsplits (const_bstring str, bstring splitStr) + * + * Create an array of sequential substrings from str divided by any of the + * characters in splitStr. An empty splitStr causes a single entry bstrList + * containing a copy of str to be returned. 
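+ *
+ * Usage sketch (editorial illustration; values are arbitrary):
+ *
+ *   struct tagbstring src  = bsStatic ("a, b;c");
+ *   struct tagbstring seps = bsStatic (",; ");
+ *   struct bstrList * sl = bsplits (&src, &seps);
+ *   (sl->qty == 4, with entries "a", "", "b", "c")
+ *   bstrListDestroy (sl);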
+ */ +struct bstrList * bsplits(const_bstring str, const_bstring splitStr) +{ + struct genBstrList g; + + if (str == NULL || str->slen < 0 || str->data == NULL || + splitStr == NULL || splitStr->slen < 0 || splitStr->data == NULL) + return NULL; + + g.bl = (struct bstrList *)bstr__alloc(sizeof(struct bstrList)); + if (g.bl == NULL) return NULL; + g.bl->mlen = 4; + g.bl->entry = (bstring *)bstr__alloc(g.bl->mlen * sizeof(bstring)); + if (NULL == g.bl->entry) + { + bstr__free(g.bl); + return NULL; + } + g.b = (bstring)str; + g.bl->qty = 0; + + if (bsplitscb(str, splitStr, 0, bscb, &g) < 0) + { + bstrListDestroy(g.bl); + return NULL; + } + return g.bl; +} + +#if defined(__TURBOC__) && !defined(__BORLANDC__) +# ifndef BSTRLIB_NOVSNP +# define BSTRLIB_NOVSNP +# endif +#endif + +/* Give WATCOM C/C++, MSVC some latitude for their non-support of vsnprintf */ +#if defined(__WATCOMC__) || defined(_MSC_VER) +#define exvsnprintf(r, b, n, f, a) {r = _vsnprintf (b,n,f,a);} +#else +#ifdef BSTRLIB_NOVSNP +/* This is just a hack. If you are using a system without a vsnprintf, it is + not recommended that bformat be used at all. */ +#define exvsnprintf(r, b, n, f, a) {vsprintf (b,f,a); r = -1;} +#define START_VSNBUFF (256) +#else + +#ifdef __GNUC__ +/* Something is making gcc complain about this prototype not being here, so + I've just gone ahead and put it in. */ +//extern int vsnprintf (char *buf, size_t count, const char *format, va_list arg); +#endif + +#define exvsnprintf(r, b, n, f, a) {r = vsnprintf (b,n,f,a);} +#endif +#endif + +#if !defined(BSTRLIB_NOVSNP) + +#ifndef START_VSNBUFF +#define START_VSNBUFF (16) +#endif + +/* On IRIX vsnprintf returns n-1 when the operation would overflow the target + buffer, WATCOM and MSVC both return -1, while C99 requires that the + returned value be exactly what the length would be if the buffer would be + large enough. This leads to the idea that if the return value is larger + than n, then changing n to the return value will reduce the number of + iterations required. */ + +/* int bformata (bstring b, const char * fmt, ...) + * + * After the first parameter, it takes the same parameters as printf (), but + * rather than outputting results to stdio, it appends the results to + * a bstring which contains what would have been output. Note that if there + * is an early generation of a '\0' character, the bstring will be truncated + * to this end point. + */ +int bformata(bstring b, const char * fmt, ...) +{ + va_list arglist; + bstring buff; + int n, r; + + if (b == NULL || fmt == NULL || b->data == NULL || b->mlen <= 0 + || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR; + + /* Since the length is not determinable beforehand, a search is + performed using the truncating "vsnprintf" call (to avoid buffer + overflows) on increasing potential sizes for the output result. */ + + if ((n = (int)(2 * strlen(fmt))) < START_VSNBUFF) n = START_VSNBUFF; + if (NULL == (buff = bfromcstralloc(n + 2, ""))) + { + n = 1; + if (NULL == (buff = bfromcstralloc(n + 2, ""))) return BSTR_ERR; + } + + for (;;) + { + va_start(arglist, fmt); + exvsnprintf(r, (char *)buff->data, n + 1, fmt, arglist); + va_end(arglist); + + buff->data[n] = (unsigned char)'\0'; + buff->slen = (int)(strlen)((char *)buff->data); + + if (buff->slen < n) break; + + if (r > n) n = r; else n += n; + + if (BSTR_OK != balloc(buff, n + 2)) + { + bdestroy(buff); + return BSTR_ERR; + } + } + + r = bconcat(b, buff); + bdestroy(buff); + return r; +} + +/* int bassignformat (bstring b, const char * fmt, ...) 
+ * + * After the first parameter, it takes the same parameters as printf (), but + * rather than outputting results to stdio, it outputs the results to + * the bstring parameter b. Note that if there is an early generation of a + * '\0' character, the bstring will be truncated to this end point. + */ +int bassignformat(bstring b, const char * fmt, ...) +{ + va_list arglist; + bstring buff; + int n, r; + + if (b == NULL || fmt == NULL || b->data == NULL || b->mlen <= 0 + || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR; + + /* Since the length is not determinable beforehand, a search is + performed using the truncating "vsnprintf" call (to avoid buffer + overflows) on increasing potential sizes for the output result. */ + + if ((n = (int)(2 * strlen(fmt))) < START_VSNBUFF) n = START_VSNBUFF; + if (NULL == (buff = bfromcstralloc(n + 2, ""))) + { + n = 1; + if (NULL == (buff = bfromcstralloc(n + 2, ""))) return BSTR_ERR; + } + + for (;;) + { + va_start(arglist, fmt); + exvsnprintf(r, (char *)buff->data, n + 1, fmt, arglist); + va_end(arglist); + + buff->data[n] = (unsigned char)'\0'; + buff->slen = (int)(strlen)((char *)buff->data); + + if (buff->slen < n) break; + + if (r > n) n = r; else n += n; + + if (BSTR_OK != balloc(buff, n + 2)) + { + bdestroy(buff); + return BSTR_ERR; + } + } + + r = bassign(b, buff); + bdestroy(buff); + return r; +} + +/* bstring bformat (const char * fmt, ...) + * + * Takes the same parameters as printf (), but rather than outputting results + * to stdio, it forms a bstring which contains what would have been output. + * Note that if there is an early generation of a '\0' character, the + * bstring will be truncated to this end point. + */ +bstring bformat(const char * fmt, ...) +{ + va_list arglist; + bstring buff; + int n, r; + + if (fmt == NULL) return NULL; + + /* Since the length is not determinable beforehand, a search is + performed using the truncating "vsnprintf" call (to avoid buffer + overflows) on increasing potential sizes for the output result. */ + + if ((n = (int)(2 * strlen(fmt))) < START_VSNBUFF) n = START_VSNBUFF; + if (NULL == (buff = bfromcstralloc(n + 2, ""))) + { + n = 1; + if (NULL == (buff = bfromcstralloc(n + 2, ""))) return NULL; + } + + for (;;) + { + va_start(arglist, fmt); + exvsnprintf(r, (char *)buff->data, n + 1, fmt, arglist); + va_end(arglist); + + buff->data[n] = (unsigned char)'\0'; + buff->slen = (int)(strlen)((char *)buff->data); + + if (buff->slen < n) break; + + if (r > n) n = r; else n += n; + + if (BSTR_OK != balloc(buff, n + 2)) + { + bdestroy(buff); + return NULL; + } + } + + return buff; +} + +/* int bvcformata (bstring b, int count, const char * fmt, va_list arglist) + * + * The bvcformata function formats data under control of the format control + * string fmt and attempts to append the result to b. The fmt parameter is + * the same as that of the printf function. The variable argument list is + * replaced with arglist, which has been initialized by the va_start macro. + * The size of the appended output is upper bounded by count. If the + * required output exceeds count, the string b is not augmented with any + * contents and a value below BSTR_ERR is returned. If a value below -count + * is returned then it is recommended that the negative of this value be + * used as an update to the count in a subsequent pass. On other errors, + * such as running out of memory, parameter errors or numeric wrap around + * BSTR_ERR is returned. BSTR_OK is returned when the output is successfully + * generated and appended to b. 
+ * + * Note: There is no sanity checking of arglist, and this function is + * destructive of the contents of b from the b->slen point onward. If there + * is an early generation of a '\0' character, the bstring will be truncated + * to this end point. + */ +int bvcformata(bstring b, int count, const char * fmt, va_list arg) +{ + int n, r, l; + + if (b == NULL || fmt == NULL || count <= 0 || b->data == NULL + || b->mlen <= 0 || b->slen < 0 || b->slen > b->mlen) return BSTR_ERR; + + if (count > (n = b->slen + count) + 2) return BSTR_ERR; + if (BSTR_OK != balloc(b, n + 2)) return BSTR_ERR; + + exvsnprintf(r, (char *)b->data + b->slen, count + 2, fmt, arg); + + /* Did the operation complete successfully within bounds? */ + for (l = b->slen; l <= n; l++) + { + if ('\0' == b->data[l]) + { + b->slen = l; + return BSTR_OK; + } + } + + /* Abort, since the buffer was not large enough. The return value + tries to help set what the retry length should be. */ + + b->data[b->slen] = '\0'; + if (r > count + 1) /* Does r specify a particular target length? */ + { + n = r; + } + else + { + n = count + count; /* If not, just double the size of count */ + if (count > n) n = INT_MAX; + } + n = -n; + + if (n > BSTR_ERR - 1) n = BSTR_ERR - 1; + return n; +} + +#endif diff --git a/third_party/HLSLcc/src/cbstring/bstrlib.h b/third_party/HLSLcc/src/cbstring/bstrlib.h new file mode 100644 index 0000000..5ea8454 --- /dev/null +++ b/third_party/HLSLcc/src/cbstring/bstrlib.h @@ -0,0 +1,306 @@ +/* + * This source file is part of the bstring string library. This code was + * written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause + * BSD open source license or GPL v2.0. Refer to the accompanying documentation + * for details on usage and license. + */ + +/* + * bstrlib.h + * + * This file is the header file for the core module for implementing the + * bstring functions. 
+ */
+
+#ifndef BSTRLIB_INCLUDE
+#define BSTRLIB_INCLUDE
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stdarg.h>
+#include <string.h>
+#include <limits.h>
+#include <ctype.h>
+
+#if !defined(BSTRLIB_VSNP_OK) && !defined(BSTRLIB_NOVSNP)
+# if defined(__TURBOC__) && !defined(__BORLANDC__)
+# define BSTRLIB_NOVSNP
+# endif
+#endif
+
+#define BSTR_ERR (-1)
+#define BSTR_OK (0)
+#define BSTR_BS_BUFF_LENGTH_GET (0)
+
+typedef struct tagbstring * bstring;
+typedef const struct tagbstring * const_bstring;
+
+/* Copy functions */
+#define cstr2bstr bfromcstr
+extern bstring bfromcstr(const char * str);
+extern bstring bfromcstralloc(int mlen, const char * str);
+extern bstring blk2bstr(const void * blk, int len);
+extern char * bstr2cstr(const_bstring s, char z);
+extern int bcstrfree(char * s);
+extern bstring bstrcpy(const_bstring b1);
+extern int bassign(bstring a, const_bstring b);
+extern int bassignmidstr(bstring a, const_bstring b, int left, int len);
+extern int bassigncstr(bstring a, const char * str);
+extern int bassignblk(bstring a, const void * s, int len);
+
+/* Destroy function */
+extern int bdestroy(bstring b);
+
+/* Space allocation hinting functions */
+extern int balloc(bstring s, int len);
+extern int ballocmin(bstring b, int len);
+
+/* Substring extraction */
+extern bstring bmidstr(const_bstring b, int left, int len);
+
+/* Various standard manipulations */
+extern int bconcat(bstring b0, const_bstring b1);
+extern int bconchar(bstring b0, char c);
+extern int bcatcstr(bstring b, const char * s);
+extern int bcatblk(bstring b, const void * s, int len);
+extern int binsert(bstring s1, int pos, const_bstring s2, unsigned char fill);
+extern int binsertch(bstring s1, int pos, int len, unsigned char fill);
+extern int breplace(bstring b1, int pos, int len, const_bstring b2, unsigned char fill);
+extern int bdelete(bstring s1, int pos, int len);
+extern int bsetstr(bstring b0, int pos, const_bstring b1, unsigned char fill);
+extern int btrunc(bstring b, int n);
+
+/* Scan/search functions */
+extern int bstricmp(const_bstring b0, const_bstring b1);
+extern int bstrnicmp(const_bstring b0, const_bstring b1, int n);
+extern int biseqcaseless(const_bstring b0, const_bstring b1);
+extern int bisstemeqcaselessblk(const_bstring b0, const void * blk, int len);
+extern int biseq(const_bstring b0, const_bstring b1);
+extern int bisstemeqblk(const_bstring b0, const void * blk, int len);
+extern int biseqcstr(const_bstring b, const char * s);
+extern int biseqcstrcaseless(const_bstring b, const char * s);
+extern int bstrcmp(const_bstring b0, const_bstring b1);
+extern int bstrncmp(const_bstring b0, const_bstring b1, int n);
+extern int binstr(const_bstring s1, int pos, const_bstring s2);
+extern int binstrr(const_bstring s1, int pos, const_bstring s2);
+extern int binstrcaseless(const_bstring s1, int pos, const_bstring s2);
+extern int binstrrcaseless(const_bstring s1, int pos, const_bstring s2);
+extern int bstrchrp(const_bstring b, int c, int pos);
+extern int bstrrchrp(const_bstring b, int c, int pos);
+#define bstrchr(b, c) bstrchrp ((b), (c), 0)
+#define bstrrchr(b, c) bstrrchrp ((b), (c), blength(b)-1)
+extern int binchr(const_bstring b0, int pos, const_bstring b1);
+extern int binchrr(const_bstring b0, int pos, const_bstring b1);
+extern int bninchr(const_bstring b0, int pos, const_bstring b1);
+extern int bninchrr(const_bstring b0, int pos, const_bstring b1);
+extern int bfindreplace(bstring b, const_bstring find, const_bstring repl, int pos);
+extern int bfindreplacecaseless(bstring b, const_bstring find,
const_bstring repl, int pos); + +/* List of string container functions */ +struct bstrList +{ + int qty, mlen; + bstring * entry; +}; +extern struct bstrList * bstrListCreate(void); +extern int bstrListDestroy(struct bstrList * sl); +extern int bstrListAlloc(struct bstrList * sl, int msz); +extern int bstrListAllocMin(struct bstrList * sl, int msz); + +/* String split and join functions */ +extern struct bstrList * bsplit(const_bstring str, unsigned char splitChar); +extern struct bstrList * bsplits(const_bstring str, const_bstring splitStr); +extern struct bstrList * bsplitstr(const_bstring str, const_bstring splitStr); +extern bstring bjoin(const struct bstrList * bl, const_bstring sep); +extern int bsplitcb(const_bstring str, unsigned char splitChar, int pos, + int (* cb)(void * parm, int ofs, int len), void * parm); +extern int bsplitscb(const_bstring str, const_bstring splitStr, int pos, + int (* cb)(void * parm, int ofs, int len), void * parm); +extern int bsplitstrcb(const_bstring str, const_bstring splitStr, int pos, + int (* cb)(void * parm, int ofs, int len), void * parm); + +/* Miscellaneous functions */ +extern int bpattern(bstring b, int len); +extern int btoupper(bstring b); +extern int btolower(bstring b); +extern int bltrimws(bstring b); +extern int brtrimws(bstring b); +extern int btrimws(bstring b); + +/* <*>printf format functions */ +#if !defined(BSTRLIB_NOVSNP) +extern bstring bformat(const char * fmt, ...); +extern int bformata(bstring b, const char * fmt, ...); +extern int bassignformat(bstring b, const char * fmt, ...); +extern int bvcformata(bstring b, int count, const char * fmt, va_list arglist); + +#define bvformata(ret, b, fmt, lastarg) { \ +bstring bstrtmp_b = (b); \ +const char * bstrtmp_fmt = (fmt); \ +int bstrtmp_r = BSTR_ERR, bstrtmp_sz = 16; \ + for (;;) { \ + va_list bstrtmp_arglist; \ + va_start (bstrtmp_arglist, lastarg); \ + bstrtmp_r = bvcformata (bstrtmp_b, bstrtmp_sz, bstrtmp_fmt, bstrtmp_arglist); \ + va_end (bstrtmp_arglist); \ + if (bstrtmp_r >= 0) { /* Everything went ok */ \ + bstrtmp_r = BSTR_OK; \ + break; \ + } else if (-bstrtmp_r <= bstrtmp_sz) { /* A real error? 
*/ \ + bstrtmp_r = BSTR_ERR; \ + break; \ + } \ + bstrtmp_sz = -bstrtmp_r; /* Doubled or target size */ \ + } \ + ret = bstrtmp_r; \ +} + +#endif + +typedef int (*bNgetc) (void *parm); +typedef size_t (* bNread) (void *buff, size_t elsize, size_t nelem, void *parm); + +/* Input functions */ +extern bstring bgets(bNgetc getcPtr, void * parm, char terminator); +extern bstring bread(bNread readPtr, void * parm); +extern int bgetsa(bstring b, bNgetc getcPtr, void * parm, char terminator); +extern int bassigngets(bstring b, bNgetc getcPtr, void * parm, char terminator); +extern int breada(bstring b, bNread readPtr, void * parm); + +/* Stream functions */ +extern struct bStream * bsopen(bNread readPtr, void * parm); +extern void * bsclose(struct bStream * s); +extern int bsbufflength(struct bStream * s, int sz); +extern int bsreadln(bstring b, struct bStream * s, char terminator); +extern int bsreadlns(bstring r, struct bStream * s, const_bstring term); +extern int bsread(bstring b, struct bStream * s, int n); +extern int bsreadlna(bstring b, struct bStream * s, char terminator); +extern int bsreadlnsa(bstring r, struct bStream * s, const_bstring term); +extern int bsreada(bstring b, struct bStream * s, int n); +extern int bsunread(struct bStream * s, const_bstring b); +extern int bspeek(bstring r, const struct bStream * s); +extern int bssplitscb(struct bStream * s, const_bstring splitStr, + int (* cb)(void * parm, int ofs, const_bstring entry), void * parm); +extern int bssplitstrcb(struct bStream * s, const_bstring splitStr, + int (* cb)(void * parm, int ofs, const_bstring entry), void * parm); +extern int bseof(const struct bStream * s); + +struct tagbstring +{ + int mlen; + int slen; + unsigned char * data; +}; + +/* Accessor macros */ +#define blengthe(b, e) (((b) == (void *)0 || (b)->slen < 0) ? (int)(e) : ((b)->slen)) +#define blength(b) (blengthe ((b), 0)) +#define bdataofse(b, o, e) (((b) == (void *)0 || (b)->data == (void*)0) ? (char *)(e) : ((char *)(b)->data) + (o)) +#define bdataofs(b, o) (bdataofse ((b), (o), (void *)0)) +#define bdatae(b, e) (bdataofse (b, 0, e)) +#define bdata(b) (bdataofs (b, 0)) +#define bchare(b, p, e) ((((unsigned)(p)) < (unsigned)blength(b)) ? ((b)->data[(p)]) : (e)) +#define bchar(b, p) bchare ((b), (p), '\0') + +/* Static constant string initialization macro */ +#define bsStaticMlen(q, m) {(m), (int) sizeof(q)-1, (unsigned char *) ("" q "")} +#if defined(_MSC_VER) +/* There are many versions of MSVC which emit __LINE__ as a non-constant. */ +# define bsStatic(q) bsStaticMlen(q,-32) +#endif +#ifndef bsStatic +# define bsStatic(q) bsStaticMlen(q,-__LINE__) +#endif + +/* Static constant block parameter pair */ +#define bsStaticBlkParms(q) ((void *)("" q "")), ((int) sizeof(q)-1) + +/* Reference building macros */ +#define cstr2tbstr btfromcstr +#define btfromcstr(t, s) { \ + (t).data = (unsigned char *) (s); \ + (t).slen = ((t).data) ? 
((int) (strlen) ((char *)(t).data)) : 0; \ + (t).mlen = -1; \ +} +#define blk2tbstr(t, s, l) { \ + (t).data = (unsigned char *) (s); \ + (t).slen = l; \ + (t).mlen = -1; \ +} +#define btfromblk(t, s, l) blk2tbstr(t,s,l) +#define bmid2tbstr(t, b, p, l) { \ + const_bstring bstrtmp_s = (b); \ + if (bstrtmp_s && bstrtmp_s->data && bstrtmp_s->slen >= 0) { \ + int bstrtmp_left = (p); \ + int bstrtmp_len = (l); \ + if (bstrtmp_left < 0) { \ + bstrtmp_len += bstrtmp_left; \ + bstrtmp_left = 0; \ + } \ + if (bstrtmp_len > bstrtmp_s->slen - bstrtmp_left) \ + bstrtmp_len = bstrtmp_s->slen - bstrtmp_left; \ + if (bstrtmp_len <= 0) { \ + (t).data = (unsigned char *)""; \ + (t).slen = 0; \ + } else { \ + (t).data = bstrtmp_s->data + bstrtmp_left; \ + (t).slen = bstrtmp_len; \ + } \ + } else { \ + (t).data = (unsigned char *)""; \ + (t).slen = 0; \ + } \ + (t).mlen = -__LINE__; \ +} +#define btfromblkltrimws(t, s, l) { \ + int bstrtmp_idx = 0, bstrtmp_len = (l); \ + unsigned char * bstrtmp_s = (s); \ + if (bstrtmp_s && bstrtmp_len >= 0) { \ + for (; bstrtmp_idx < bstrtmp_len; bstrtmp_idx++) { \ + if (!isspace (bstrtmp_s[bstrtmp_idx])) break; \ + } \ + } \ + (t).data = bstrtmp_s + bstrtmp_idx; \ + (t).slen = bstrtmp_len - bstrtmp_idx; \ + (t).mlen = -__LINE__; \ +} +#define btfromblkrtrimws(t, s, l) { \ + int bstrtmp_len = (l) - 1; \ + unsigned char * bstrtmp_s = (s); \ + if (bstrtmp_s && bstrtmp_len >= 0) { \ + for (; bstrtmp_len >= 0; bstrtmp_len--) { \ + if (!isspace (bstrtmp_s[bstrtmp_len])) break; \ + } \ + } \ + (t).data = bstrtmp_s; \ + (t).slen = bstrtmp_len + 1; \ + (t).mlen = -__LINE__; \ +} +#define btfromblktrimws(t, s, l) { \ + int bstrtmp_idx = 0, bstrtmp_len = (l) - 1; \ + unsigned char * bstrtmp_s = (s); \ + if (bstrtmp_s && bstrtmp_len >= 0) { \ + for (; bstrtmp_idx <= bstrtmp_len; bstrtmp_idx++) { \ + if (!isspace (bstrtmp_s[bstrtmp_idx])) break; \ + } \ + for (; bstrtmp_len >= bstrtmp_idx; bstrtmp_len--) { \ + if (!isspace (bstrtmp_s[bstrtmp_len])) break; \ + } \ + } \ + (t).data = bstrtmp_s + bstrtmp_idx; \ + (t).slen = bstrtmp_len + 1 - bstrtmp_idx; \ + (t).mlen = -__LINE__; \ +} + +/* Write protection macros */ +#define bwriteprotect(t) { if ((t).mlen >= 0) (t).mlen = -1; } +#define bwriteallow(t) { if ((t).mlen == -1) (t).mlen = (t).slen + ((t).slen == 0); } +#define biswriteprotected(t) ((t).mlen <= 0) + +#ifdef __cplusplus +} +#endif + +#endif diff --git a/third_party/HLSLcc/src/cbstring/bstrlib.txt b/third_party/HLSLcc/src/cbstring/bstrlib.txt new file mode 100644 index 0000000..bf48491 --- /dev/null +++ b/third_party/HLSLcc/src/cbstring/bstrlib.txt @@ -0,0 +1,3202 @@ +Better String library +--------------------- + +by Paul Hsieh + +The bstring library is an attempt to provide improved string processing +functionality to the C and C++ language. At the heart of the bstring library +(Bstrlib for short) is the management of "bstring"s which are a significant +improvement over '\0' terminated char buffers. + +=============================================================================== + +Motivation +---------- + +The standard C string library has serious problems: + + 1) Its use of '\0' to denote the end of the string means knowing a + string's length is O(n) when it could be O(1). + 2) It imposes an interpretation for the character value '\0'. + 3) gets() always exposes the application to a buffer overflow. + 4) strtok() modifies the string its parsing and thus may not be usable in + programs which are re-entrant or multithreaded. 
+ 5) fgets has the unusual semantic of ignoring '\0's that occur before + '\n's are consumed. + 6) There is no memory management, and actions performed such as strcpy, + strcat and sprintf are common places for buffer overflows. + 7) strncpy() doesn't '\0' terminate the destination in some cases. + 8) Passing NULL to C library string functions causes an undefined NULL + pointer access. + 9) Parameter aliasing (overlapping, or self-referencing parameters) + within most C library functions has undefined behavior. + 10) Many C library string function calls take integer parameters with + restricted legal ranges. Parameters passed outside these ranges are + not typically detected and cause undefined behavior. + +So the desire is to create an alternative string library that does not suffer +from the above problems and adds in the following functionality: + + 1) Incorporate string functionality seen from other languages. + a) MID$() - from BASIC + b) split()/join() - from Python + c) string/char x n - from Perl + 2) Implement analogs to functions that combine stream IO and char buffers + without creating a dependency on stream IO functionality. + 3) Implement the basic text editor-style functions insert, delete, find, + and replace. + 4) Implement reference based sub-string access (as a generalization of + pointer arithmetic.) + 5) Implement runtime write protection for strings. + +There is also a desire to avoid "API-bloat". So functionality that can be +implemented trivially in other functionality is omitted. So there is no +left$() or right$() or reverse() or anything like that as part of the core +functionality. + +Explaining Bstrings +------------------- + +A bstring is basically a header which wraps a pointer to a char buffer. Lets +start with the declaration of a struct tagbstring: + + struct tagbstring { + int mlen; + int slen; + unsigned char * data; + }; + +This definition is considered exposed, not opaque (though it is neither +necessary nor recommended that low level maintenance of bstrings be performed +whenever the abstract interfaces are sufficient). The mlen field (usually) +describes a lower bound for the memory allocated for the data field. The +slen field describes the exact length for the bstring. The data field is a +single contiguous buffer of unsigned chars. Note that the existence of a '\0' +character in the unsigned char buffer pointed to by the data field does not +necessarily denote the end of the bstring. + +To be a well formed modifiable bstring the mlen field must be at least the +length of the slen field, and slen must be non-negative. Furthermore, the +data field must point to a valid buffer in which access to the first mlen +characters has been acquired. So the minimal check for correctness is: + + (slen >= 0 && mlen >= slen && data != NULL) + +bstrings returned by bstring functions can be assumed to be either NULL or +satisfy the above property. (When bstrings are only readable, the mlen >= +slen restriction is not required; this is discussed later in this section.) +A bstring itself is just a pointer to a struct tagbstring: + + typedef struct tagbstring * bstring; + +Note that use of the prefix "tag" in struct tagbstring is required to work +around the inconsistency between C and C++'s struct namespace usage. This +definition is also considered exposed. + +Bstrlib basically manages bstrings allocated as a header and an associated +data-buffer. Since the implementation is exposed, they can also be +constructed manually. 
Functions which mutate bstrings assume that the header +and data buffer have been malloced; the bstring library may perform free() or +realloc() on both the header and data buffer of any bstring parameter. +Functions which return bstring's create new bstrings. The string memory is +freed by a bdestroy() call (or using the bstrFree macro). + +The following related typedef is also provided: + + typedef const struct tagbstring * const_bstring; + +which is also considered exposed. These are directly bstring compatible (no +casting required) but are just used for parameters which are meant to be +non-mutable. So in general, bstring parameters which are read as input but +not meant to be modified will be declared as const_bstring, and bstring +parameters which may be modified will be declared as bstring. This convention +is recommended for user written functions as well. + +Since bstrings maintain interoperability with C library char-buffer style +strings, all functions which modify, update or create bstrings also append a +'\0' character into the position slen + 1. This trailing '\0' character is +not required for bstrings input to the bstring functions; this is provided +solely as a convenience for interoperability with standard C char-buffer +functionality. + +Analogs for the ANSI C string library functions have been created when they +are necessary, but have also been left out when they are not. In particular +there are no functions analogous to fwrite, or puts just for the purposes of +bstring. The ->data member of any string is exposed, and therefore can be +used just as easily as char buffers for C functions which read strings. + +For those that wish to hand construct bstrings, the following should be kept +in mind: + + 1) While bstrlib can accept constructed bstrings without terminating + '\0' characters, the rest of the C language string library will not + function properly on such non-terminated strings. This is obvious + but must be kept in mind. + 2) If it is intended that a constructed bstring be written to by the + bstring library functions then the data portion should be allocated + by the malloc function and the slen and mlen fields should be entered + properly. The struct tagbstring header is not reallocated, and only + freed by bdestroy. + 3) Writing arbitrary '\0' characters at various places in the string + will not modify its length as perceived by the bstring library + functions. In fact, '\0' is a legitimate non-terminating character + for a bstring to contain. + 4) For read only parameters, bstring functions do not check the mlen. + I.e., the minimal correctness requirements are reduced to: + + (slen >= 0 && data != NULL) + +Better pointer arithmetic +------------------------- + +One built-in feature of '\0' terminated char * strings, is that its very easy +and fast to obtain a reference to the tail of any string using pointer +arithmetic. Bstrlib does one better by providing a way to get a reference to +any substring of a bstring (or any other length delimited block of memory.) +So rather than just having pointer arithmetic, with bstrlib one essentially +has segment arithmetic. This is achieved using the macro blk2tbstr() which +builds a reference to a block of memory and the macro bmid2tbstr() which +builds a reference to a segment of a bstring. Bstrlib also includes +functions for direct consumption of memory blocks into bstrings, namely +bcatblk () and blk2bstr (). 
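+
+As an illustrative sketch only (the string contents and offsets below are
+made-up examples, not part of the library), building a read-only reference
+to a segment of an existing bstring with bmid2tbstr () looks like this:
+
+    struct tagbstring ref;
+    bstring cmd = bfromcstr ("copy  src.txt dst.txt");
+
+    /* Reference characters 6..12 of cmd ("src.txt") without copying them.
+       The reference must not be written to or passed to bdestroy (). */
+    bmid2tbstr (ref, cmd, 6, 7);
+
+    if (1 == biseqcstr (&ref, "src.txt")) {
+        /* ... use the referenced segment as a const_bstring ... */
+    }
+
+    bdestroy (cmd);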
+ +One scenario where this can be extremely useful is when string contains many +substrings which one would like to pass as read-only reference parameters to +some string consuming function without the need to allocate entire new +containers for the string data. More concretely, imagine parsing a command +line string whose parameters are space delimited. This can only be done for +tails of the string with '\0' terminated char * strings. + +Improved NULL semantics and error handling +------------------------------------------ + +Unless otherwise noted, if a NULL pointer is passed as a bstring or any other +detectably illegal parameter, the called function will return with an error +indicator (either NULL or BSTR_ERR) rather than simply performing a NULL +pointer access, or having undefined behavior. + +To illustrate the value of this, consider the following example: + + strcpy (p = malloc (13 * sizeof (char)), "Hello,"); + strcat (p, " World"); + +This is not correct because malloc may return NULL (due to an out of memory +condition), and the behaviour of strcpy is undefined if either of its +parameters are NULL. However: + + bstrcat (p = bfromcstr ("Hello,"), q = bfromcstr (" World")); + bdestroy (q); + +is well defined, because if either p or q are assigned NULL (indicating a +failure to allocate memory) both bstrcat and bdestroy will recognize it and +perform no detrimental action. + +Note that it is not necessary to check any of the members of a returned +bstring for internal correctness (in particular the data member does not need +to be checked against NULL when the header is non-NULL), since this is +assured by the bstring library itself. + +bStreams +-------- + +In addition to the bgets and bread functions, bstrlib can abstract streams +with a high performance read only stream called a bStream. In general, the +idea is to open a core stream (with something like fopen) then pass its +handle as well as a bNread function pointer (like fread) to the bsopen +function which will return a handle to an open bStream. Then the functions +bsread, bsreadln or bsreadlns can be called to read portions of the stream. +Finally, the bsclose function is called to close the bStream -- it will +return a handle to the original (core) stream. So bStreams, essentially, +wrap other streams. + +The bStreams have two main advantages over the bgets and bread (as well as +fgets/ungetc) paradigms: + +1) Improved functionality via the bunread function which allows a stream to + unread characters, giving the bStream stack-like functionality if so + desired. +2) A very high performance bsreadln function. The C library function fgets() + (and the bgets function) can typically be written as a loop on top of + fgetc(), thus paying all of the overhead costs of calling fgetc on a per + character basis. bsreadln will read blocks at a time, thus amortizing the + overhead of fread calls over many characters at once. + +However, clearly bStreams are suboptimal or unusable for certain kinds of +streams (stdin) or certain usage patterns (a few spotty, or non-sequential +reads from a slow stream.) For those situations, using bgets will be more +appropriate. + +The semantics of bStreams allows practical construction of layerable data +streams. What this means is that by writing a bNread compatible function on +top of a bStream, one can construct a new bStream on top of it. 
This can be useful
+for writing multi-pass parsers that don't actually read the entire
+input more than once and don't require the use of intermediate storage.
+
+Aliasing
+--------
+
+Aliasing occurs when a function is given two parameters which point to data
+structures which overlap in the memory they occupy. While this does not
+disturb read only functions, for many libraries this can make functions that
+write to these memory locations malfunction. This is a common problem of the
+C standard library and especially the string functions in the C standard
+library.
+
+The C standard string library is entirely char by char oriented (as is
+bstring) which makes conforming implementations alias safe for some
+scenarios. However no actual detection of aliasing is typically performed,
+so it is easy to find cases where the aliasing will cause anomalous or
+undesirable behaviour (consider: strcat (p, p).) The C99 standard includes
+the "restrict" pointer modifier which allows the compiler to document and
+assume a no-alias condition on usage. However, only the most trivial cases
+can be caught (if at all) by the compiler at compile time, and thus there is
+no actual enforcement of non-aliasing.
+
+Bstrlib, by contrast, permits aliasing and is completely aliasing safe, in
+the C99 sense of aliasing. That is to say, under the assumption that
+pointers of incompatible types from distinct objects can never alias, bstrlib
+is completely aliasing safe. (In practice this means that the data buffer
+portion of any bstring and header of any bstring are assumed to never alias.)
+With the exception of the reference building macros, the library behaves as
+if all read-only parameters are first copied and replaced by temporary
+non-aliased parameters before any writing to any output bstring is performed
+(though actual copying is extremely rarely ever done.)
+
+Besides being a useful safety feature, bstring searching/comparison
+functions can improve to O(1) execution when aliasing is detected.
+
+Note that aliasing detection and handling code in Bstrlib is generally
+extremely cheap. There is almost never any appreciable performance penalty
+for using aliased parameters.
+
+Reentrancy
+----------
+
+Nearly every function in Bstrlib is a leaf function, and is completely
+reentrant with the exception of writing to common bstrings. The split
+functions which use a callback mechanism require only that the source string
+not be destroyed by the callback function unless the callback function returns
+with an error status (note that Bstrlib functions which return an error do
+not modify the string in any way.) The string can in fact be modified by the
+callback and the behaviour is deterministic. See the documentation of the
+various split functions for more details.
+
+Undefined scenarios
+-------------------
+
+One of the basic important premises for Bstrlib is not to increase the
+propagation of undefined situations from parameters that are otherwise legal
+in and of themselves. In particular, except for extremely marginal cases,
+usages of bstrings that use the bstring library functions alone cannot lead
+to any undefined action. But due to C/C++ language and library limitations,
+there is no way to define a non-trivial library that is completely without
+undefined operations. All such possible undefined operations are described
+below:
+
+1) bstrings or struct tagbstrings that are not explicitly initialized cannot
+   be passed as a parameter to any bstring function.
+2) The members of the NULL bstring cannot be accessed directly. (Though all + APIs and macros detect the NULL bstring.) +3) A bstring whose data member has not been obtained from a malloc or + compatible call and which is write accessible passed as a writable + parameter will lead to undefined results. (i.e., do not writeAllow any + constructed bstrings unless the data portion has been obtained from the + heap.) +4) If the headers of two strings alias but are not identical (which can only + happen via a defective manual construction), then passing them to a + bstring function in which one is writable is not defined. +5) If the mlen member is larger than the actual accessible length of the data + member for a writable bstring, or if the slen member is larger than the + readable length of the data member for a readable bstring, then the + corresponding bstring operations are undefined. +6) Any bstring definition whose header or accessible data portion has been + assigned to inaccessible or otherwise illegal memory clearly cannot be + acted upon by the bstring library in any way. +7) Destroying the source of an incremental split from within the callback + and not returning with a negative value (indicating that it should abort) + will lead to undefined behaviour. (Though *modifying* or adjusting the + state of the source data, even if those modification fail within the + bstrlib API, has well defined behavior.) +8) Modifying a bstring which is write protected by direct access has + undefined behavior. + +While this may seem like a long list, with the exception of invalid uses of +the writeAllow macro, and source destruction during an iterative split +without an accompanying abort, no usage of the bstring API alone can cause +any undefined scenario to occurr. I.e., the policy of restricting usage of +bstrings to the bstring API can significantly reduce the risk of runtime +errors (in practice it should eliminate them) related to string manipulation +due to undefined action. + +C++ wrapper +----------- + +A C++ wrapper has been created to enable bstring functionality for C++ in the +most natural (for C++ programers) way possible. The mandate for the C++ +wrapper is different from the base C bstring library. Since the C++ language +has far more abstracting capabilities, the CBString structure is considered +fully abstracted -- i.e., hand generated CBStrings are not supported (though +conversion from a struct tagbstring is allowed) and all detectable errors are +manifest as thrown exceptions. + +- The C++ class definitions are all under the namespace Bstrlib. bstrwrap.h + enables this namespace (with a using namespace Bstrlib; directive at the + end) unless the macro BSTRLIB_DONT_ASSUME_NAMESPACE has been defined before + it is included. + +- Erroneous accesses results in an exception being thrown. The exception + parameter is of type "struct CBStringException" which is derived from + std::exception if STL is used. A verbose description of the error message + can be obtained from the what() method. + +- CBString is a C++ structure derived from a struct tagbstring. An address + of a CBString cast to a bstring must not be passed to bdestroy. The bstring + C API has been made C++ safe and can be used directly in a C++ project. + +- It includes constructors which can take a char, '\0' terminated char + buffer, tagbstring, (char, repeat-value), a length delimited buffer or a + CBStringList to initialize it. + +- Concatenation is performed with the + and += operators. 
Comparisons are + done with the ==, !=, <, >, <= and >= operators. Note that == and != use + the biseq call, while <, >, <= and >= use bstrcmp. + +- CBString's can be directly cast to const character buffers. + +- CBString's can be directly cast to double, float, int or unsigned int so + long as the CBString are decimal representations of those types (otherwise + an exception will be thrown). Converting the other way should be done with + the format(a) method(s). + +- CBString contains the length, character and [] accessor methods. The + character and [] accessors are aliases of each other. If the bounds for + the string are exceeded, an exception is thrown. To avoid the overhead for + this check, first cast the CBString to a (const char *) and use [] to + dereference the array as normal. Note that the character and [] accessor + methods allows both reading and writing of individual characters. + +- The methods: format, formata, find, reversefind, findcaseless, + reversefindcaseless, midstr, insert, insertchrs, replace, findreplace, + findreplacecaseless, remove, findchr, nfindchr, alloc, toupper, tolower, + gets, read are analogous to the functions that can be found in the C API. + +- The caselessEqual and caselessCmp methods are analogous to biseqcaseless + and bstricmp functions respectively. + +- Note that just like the bformat function, the format and formata methods do + not automatically cast CBStrings into char * strings for "%s"-type + substitutions: + + CBString w("world"); + CBString h("Hello"); + CBString hw; + + /* The casts are necessary */ + hw.format ("%s, %s", (const char *)h, (const char *)w); + +- The methods trunc and repeat have been added instead of using pattern. + +- ltrim, rtrim and trim methods have been added. These remove characters + from a given character string set (defaulting to the whitespace characters) + from either the left, right or both ends of the CBString, respectively. + +- The method setsubstr is also analogous in functionality to bsetstr, except + that it cannot be passed NULL. Instead the method fill and the fill-style + constructor have been supplied to enable this functionality. + +- The writeprotect(), writeallow() and iswriteprotected() methods are + analogous to the bwriteprotect(), bwriteallow() and biswriteprotected() + macros in the C API. Write protection semantics in CBString are stronger + than with the C API in that indexed character assignment is checked for + write protection. However, unlike with the C API, a write protected + CBString can be destroyed by the destructor. + +- CBStream is a C++ structure which wraps a struct bStream (its not derived + from it, since destruction is slightly different). It is constructed by + passing in a bNread function pointer and a stream parameter cast to void *. + This structure includes methods for detecting eof, setting the buffer + length, reading the whole stream or reading entries line by line or block + by block, an unread function, and a peek function. + +- If STL is available, the CBStringList structure is derived from a vector of + CBString with various split methods. The split method has been overloaded + to accept either a character or CBString as the second parameter (when the + split parameter is a CBString any character in that CBString is used as a + seperator). The splitstr method takes a CBString as a substring seperator. + Joins can be performed via a CBString constructor which takes a + CBStringList as a parameter, or just using the CBString::join() method. 
+ +- If there is proper support for std::iostreams, then the >> and << operators + and the getline() function have been added (with semantics the same as + those for std::string). + +Multithreading +-------------- + +A mutable bstring is kind of analogous to a small (two entry) linked list +allocated by malloc, with all aliasing completely under programmer control. +I.e., manipulation of one bstring will never affect any other distinct +bstring unless explicitely constructed to do so by the programmer via hand +construction or via building a reference. Bstrlib also does not use any +static or global storage, so there are no hidden unremovable race conditions. +Bstrings are also clearly not inherently thread local. So just like +char *'s, bstrings can be passed around from thread to thread and shared and +so on, so long as modifications to a bstring correspond to some kind of +exclusive access lock as should be expected (or if the bstring is read-only, +which can be enforced by bstring write protection) for any sort of shared +object in a multithreaded environment. + +Bsafe module +------------ + +For convenience, a bsafe module has been included. The idea is that if this +module is included, inadvertant usage of the most dangerous C functions will +be overridden and lead to an immediate run time abort. Of course, it should +be emphasized that usage of this module is completely optional. The +intention is essentially to provide an option for creating project safety +rules which can be enforced mechanically rather than socially. This is +useful for larger, or open development projects where its more difficult to +enforce social rules or "coding conventions". + +Problems not solved +------------------- + +Bstrlib is written for the C and C++ languages, which have inherent weaknesses +that cannot be easily solved: + +1. Memory leaks: Forgetting to call bdestroy on a bstring that is about to be + unreferenced, just as forgetting to call free on a heap buffer that is + about to be dereferenced. Though bstrlib itself is leak free. +2. Read before write usage: In C, declaring an auto bstring does not + automatically fill it with legal/valid contents. This problem has been + somewhat mitigated in C++. (The bstrDeclare and bstrFree macros from + bstraux can be used to help mitigate this problem.) + +Other problems not addressed: + +3. Built-in mutex usage to automatically avoid all bstring internal race + conditions in multitasking environments: The problem with trying to + implement such things at this low a level is that it is typically more + efficient to use locks in higher level primitives. There is also no + platform independent way to implement locks or mutexes. +4. Unicode/widecharacter support. + +Note that except for spotty support of wide characters, the default C +standard library does not address any of these problems either. + +Configurable compilation options +-------------------------------- + +All configuration options are meant solely for the purpose of compiler +compatibility. Configuration options are not meant to change the semantics +or capabilities of the library, except where it is unavoidable. + +Since some C++ compilers don't include the Standard Template Library and some +have the options of disabling exception handling, a number of macros can be +used to conditionally compile support for each of this: + +BSTRLIB_CAN_USE_STL + + - defining this will enable the used of the Standard Template Library. + Defining BSTRLIB_CAN_USE_STL overrides the BSTRLIB_CANNOT_USE_STL macro. 
+
+BSTRLIB_CANNOT_USE_STL
+
+  - defining this will disable the use of the Standard Template Library.
+    Defining BSTRLIB_CAN_USE_STL overrides the BSTRLIB_CANNOT_USE_STL macro.
+
+BSTRLIB_CAN_USE_IOSTREAM
+
+  - defining this will enable the use of streams from class std. Defining
+    BSTRLIB_CAN_USE_IOSTREAM overrides the BSTRLIB_CANNOT_USE_IOSTREAM macro.
+
+BSTRLIB_CANNOT_USE_IOSTREAM
+
+  - defining this will disable the use of streams from class std. Defining
+    BSTRLIB_CAN_USE_IOSTREAM overrides the BSTRLIB_CANNOT_USE_IOSTREAM macro.
+
+BSTRLIB_THROWS_EXCEPTIONS
+
+  - defining this will enable the exception handling within bstring.
+    Defining BSTRLIB_THROWS_EXCEPTIONS overrides the
+    BSTRLIB_DOESNT_THROW_EXCEPTIONS macro.
+
+BSTRLIB_DOESNT_THROW_EXCEPTIONS
+
+  - defining this will disable the exception handling within bstring.
+    Defining BSTRLIB_THROWS_EXCEPTIONS overrides the
+    BSTRLIB_DOESNT_THROW_EXCEPTIONS macro.
+
+Note that these macros must be defined consistently throughout all modules
+that use CBStrings including bstrwrap.cpp.
+
+Some older C compilers do not support functions such as vsnprintf. This is
+handled by the following macro variables:
+
+BSTRLIB_NOVSNP
+
+  - defining this indicates that the compiler does not support vsnprintf.
+    This will cause bformat and bformata to not be declared. Note that
+    for some compilers, such as Turbo C, this is set automatically.
+    Defining BSTRLIB_NOVSNP overrides the BSTRLIB_VSNP_OK macro.
+
+BSTRLIB_VSNP_OK
+
+  - defining this will disable the autodetection of compilers that do not
+    support vsnprintf.
+    Defining BSTRLIB_NOVSNP overrides the BSTRLIB_VSNP_OK macro.
+
+Semantic compilation options
+----------------------------
+
+Bstrlib comes with very few compilation options for changing the semantics
+of the library. These are described below.
+
+BSTRLIB_DONT_ASSUME_NAMESPACE
+
+  - Defining this before including bstrwrap.h will disable the automatic
+    enabling of the Bstrlib namespace for the C++ declarations.
+
+BSTRLIB_DONT_USE_VIRTUAL_DESTRUCTOR
+
+  - Defining this will make the CBString destructor non-virtual.
+
+BSTRLIB_MEMORY_DEBUG
+
+  - Defining this will cause the bstrlib modules bstrlib.c and bstrwrap.cpp
+    to invoke a #include "memdbg.h". memdbg.h has to be supplied by the user.
+
+Note that these macros must be defined consistently throughout all modules
+that use bstrings or CBStrings including bstrlib.c, bstraux.c and
+bstrwrap.cpp.
+
+===============================================================================
+
+Files
+-----
+
+bstrlib.c - C implementation of bstring functions.
+bstrlib.h - C header file for bstring functions.
+bstraux.c - C example that implements trivial additional functions.
+bstraux.h - C header for bstraux.c
+bstest.c - C unit/regression test for bstrlib.c
+
+bstrwrap.cpp - C++ implementation of CBString.
+bstrwrap.h - C++ header file for CBString.
+test.cpp - C++ unit/regression test for bstrwrap.cpp
+
+bsafe.c - C runtime stubs to abort usage of unsafe C functions.
+bsafe.h - C header file for bsafe.c functions.
+
+C projects need only include bstrlib.h and compile/link bstrlib.c to use the
+bstring library. C++ projects need to additionally include bstrwrap.h and
+compile/link bstrwrap.cpp. For both, there may be a need to make choices
+about feature configuration as described in the "Configurable compilation
+options" in the section above.
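+
+For illustration only, a minimal C program using the library might look like
+the following (the file name and compiler command are examples, not
+requirements of Bstrlib):
+
+    /* example.c -- build together with bstrlib.c, e.g.
+           cc example.c bstrlib.c -o example                              */
+    #include <stdio.h>
+    #include "bstrlib.h"
+
+    int main (void)
+    {
+        bstring b = bfromcstr ("Hello");       /* allocate from a C string */
+        if (b == NULL) return 1;
+        bcatcstr (b, ", World");               /* append a C string        */
+        printf ("%s\n", bdata (b));            /* '\0' terminated access   */
+        bdestroy (b);                          /* free header and data     */
+        return 0;
+    }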
+ +Other files that are included in this archive are: + +license.txt - The 3 clause BSD license for Bstrlib +gpl.txt - The GPL version 2 +security.txt - A security statement useful for auditting Bstrlib +porting.txt - A guide to porting Bstrlib +bstrlib.txt - This file + +=============================================================================== + +The functions +------------- + + extern bstring bfromcstr (const char * str); + + Take a standard C library style '\0' terminated char buffer and generate + a bstring with the same contents as the char buffer. If an error occurs + NULL is returned. + + So for example: + + bstring b = bfromcstr ("Hello"); + if (!b) { + fprintf (stderr, "Out of memory"); + } else { + puts ((char *) b->data); + } + + .......................................................................... + + extern bstring bfromcstralloc (int mlen, const char * str); + + Create a bstring which contains the contents of the '\0' terminated + char * buffer str. The memory buffer backing the bstring is at least + mlen characters in length. If an error occurs NULL is returned. + + So for example: + + bstring b = bfromcstralloc (64, someCstr); + if (b) b->data[63] = 'x'; + + The idea is that this will set the 64th character of b to 'x' if it is at + least 64 characters long otherwise do nothing. And we know this is well + defined so long as b was successfully created, since it will have been + allocated with at least 64 characters. + + .......................................................................... + + extern bstring blk2bstr (const void * blk, int len); + + Create a bstring whose contents are described by the contiguous buffer + pointing to by blk with a length of len bytes. Note that this function + creates a copy of the data in blk, rather than simply referencing it. + Compare with the blk2tbstr macro. If an error occurs NULL is returned. + + .......................................................................... + + extern char * bstr2cstr (const_bstring s, char z); + + Create a '\0' terminated char buffer which contains the contents of the + bstring s, except that any contained '\0' characters are converted to the + character in z. This returned value should be freed with bcstrfree(), by + the caller. If an error occurs NULL is returned. + + .......................................................................... + + extern int bcstrfree (char * s); + + Frees a C-string generated by bstr2cstr (). This is normally unnecessary + since it just wraps a call to free (), however, if malloc () and free () + have been redefined as a macros within the bstrlib module (via macros in + the memdbg.h backdoor) with some difference in behaviour from the std + library functions, then this allows a correct way of freeing the memory + that allows higher level code to be independent from these macro + redefinitions. + + .......................................................................... + + extern bstring bstrcpy (const_bstring b1); + + Make a copy of the passed in bstring. The copied bstring is returned if + there is no error, otherwise NULL is returned. + + .......................................................................... + + extern int bassign (bstring a, const_bstring b); + + Overwrite the bstring a with the contents of bstring b. Note that the + bstring a must be a well defined and writable bstring. If an error + occurs BSTR_ERR is returned and a is not overwritten. + + .......................................................................... 
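+
+   As an illustrative sketch of the distinction between bstrcpy and bassign
+   (bstrcpy allocates a new bstring, while bassign overwrites an existing
+   writable one; the variable names below are examples only):
+
+      bstring src  = bfromcstr ("example");
+      bstring copy = bstrcpy (src);      /* new, independent bstring */
+      bstring dst  = bfromcstr ("");     /* existing writable target */
+
+      if (copy != NULL && dst != NULL && BSTR_OK == bassign (dst, src)) {
+          /* dst now holds "example"; src and copy are unchanged */
+      }
+
+      bdestroy (src); bdestroy (copy); bdestroy (dst);
+
+   ..........................................................................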
+ + int bassigncstr (bstring a, const char * str); + + Overwrite the string a with the contents of char * string str. Note that + the bstring a must be a well defined and writable bstring. If an error + occurs BSTR_ERR is returned and a may be partially overwritten. + + .......................................................................... + + int bassignblk (bstring a, const void * s, int len); + + Overwrite the string a with the contents of the block (s, len). Note that + the bstring a must be a well defined and writable bstring. If an error + occurs BSTR_ERR is returned and a is not overwritten. + + .......................................................................... + + extern int bassignmidstr (bstring a, const_bstring b, int left, int len); + + Overwrite the bstring a with the middle of contents of bstring b + starting from position left and running for a length len. left and + len are clamped to the ends of b as with the function bmidstr. Note that + the bstring a must be a well defined and writable bstring. If an error + occurs BSTR_ERR is returned and a is not overwritten. + + .......................................................................... + + extern bstring bmidstr (const_bstring b, int left, int len); + + Create a bstring which is the substring of b starting from position left + and running for a length len (clamped by the end of the bstring b.) If + there was no error, the value of this constructed bstring is returned + otherwise NULL is returned. + + .......................................................................... + + extern int bdelete (bstring s1, int pos, int len); + + Removes characters from pos to pos+len-1 and shifts the tail of the + bstring starting from pos+len to pos. len must be positive for this call + to have any effect. The section of the bstring described by (pos, len) + is clamped to boundaries of the bstring b. The value BSTR_OK is returned + if the operation is successful, otherwise BSTR_ERR is returned. + + .......................................................................... + + extern int bconcat (bstring b0, const_bstring b1); + + Concatenate the bstring b1 to the end of bstring b0. The value BSTR_OK + is returned if the operation is successful, otherwise BSTR_ERR is + returned. + + .......................................................................... + + extern int bconchar (bstring b, char c); + + Concatenate the character c to the end of bstring b. The value BSTR_OK + is returned if the operation is successful, otherwise BSTR_ERR is + returned. + + .......................................................................... + + extern int bcatcstr (bstring b, const char * s); + + Concatenate the char * string s to the end of bstring b. The value + BSTR_OK is returned if the operation is successful, otherwise BSTR_ERR is + returned. + + .......................................................................... + + extern int bcatblk (bstring b, const void * s, int len); + + Concatenate a fixed length buffer (s, len) to the end of bstring b. The + value BSTR_OK is returned if the operation is successful, otherwise + BSTR_ERR is returned. + + .......................................................................... + + extern int biseq (const_bstring b0, const_bstring b1); + + Compare the bstring b0 and b1 for equality. If the bstrings differ, 0 + is returned, if the bstrings are the same, 1 is returned, if there is an + error, -1 is returned. 
If the length of the bstrings are different, this + function has O(1) complexity. Contained '\0' characters are not treated + as a termination character. + + Note that the semantics of biseq are not completely compatible with + bstrcmp because of its different treatment of the '\0' character. + + .......................................................................... + + extern int bisstemeqblk (const_bstring b, const void * blk, int len); + + Compare beginning of bstring b0 with a block of memory of length len for + equality. If the beginning of b0 differs from the memory block (or if b0 + is too short), 0 is returned, if the bstrings are the same, 1 is returned, + if there is an error, -1 is returned. + + .......................................................................... + + extern int biseqcaseless (const_bstring b0, const_bstring b1); + + Compare two bstrings for equality without differentiating between case. + If the bstrings differ other than in case, 0 is returned, if the bstrings + are the same, 1 is returned, if there is an error, -1 is returned. If + the length of the bstrings are different, this function is O(1). '\0' + termination characters are not treated in any special way. + + .......................................................................... + + extern int bisstemeqcaselessblk (const_bstring b0, const void * blk, int len); + + Compare beginning of bstring b0 with a block of memory of length len + without differentiating between case for equality. If the beginning of b0 + differs from the memory block other than in case (or if b0 is too short), + 0 is returned, if the bstrings are the same, 1 is returned, if there is an + error, -1 is returned. + + .......................................................................... + + extern int biseqcstr (const_bstring b, const char *s); + + Compare the bstring b and char * bstring s. The C string s must be '\0' + terminated at exactly the length of the bstring b, and the contents + between the two must be identical with the bstring b with no '\0' + characters for the two contents to be considered equal. This is + equivalent to the condition that their current contents will be always be + equal when comparing them in the same format after converting one or the + other. If they are equal 1 is returned, if they are unequal 0 is + returned and if there is a detectable error BSTR_ERR is returned. + + .......................................................................... + + extern int biseqcstrcaseless (const_bstring b, const char *s); + + Compare the bstring b and char * string s. The C string s must be '\0' + terminated at exactly the length of the bstring b, and the contents + between the two must be identical except for case with the bstring b with + no '\0' characters for the two contents to be considered equal. This is + equivalent to the condition that their current contents will be always be + equal ignoring case when comparing them in the same format after + converting one or the other. If they are equal, except for case, 1 is + returned, if they are unequal regardless of case 0 is returned and if + there is a detectable error BSTR_ERR is returned. + + .......................................................................... + + extern int bstrcmp (const_bstring b0, const_bstring b1); + + Compare the bstrings b0 and b1 for ordering. 
If there is an error, + SHRT_MIN is returned, otherwise a value less than or greater than zero, + indicating that the bstring pointed to by b0 is lexicographically less + than or greater than the bstring pointed to by b1 is returned. If the + bstring lengths are unequal but the characters up until the length of the + shorter are equal then a value less than, or greater than zero, + indicating that the bstring pointed to by b0 is shorter or longer than the + bstring pointed to by b1 is returned. 0 is returned if and only if the + two bstrings are the same. If the length of the bstrings are different, + this function is O(n). Like its standard C library counter part, the + comparison does not proceed past any '\0' termination characters + encountered. + + The seemingly odd error return value, merely provides slightly more + granularity than the undefined situation given in the C library function + strcmp. The function otherwise behaves very much like strcmp(). + + Note that the semantics of bstrcmp are not completely compatible with + biseq because of its different treatment of the '\0' termination + character. + + .......................................................................... + + extern int bstrncmp (const_bstring b0, const_bstring b1, int n); + + Compare the bstrings b0 and b1 for ordering for at most n characters. If + there is an error, SHRT_MIN is returned, otherwise a value is returned as + if b0 and b1 were first truncated to at most n characters then bstrcmp + was called with these new bstrings are paremeters. If the length of the + bstrings are different, this function is O(n). Like its standard C + library counter part, the comparison does not proceed past any '\0' + termination characters encountered. + + The seemingly odd error return value, merely provides slightly more + granularity than the undefined situation given in the C library function + strncmp. The function otherwise behaves very much like strncmp(). + + .......................................................................... + + extern int bstricmp (const_bstring b0, const_bstring b1); + + Compare two bstrings without differentiating between case. The return + value is the difference of the values of the characters where the two + bstrings first differ, otherwise 0 is returned indicating that the + bstrings are equal. If the lengths are different, then a difference from + 0 is given, but if the first extra character is '\0', then it is taken to + be the value UCHAR_MAX+1. + + .......................................................................... + + extern int bstrnicmp (const_bstring b0, const_bstring b1, int n); + + Compare two bstrings without differentiating between case for at most n + characters. If the position where the two bstrings first differ is + before the nth position, the return value is the difference of the values + of the characters, otherwise 0 is returned. If the lengths are different + and less than n characters, then a difference from 0 is given, but if the + first extra character is '\0', then it is taken to be the value + UCHAR_MAX+1. + + .......................................................................... + + extern int bdestroy (bstring b); + + Deallocate the bstring passed. Passing NULL in as a parameter will have + no effect. Note that both the header and the data portion of the bstring + will be freed. No other bstring function which modifies one of its + parameters will free or reallocate the header. 
Because of this, in + general, bdestroy cannot be called on any declared struct tagbstring even + if it is not write protected. A bstring which is write protected cannot + be destroyed via the bdestroy call. Any attempt to do so will result in + no action taken, and BSTR_ERR will be returned. + + Note to C++ users: Passing in a CBString cast to a bstring will lead to + undefined behavior (free will be called on the header, rather than the + CBString destructor.) Instead just use the ordinary C++ language + facilities to dealloc a CBString. + + .......................................................................... + + extern int binstr (const_bstring s1, int pos, const_bstring s2); + + Search for the bstring s2 in s1 starting at position pos and looking in a + forward (increasing) direction. If it is found then it returns with the + first position after pos where it is found, otherwise it returns BSTR_ERR. + The algorithm used is brute force; O(m*n). + + .......................................................................... + + extern int binstrr (const_bstring s1, int pos, const_bstring s2); + + Search for the bstring s2 in s1 starting at position pos and looking in a + backward (decreasing) direction. If it is found then it returns with the + first position after pos where it is found, otherwise return BSTR_ERR. + Note that the current position at pos is tested as well -- so to be + disjoint from a previous forward search it is recommended that the + position be backed up (decremented) by one position. The algorithm used + is brute force; O(m*n). + + .......................................................................... + + extern int binstrcaseless (const_bstring s1, int pos, const_bstring s2); + + Search for the bstring s2 in s1 starting at position pos and looking in a + forward (increasing) direction but without regard to case. If it is + found then it returns with the first position after pos where it is + found, otherwise it returns BSTR_ERR. The algorithm used is brute force; + O(m*n). + + .......................................................................... + + extern int binstrrcaseless (const_bstring s1, int pos, const_bstring s2); + + Search for the bstring s2 in s1 starting at position pos and looking in a + backward (decreasing) direction but without regard to case. If it is + found then it returns with the first position after pos where it is + found, otherwise return BSTR_ERR. Note that the current position at pos + is tested as well -- so to be disjoint from a previous forward search it + is recommended that the position be backed up (decremented) by one + position. The algorithm used is brute force; O(m*n). + + .......................................................................... + + extern int binchr (const_bstring b0, int pos, const_bstring b1); + + Search for the first position in b0 starting from pos or after, in which + one of the characters in b1 is found. This function has an execution + time of O(b0->slen + b1->slen). If such a position does not exist in b0, + then BSTR_ERR is returned. + + .......................................................................... + + extern int binchrr (const_bstring b0, int pos, const_bstring b1); + + Search for the last position in b0 no greater than pos, in which one of + the characters in b1 is found. This function has an execution time + of O(b0->slen + b1->slen). If such a position does not exist in b0, + then BSTR_ERR is returned. 
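+
+   As an illustration (this example is an addition to the original Bstrlib
+   documentation, not part of it), a minimal sketch of locating characters
+   from a set with binchr () and binchrr ():
+
+       #include <stdio.h>
+       #include "bstrlib.h"
+
+       int main (void) {
+           bstring text   = bfromcstr ("better string library");
+           bstring vowels = bfromcstr ("aeiou");
+
+           int first = binchr  (text, 0, vowels);              /* first vowel */
+           int last  = binchrr (text, text->slen - 1, vowels); /* last vowel  */
+
+           if (first != BSTR_ERR && last != BSTR_ERR)
+               printf ("first vowel at %d, last vowel at %d\n", first, last);
+
+           bdestroy (vowels);
+           bdestroy (text);
+           return 0;
+       }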
+ + .......................................................................... + + extern int bninchr (const_bstring b0, int pos, const_bstring b1); + + Search for the first position in b0 starting from pos or after, in which + none of the characters in b1 is found and return it. This function has + an execution time of O(b0->slen + b1->slen). If such a position does + not exist in b0, then BSTR_ERR is returned. + + .......................................................................... + + extern int bninchrr (const_bstring b0, int pos, const_bstring b1); + + Search for the last position in b0 no greater than pos, in which none of + the characters in b1 is found and return it. This function has an + execution time of O(b0->slen + b1->slen). If such a position does not + exist in b0, then BSTR_ERR is returned. + + .......................................................................... + + extern int bstrchr (const_bstring b, int c); + + Search for the character c in the bstring b forwards from the start of + the bstring. Returns the position of the found character or BSTR_ERR if + it is not found. + + NOTE: This has been implemented as a macro on top of bstrchrp (). + + .......................................................................... + + extern int bstrrchr (const_bstring b, int c); + + Search for the character c in the bstring b backwards from the end of the + bstring. Returns the position of the found character or BSTR_ERR if it is + not found. + + NOTE: This has been implemented as a macro on top of bstrrchrp (). + + .......................................................................... + + extern int bstrchrp (const_bstring b, int c, int pos); + + Search for the character c in b forwards from the position pos + (inclusive). Returns the position of the found character or BSTR_ERR if + it is not found. + + .......................................................................... + + extern int bstrrchrp (const_bstring b, int c, int pos); + + Search for the character c in b backwards from the position pos in bstring + (inclusive). Returns the position of the found character or BSTR_ERR if + it is not found. + + .......................................................................... + + extern int bsetstr (bstring b0, int pos, const_bstring b1, unsigned char fill); + + Overwrite the bstring b0 starting at position pos with the bstring b1. If + the position pos is past the end of b0, then the character "fill" is + appended as necessary to make up the gap between the end of b0 and pos. + If b1 is NULL, it behaves as if it were a 0-length bstring. The value + BSTR_OK is returned if the operation is successful, otherwise BSTR_ERR is + returned. + + .......................................................................... + + extern int binsert (bstring s1, int pos, const_bstring s2, unsigned char fill); + + Inserts the bstring s2 into s1 at position pos. If the position pos is + past the end of s1, then the character "fill" is appended as necessary to + make up the gap between the end of s1 and pos. The value BSTR_OK is + returned if the operation is successful, otherwise BSTR_ERR is returned. + + .......................................................................... + + extern int binsertch (bstring s1, int pos, int len, unsigned char fill); + + Inserts the character fill repeatedly into s1 at position pos for a + length len. 
If the position pos is past the end of s1, then the + character "fill" is appended as necessary to make up the gap between the + end of s1 and the position pos + len (exclusive). The value BSTR_OK is + returned if the operation is successful, otherwise BSTR_ERR is returned. + + .......................................................................... + + extern int breplace (bstring b1, int pos, int len, const_bstring b2, + unsigned char fill); + + Replace a section of a bstring from pos for a length len with the bstring + b2. If the position pos is past the end of b1 then the character "fill" + is appended as necessary to make up the gap between the end of b1 and + pos. + + .......................................................................... + + extern int bfindreplace (bstring b, const_bstring find, + const_bstring replace, int position); + + Replace all occurrences of the find substring with a replace bstring + after a given position in the bstring b. The find bstring must have a + length > 0 otherwise BSTR_ERR is returned. This function does not + perform recursive per character replacement; that is to say successive + searches resume at the position after the last replace. + + So for example: + + bfindreplace (a0 = bfromcstr("aabaAb"), a1 = bfromcstr("a"), + a2 = bfromcstr("aa"), 0); + + Should result in changing a0 to "aaaabaaAb". + + This function performs exactly (b->slen - position) bstring comparisons, + and data movement is bounded above by character volume equivalent to size + of the output bstring. + + .......................................................................... + + extern int bfindreplacecaseless (bstring b, const_bstring find, + const_bstring replace, int position); + + Replace all occurrences of the find substring, ignoring case, with a + replace bstring after a given position in the bstring b. The find bstring + must have a length > 0 otherwise BSTR_ERR is returned. This function + does not perform recursive per character replacement; that is to say + successive searches resume at the position after the last replace. + + So for example: + + bfindreplacecaseless (a0 = bfromcstr("AAbaAb"), a1 = bfromcstr("a"), + a2 = bfromcstr("aa"), 0); + + Should result in changing a0 to "aaaabaaaab". + + This function performs exactly (b->slen - position) bstring comparisons, + and data movement is bounded above by character volume equivalent to size + of the output bstring. + + .......................................................................... + + extern int balloc (bstring b, int length); + + Increase the allocated memory backing the data buffer for the bstring b + to a length of at least length. If the memory backing the bstring b is + already large enough, not action is performed. This has no effect on the + bstring b that is visible to the bstring API. Usually this function will + only be used when a minimum buffer size is required coupled with a direct + access to the ->data member of the bstring structure. + + Be warned that like any other bstring function, the bstring must be well + defined upon entry to this function. I.e., doing something like: + + b->slen *= 2; /* ?? Most likely incorrect */ + balloc (b, b->slen); + + is invalid, and should be implemented as: + + int t; + if (BSTR_OK == balloc (b, t = (b->slen * 2))) b->slen = t; + + This function will return with BSTR_ERR if b is not detected as a valid + bstring or length is not greater than 0, otherwise BSTR_OK is returned. 
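+
+   For illustration (added here; not part of the original Bstrlib
+   documentation), a minimal sketch of the pattern described above --
+   preallocating with balloc () and then writing through the ->data member
+   directly:
+
+       #include <stdio.h>
+       #include <string.h>
+       #include "bstrlib.h"
+
+       int main (void) {
+           bstring b = bfromcstr ("");
+           const char src[] = "filled in by direct buffer access";
+           int n = (int) strlen (src);
+
+           if (BSTR_OK == balloc (b, n + 1)) {   /* room for n chars + '\0' */
+               memcpy (b->data, src, n + 1);     /* fill the backing buffer */
+               b->slen = n;                      /* then publish the length */
+           }
+           printf ("%s\n", bdata (b));
+           bdestroy (b);
+           return 0;
+       }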
+ + .......................................................................... + + extern int ballocmin (bstring b, int length); + + Change the amount of memory backing the bstring b to at least length. + This operation will never truncate the bstring data including the + extra terminating '\0' and thus will not decrease the length to less than + b->slen + 1. Note that repeated use of this function may cause + performance problems (realloc may be called on the bstring more than + the O(log(INT_MAX)) times). This function will return with BSTR_ERR if b + is not detected as a valid bstring or length is not greater than 0, + otherwise BSTR_OK is returned. + + So for example: + + if (BSTR_OK == ballocmin (b, 64)) b->data[63] = 'x'; + + The idea is that this will set the 64th character of b to 'x' if it is at + least 64 characters long otherwise do nothing. And we know this is well + defined so long as the ballocmin call was successfully, since it will + ensure that b has been allocated with at least 64 characters. + + .......................................................................... + + int btrunc (bstring b, int n); + + Truncate the bstring to at most n characters. This function will return + with BSTR_ERR if b is not detected as a valid bstring or n is less than + 0, otherwise BSTR_OK is returned. + + .......................................................................... + + extern int bpattern (bstring b, int len); + + Replicate the starting bstring, b, end to end repeatedly until it + surpasses len characters, then chop the result to exactly len characters. + This function operates in-place. This function will return with BSTR_ERR + if b is NULL or of length 0, otherwise BSTR_OK is returned. + + .......................................................................... + + extern int btoupper (bstring b); + + Convert contents of bstring to upper case. This function will return with + BSTR_ERR if b is NULL or of length 0, otherwise BSTR_OK is returned. + + .......................................................................... + + extern int btolower (bstring b); + + Convert contents of bstring to lower case. This function will return with + BSTR_ERR if b is NULL or of length 0, otherwise BSTR_OK is returned. + + .......................................................................... + + extern int bltrimws (bstring b); + + Delete whitespace contiguous from the left end of the bstring. This + function will return with BSTR_ERR if b is NULL or of length 0, otherwise + BSTR_OK is returned. + + .......................................................................... + + extern int brtrimws (bstring b); + + Delete whitespace contiguous from the right end of the bstring. This + function will return with BSTR_ERR if b is NULL or of length 0, otherwise + BSTR_OK is returned. + + .......................................................................... + + extern int btrimws (bstring b); + + Delete whitespace contiguous from both ends of the bstring. This function + will return with BSTR_ERR if b is NULL or of length 0, otherwise BSTR_OK + is returned. + + .......................................................................... + + extern int bstrListCreate (void); + + Create an empty struct bstrList. The struct bstrList output structure is + declared as follows: + + struct bstrList { + int qty, mlen; + bstring * entry; + }; + + The entry field actually is an array with qty number entries. 
The mlen
+ record counts the maximum number of bstrings for which there is memory
+ in the entry record.
+
+ The Bstrlib API does *NOT* include a comprehensive set of functions for
+ full management of struct bstrList in an abstracted way. The reason for
+ this is that the aliasing semantics of the list are best left to the user
+ of this function, and performance varies wildly depending on the
+ assumptions made. For a fully managed list of the bstring data type it is
+ recommended that the public C++ std::vector be used, since its
+ semantics and usage are more standard.
+
+ ..........................................................................
+
+ extern int bstrListDestroy (struct bstrList * sl);
+
+ Destroy a struct bstrList structure that was returned by the bsplit
+ function. Note that this will destroy each bstring in the ->entry array
+ as well. See bstrListCreate() above for the structure of struct bstrList.
+
+ ..........................................................................
+
+ extern int bstrListAlloc (struct bstrList * sl, int msz);
+
+ Ensure that there is memory for at least msz entries for the
+ list.
+
+ ..........................................................................
+
+ extern int bstrListAllocMin (struct bstrList * sl, int msz);
+
+ Try to allocate the minimum amount of memory for the list to include at
+ least msz entries or sl->qty, whichever is greater.
+
+ ..........................................................................
+
+ extern struct bstrList * bsplit (bstring str, unsigned char splitChar);
+
+ Create an array of sequential substrings from str divided by the
+ character splitChar. Successive occurrences of the splitChar will be
+ divided by empty bstring entries, following the semantics from the Python
+ programming language. To reclaim the memory from this output structure,
+ bstrListDestroy () should be called. See bstrListCreate() above for the
+ structure of struct bstrList.
+
+ ..........................................................................
+
+ extern struct bstrList * bsplits (bstring str, const_bstring splitStr);
+
+ Create an array of sequential substrings from str divided by any
+ character contained in splitStr. An empty splitStr causes a single entry
+ bstrList containing a copy of str to be returned. See bstrListCreate()
+ above for the structure of struct bstrList.
+
+ ..........................................................................
+
+ extern struct bstrList * bsplitstr (bstring str, const_bstring splitStr);
+
+ Create an array of sequential substrings from str divided by the entire
+ substring splitStr. An empty splitStr causes a single entry bstrList
+ containing a copy of str to be returned. See bstrListCreate() above for
+ the structure of struct bstrList.
+
+ ..........................................................................
+
+ extern bstring bjoin (const struct bstrList * bl, const_bstring sep);
+
+ Join the entries of a bstrList into one bstring by sequentially
+ concatenating them with the sep bstring in between. If sep is NULL, it
+ is treated as if it were the empty bstring. Note that:
+
+     bjoin (l = bsplit (b, s->data[0]), s);
+
+ should result in a copy of b, if s->slen is 1. If there is an error NULL
+ is returned, otherwise a bstring with the correct result is returned.
+ See bstrListCreate() above for the structure of struct bstrList.
+
+ ..........................................................................
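+
+   For illustration (an addition to the original documentation), a minimal
+   sketch of the bsplit ()/bjoin () round trip described above:
+
+       #include <stdio.h>
+       #include "bstrlib.h"
+
+       int main (void) {
+           bstring csv = bfromcstr ("red,green,,blue");
+           bstring sep = bfromcstr (",");
+           struct bstrList * parts = bsplit (csv, ',');
+
+           if (parts) {
+               bstring joined = bjoin (parts, sep);   /* should equal csv  */
+               if (joined) printf ("%d fields, rejoined: %s\n",
+                                   parts->qty, bdata (joined));
+               bdestroy (joined);                     /* NULL is a no-op   */
+               bstrListDestroy (parts);               /* frees each entry  */
+           }
+           bdestroy (sep);
+           bdestroy (csv);
+           return 0;
+       }
+
+ ..........................................................................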
+ + extern int bsplitcb (const_bstring str, unsigned char splitChar, int pos, + int (* cb) (void * parm, int ofs, int len), void * parm); + + Iterate the set of disjoint sequential substrings over str starting at + position pos divided by the character splitChar. The parm passed to + bsplitcb is passed on to cb. If the function cb returns a value < 0, + then further iterating is halted and this value is returned by bsplitcb. + + Note: Non-destructive modification of str from within the cb function + while performing this split is not undefined. bsplitcb behaves in + sequential lock step with calls to cb. I.e., after returning from a cb + that return a non-negative integer, bsplitcb continues from the position + 1 character after the last detected split character and it will halt + immediately if the length of str falls below this point. However, if the + cb function destroys str, then it *must* return with a negative value, + otherwise bsplitcb will continue in an undefined manner. + + This function is provided as an incremental alternative to bsplit that is + abortable and which does not impose additional memory allocation. + + .......................................................................... + + extern int bsplitscb (const_bstring str, const_bstring splitStr, int pos, + int (* cb) (void * parm, int ofs, int len), void * parm); + + Iterate the set of disjoint sequential substrings over str starting at + position pos divided by any of the characters in splitStr. An empty + splitStr causes the whole str to be iterated once. The parm passed to + bsplitcb is passed on to cb. If the function cb returns a value < 0, + then further iterating is halted and this value is returned by bsplitcb. + + Note: Non-destructive modification of str from within the cb function + while performing this split is not undefined. bsplitscb behaves in + sequential lock step with calls to cb. I.e., after returning from a cb + that return a non-negative integer, bsplitscb continues from the position + 1 character after the last detected split character and it will halt + immediately if the length of str falls below this point. However, if the + cb function destroys str, then it *must* return with a negative value, + otherwise bsplitscb will continue in an undefined manner. + + This function is provided as an incremental alternative to bsplits that + is abortable and which does not impose additional memory allocation. + + .......................................................................... + + extern int bsplitstrcb (const_bstring str, const_bstring splitStr, int pos, + int (* cb) (void * parm, int ofs, int len), void * parm); + + Iterate the set of disjoint sequential substrings over str starting at + position pos divided by the entire substring splitStr. An empty splitStr + causes each character of str to be iterated. The parm passed to bsplitcb + is passed on to cb. If the function cb returns a value < 0, then further + iterating is halted and this value is returned by bsplitcb. + + Note: Non-destructive modification of str from within the cb function + while performing this split is not undefined. bsplitstrcb behaves in + sequential lock step with calls to cb. I.e., after returning from a cb + that return a non-negative integer, bsplitstrcb continues from the position + 1 character after the last detected split character and it will halt + immediately if the length of str falls below this point. 
However, if the
+ cb function destroys str, then it *must* return with a negative value,
+ otherwise bsplitstrcb will continue in an undefined manner.
+
+ This function is provided as an incremental alternative to bsplitstr that
+ is abortable and does not impose additional memory allocation.
+
+ ..........................................................................
+
+ extern bstring bformat (const char * fmt, ...);
+
+ Takes the same parameters as printf (), but rather than outputting
+ results to stdio, it forms a bstring which contains what would have been
+ output. Note that if there is an early generation of a '\0' character,
+ the bstring will be truncated to this end point.
+
+ Note that %s format tokens correspond to '\0' terminated char * buffers,
+ not bstrings. To print a bstring, first dereference the data element of
+ the bstring:
+
+     /* b1->data needs to be '\0' terminated, so tagbstrings generated
+        by blk2tbstr () might not be suitable. */
+     b0 = bformat ("Hello, %s", b1->data);
+
+ Note that if the BSTRLIB_NOVSNP macro has been set when bstrlib has been
+ compiled, the bformat function is not present.
+
+ ..........................................................................
+
+ extern int bformata (bstring b, const char * fmt, ...);
+
+ In addition to the initial output buffer b, bformata takes the same
+ parameters as printf (), but rather than outputting results to stdio, it
+ appends the results to the initial bstring parameter. Note that if
+ there is an early generation of a '\0' character, the bstring will be
+ truncated to this end point.
+
+ Note that %s format tokens correspond to '\0' terminated char * buffers,
+ not bstrings. To print a bstring, first dereference the data element of
+ the bstring:
+
+     /* b1->data needs to be '\0' terminated, so tagbstrings generated
+        by blk2tbstr () might not be suitable. */
+     bformata (b0 = bfromcstr ("Hello"), ", %s", b1->data);
+
+ Note that if the BSTRLIB_NOVSNP macro has been set when bstrlib has been
+ compiled, the bformata function is not present.
+
+ ..........................................................................
+
+ extern int bassignformat (bstring b, const char * fmt, ...);
+
+ After the first parameter, it takes the same parameters as printf (), but
+ rather than outputting results to stdio, it outputs the results to
+ the bstring parameter b. Note that if there is an early generation of a
+ '\0' character, the bstring will be truncated to this end point.
+
+ Note that %s format tokens correspond to '\0' terminated char * buffers,
+ not bstrings. To print a bstring, first dereference the data element of
+ the bstring:
+
+     /* b1->data needs to be '\0' terminated, so tagbstrings generated
+        by blk2tbstr () might not be suitable. */
+     bassignformat (b0 = bfromcstr ("Hello"), ", %s", b1->data);
+
+ Note that if the BSTRLIB_NOVSNP macro has been set when bstrlib has been
+ compiled, the bassignformat function is not present.
+
+ ..........................................................................
+
+ extern int bvcformata (bstring b, int count, const char * fmt, va_list arglist);
+
+ The bvcformata function formats data under control of the format control
+ string fmt and attempts to append the result to b. The fmt parameter is
+ the same as that of the printf function. The variable argument list is
+ replaced with arglist, which has been initialized by the va_start macro.
+ The size of the output is upper bounded by count. If the required output
+ exceeds count, the string b is not augmented with any contents and a value
+ below BSTR_ERR is returned. If a value below -count is returned then it
+ is recommended that the negative of this value be used as an update to the
+ count in a subsequent pass. On other errors, such as running out of
+ memory, parameter errors or numeric wrap around, BSTR_ERR is returned.
+ BSTR_OK is returned when the output is successfully generated and
+ appended to b.
+
+ Note: There is no sanity checking of arglist, and this function is
+ destructive of the contents of b from the b->slen point onward. If there
+ is an early generation of a '\0' character, the bstring will be truncated
+ to this end point.
+
+ Although this function is part of the external API for Bstrlib, the
+ interface and semantics (length limitations, and unusual return codes)
+ are fairly atypical. The real purpose for this function is to provide an
+ engine for the bvformata macro.
+
+ Note that if the BSTRLIB_NOVSNP macro has been set when bstrlib has been
+ compiled, the bvcformata function is not present.
+
+ ..........................................................................
+
+ extern bstring bread (bNread readPtr, void * parm);
+ typedef size_t (* bNread) (void *buff, size_t elsize, size_t nelem,
+                            void *parm);
+
+ Read an entire stream into a bstring, verbatim. The readPtr function
+ pointer is compatible with fread semantics, except that it need not obtain
+ the stream data from a file. The intention is that parm would contain
+ the stream data context/state required (similar to the role of the FILE*
+ I/O stream parameter of fread.)
+
+ Abstracting the block read function allows for block devices other than
+ file streams to be read if desired. Note that there is an ANSI
+ compatibility issue if "fread" is used directly; see the ANSI issues
+ section below.
+
+ ..........................................................................
+
+ extern int breada (bstring b, bNread readPtr, void * parm);
+
+ Read an entire stream and append it to a bstring, verbatim. Behaves
+ like bread, except that it appends its results to the bstring b.
+ BSTR_ERR is returned on error, otherwise 0 is returned.
+
+ ..........................................................................
+
+ extern bstring bgets (bNgetc getcPtr, void * parm, char terminator);
+ typedef int (* bNgetc) (void * parm);
+
+ Read a bstring from a stream. As many bytes as necessary are read
+ until the terminator is consumed or no more characters are available from
+ the stream. If read from the stream, the terminator character will be
+ appended to the end of the returned bstring. The getcPtr function must
+ have the same semantics as the fgetc C library function (i.e., returning
+ an integer whose value is negative when there are no more characters
+ available, otherwise the value of the next available unsigned character
+ from the stream.) The intention is that parm would contain the stream
+ data context/state required (similar to the role of the FILE* I/O stream
+ parameter of fgets.) If no characters are read, or there is some other
+ detectable error, NULL is returned.
+
+ bgets will never call the getcPtr function more often than necessary to
+ construct its output (including a single call, if required, to determine
+ that the stream contains no more characters.)
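+
+   As an illustration of the callback contract described above (this sketch
+   is an addition to the original documentation; the memreader/memgetc names
+   are made up for the example), a bNgetc compatible reader over an
+   in-memory buffer:
+
+       #include <stdio.h>
+       #include <string.h>
+       #include "bstrlib.h"
+
+       struct memreader { const char * buf; int pos, len; };
+
+       static int memgetc (void * parm) {
+           struct memreader * m = (struct memreader *) parm;
+           if (m->pos >= m->len) return -1;      /* negative => no more data */
+           return (unsigned char) m->buf[m->pos++];
+       }
+
+       int main (void) {
+           struct memreader m = { "first line\nsecond line\n", 0, 0 };
+           bstring line;
+
+           m.len = (int) strlen (m.buf);
+           line = bgets (memgetc, &m, '\n');     /* reads "first line\n" */
+           if (line) printf ("%s", bdata (line));
+           bdestroy (line);
+           return 0;
+       }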
+
+ Abstracting the character stream function and terminator character allows
+ for different stream devices and string formats other than '\n'
+ terminated lines in a file if desired (consider \032 terminated email
+ messages, in a UNIX mailbox for example.)
+
+ For files, this function can be used analogously to fgets as follows:
+
+     fp = fopen ( ... );
+     if (fp) b = bgets ((bNgetc) fgetc, fp, '\n');
+
+ (Note that only one terminator character can be used, and that '\0' is
+ not assumed to terminate the stream in addition to the terminator
+ character. This is consistent with the semantics of fgets.)
+
+ ..........................................................................
+
+ extern int bgetsa (bstring b, bNgetc getcPtr, void * parm, char terminator);
+
+ Read from a stream and concatenate to a bstring. Behaves like bgets,
+ except that it appends its results to the bstring b. The value 1 is
+ returned if no characters are read before a negative result is returned
+ from getcPtr. Otherwise BSTR_ERR is returned on error, and 0 is returned
+ in other normal cases.
+
+ ..........................................................................
+
+ extern int bassigngets (bstring b, bNgetc getcPtr, void * parm, char terminator);
+
+ Read from a stream and assign to a bstring. Behaves like bgets,
+ except that it assigns the results to the bstring b. The value 1 is
+ returned if no characters are read before a negative result is returned
+ from getcPtr. Otherwise BSTR_ERR is returned on error, and 0 is returned
+ in other normal cases.
+
+ ..........................................................................
+
+ extern struct bStream * bsopen (bNread readPtr, void * parm);
+
+ Wrap a given open stream (described by a fread compatible function
+ pointer and stream handle) into an open bStream suitable for the bstring
+ library streaming functions.
+
+ ..........................................................................
+
+ extern void * bsclose (struct bStream * s);
+
+ Close the bStream, and return the handle to the stream that was
+ originally used to open the given stream. If s is NULL or detectably
+ invalid, NULL will be returned.
+
+ ..........................................................................
+
+ extern int bsbufflength (struct bStream * s, int sz);
+
+ Set the length of the buffer used by the bStream. If sz is the macro
+ BSTR_BS_BUFF_LENGTH_GET (which is 0), the length is not set. If s is
+ NULL or sz is negative, the function will return with BSTR_ERR, otherwise
+ this function returns with the previous length.
+
+ ..........................................................................
+
+ extern int bsreadln (bstring r, struct bStream * s, char terminator);
+
+ Read a bstring terminated by the terminator character or the end of the
+ stream from the bStream (s) and return it into the parameter r. The
+ matched terminator, if found, appears at the end of the line read. If
+ the stream has been exhausted of all available data, before any can be
+ read, BSTR_ERR is returned. This function may read additional characters
+ into the stream buffer from the core stream that are not returned, but
+ will be retained for subsequent read operations. When reading from high
+ speed streams, this function can perform significantly faster than bgets.
+
+ ..........................................................................
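+
+   For illustration (an addition to the original documentation; the file
+   name input.txt is hypothetical), a minimal sketch of line-by-line reading
+   of a FILE * through the bStream interface with bsopen (), bsreadln () and
+   bsclose ():
+
+       #include <stdio.h>
+       #include "bstrlib.h"
+
+       int main (void) {
+           FILE * fp = fopen ("input.txt", "rb");
+           if (fp) {
+               /* Casting fread is the common idiom, but see the ANSI
+                  compatibility note in the bread () description above. */
+               struct bStream * s = bsopen ((bNread) fread, fp);
+               bstring line = bfromcstr ("");
+
+               while (BSTR_ERR != bsreadln (line, s, '\n'))
+                   printf ("%s", bdata (line));
+
+               bdestroy (line);
+               bsclose (s);
+               fclose (fp);
+           }
+           return 0;
+       }
+
+ ..........................................................................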
+
+ extern int bsreadlna (bstring r, struct bStream * s, char terminator);
+
+ Read a bstring terminated by the terminator character or the end of the
+ stream from the bStream (s) and concatenate it to the parameter r. The
+ matched terminator, if found, appears at the end of the line read. If
+ the stream has been exhausted of all available data, before any can be
+ read, BSTR_ERR is returned. This function may read additional characters
+ into the stream buffer from the core stream that are not returned, but
+ will be retained for subsequent read operations. When reading from high
+ speed streams, this function can perform significantly faster than bgets.
+
+ ..........................................................................
+
+ extern int bsreadlns (bstring r, struct bStream * s, bstring terminators);
+
+ Read a bstring terminated by any character in the terminators bstring or
+ the end of the stream from the bStream (s) and return it into the
+ parameter r. This function may read additional characters from the core
+ stream that are not returned, but will be retained for subsequent read
+ operations.
+
+ ..........................................................................
+
+ extern int bsreadlnsa (bstring r, struct bStream * s, bstring terminators);
+
+ Read a bstring terminated by any character in the terminators bstring or
+ the end of the stream from the bStream (s) and concatenate it to the
+ parameter r. If the stream has been exhausted of all available data,
+ before any can be read, BSTR_ERR is returned. This function may read
+ additional characters from the core stream that are not returned, but
+ will be retained for subsequent read operations.
+
+ ..........................................................................
+
+ extern int bsread (bstring r, struct bStream * s, int n);
+
+ Read a bstring of length n (or, if fewer remain, as many bytes as are
+ remaining) from the bStream. This function will read the minimum
+ required number of additional characters from the core stream. When the
+ stream is at the end of the file BSTR_ERR is returned, otherwise BSTR_OK
+ is returned.
+
+ ..........................................................................
+
+ extern int bsreada (bstring r, struct bStream * s, int n);
+
+ Read a bstring of length n (or, if fewer remain, as many bytes as are
+ remaining) from the bStream and concatenate it to the parameter r. This
+ function will read the minimum required number of additional characters
+ from the core stream. When the stream is at the end of the file BSTR_ERR
+ is returned, otherwise BSTR_OK is returned.
+
+ ..........................................................................
+
+ extern int bsunread (struct bStream * s, const_bstring b);
+
+ Insert a bstring into the bStream at the current position. These
+ characters will be read prior to those that actually come from the core
+ stream.
+
+ ..........................................................................
+
+ extern int bspeek (bstring r, const struct bStream * s);
+
+ Return the number of currently buffered characters from the bStream that
+ will be read prior to reads from the core stream, and append them to the
+ parameter r.
+
+ ..........................................................................
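+
+   For illustration (an addition to the original documentation; input.txt is
+   a hypothetical file name, and the bsStatic macro is described in the
+   macros section below), a minimal sketch of bsunread () and bspeek ():
+
+       #include <stdio.h>
+       #include "bstrlib.h"
+
+       int main (void) {
+           FILE * fp = fopen ("input.txt", "rb");
+           if (fp) {
+               struct bStream * s = bsopen ((bNread) fread, fp);
+               struct tagbstring pushed = bsStatic ("PREFIX:");
+               bstring buffered = bfromcstr ("");
+
+               bsunread (s, &pushed);     /* these bytes will be read first  */
+               bspeek (buffered, s);      /* appends the buffered characters */
+               printf ("buffered: %s\n", bdata (buffered));
+
+               bdestroy (buffered);
+               fclose ((FILE *) bsclose (s));
+           }
+           return 0;
+       }
+
+ ..........................................................................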
+ + extern int bssplitscb (struct bStream * s, const_bstring splitStr, + int (* cb) (void * parm, int ofs, const_bstring entry), void * parm); + + Iterate the set of disjoint sequential substrings over the stream s + divided by any character from the bstring splitStr. The parm passed to + bssplitscb is passed on to cb. If the function cb returns a value < 0, + then further iterating is halted and this return value is returned by + bssplitscb. + + Note: At the point of calling the cb function, the bStream pointer is + pointed exactly at the position right after having read the split + character. The cb function can act on the stream by causing the bStream + pointer to move, and bssplitscb will continue by starting the next split + at the position of the pointer after the return from cb. + + However, if the cb causes the bStream s to be destroyed then the cb must + return with a negative value, otherwise bssplitscb will continue in an + undefined manner. + + This function is provided as way to incrementally parse through a file + or other generic stream that in total size may otherwise exceed the + practical or desired memory available. As with the other split callback + based functions this is abortable and does not impose additional memory + allocation. + + .......................................................................... + + extern int bssplitstrcb (struct bStream * s, const_bstring splitStr, + int (* cb) (void * parm, int ofs, const_bstring entry), void * parm); + + Iterate the set of disjoint sequential substrings over the stream s + divided by the entire substring splitStr. The parm passed to + bssplitstrcb is passed on to cb. If the function cb returns a + value < 0, then further iterating is halted and this return value is + returned by bssplitstrcb. + + Note: At the point of calling the cb function, the bStream pointer is + pointed exactly at the position right after having read the split + character. The cb function can act on the stream by causing the bStream + pointer to move, and bssplitstrcb will continue by starting the next + split at the position of the pointer after the return from cb. + + However, if the cb causes the bStream s to be destroyed then the cb must + return with a negative value, otherwise bssplitscb will continue in an + undefined manner. + + This function is provided as way to incrementally parse through a file + or other generic stream that in total size may otherwise exceed the + practical or desired memory available. As with the other split callback + based functions this is abortable and does not impose additional memory + allocation. + + .......................................................................... + + extern int bseof (const struct bStream * s); + + Return the defacto "EOF" (end of file) state of a stream (1 if the + bStream is in an EOF state, 0 if not, and BSTR_ERR if stream is closed or + detectably erroneous.) When the readPtr callback returns a value <= 0 + the stream reaches its "EOF" state. Note that bunread with non-empty + content will essentially turn off this state, and the stream will not be + in its "EOF" state so long as its possible to read more data out of it. + + Also note that the semantics of bseof() are slightly different from + something like feof(). I.e., reaching the end of the stream does not + necessarily guarantee that bseof() will return with a value indicating + that this has happened. 
bseof() will only return a value indicating that it has
+ reached the "EOF" once an attempt has been made to read past the end of
+ the bStream.
+
+The macros
+----------
+
+ The macros described below are shown in a prototype form indicating their
+ intended usage. Note that the parameters passed to these macros will be
+ referenced multiple times. As with all macros, programmer care is
+ required to guard against unintended side effects.
+
+ int blengthe (const_bstring b, int err);
+
+ Returns the length of the bstring. If the bstring is NULL, err is
+ returned.
+
+ ..........................................................................
+
+ int blength (const_bstring b);
+
+ Returns the length of the bstring. If the bstring is NULL, the length
+ returned is 0.
+
+ ..........................................................................
+
+ int bchare (const_bstring b, int p, int c);
+
+ Returns the p'th character of the bstring b. If the position p refers to
+ a position that does not exist in the bstring or the bstring is NULL,
+ then c is returned.
+
+ ..........................................................................
+
+ char bchar (const_bstring b, int p);
+
+ Returns the p'th character of the bstring b. If the position p refers to
+ a position that does not exist in the bstring or the bstring is NULL,
+ then '\0' is returned.
+
+ ..........................................................................
+
+ char * bdatae (bstring b, char * err);
+
+ Returns the char * data portion of the bstring b. If b is NULL, err is
+ returned.
+
+ ..........................................................................
+
+ char * bdata (bstring b);
+
+ Returns the char * data portion of the bstring b. If b is NULL, NULL is
+ returned.
+
+ ..........................................................................
+
+ char * bdataofse (bstring b, int ofs, char * err);
+
+ Returns the char * data portion of the bstring b offset by ofs. If b is
+ NULL, err is returned.
+
+ ..........................................................................
+
+ char * bdataofs (bstring b, int ofs);
+
+ Returns the char * data portion of the bstring b offset by ofs. If b is
+ NULL, NULL is returned.
+
+ ..........................................................................
+
+ struct tagbstring var = bsStatic ("...");
+
+ The bsStatic macro allows for static declarations of literal string
+ constants as struct tagbstring structures. The resulting tagbstring does
+ not need to be freed or destroyed. Note that this macro is only well
+ defined for string literal arguments. For more general string pointers,
+ use the btfromcstr macro.
+
+ The resulting struct tagbstring is permanently write protected. Attempts
+ to write to this struct tagbstring from any bstrlib function will lead to
+ BSTR_ERR being returned. Invoking the bwriteallow macro onto this struct
+ tagbstring has no effect.
+
+ ..........................................................................
+
+ <- bsStaticBlkParms ("...")
+
+ The bsStaticBlkParms macro emits a pair of comma separated parameters
+ corresponding to the block parameters for the block functions in Bstrlib
+ (i.e., blk2bstr, bcatblk, blk2tbstr, bisstemeqblk, bisstemeqcaselessblk.)
+ Note that this macro is only well defined for string literal arguments.
+
+ Examples:
+
+     bstring b = blk2bstr (bsStaticBlkParms ("Fast init. "));
")); + bcatblk (b, bsStaticBlkParms ("No frills fast concatenation.")); + + These are faster than using bfromcstr() and bcatcstr() respectively + because the length of the inline string is known as a compile time + constant. Also note that seperate struct tagbstring declarations for + holding the output of a bsStatic() macro are not required. + + .......................................................................... + + void btfromcstr (struct tagbstring& t, const char * s); + + Fill in the tagbstring t with the '\0' terminated char buffer s. This + action is purely reference oriented; no memory management is done. The + data member is just assigned s, and slen is assigned the strlen of s. + The s parameter is accessed exactly once in this macro. + + The resulting struct tagbstring is initially write protected. Attempts + to write to this struct tagbstring in a write protected state from any + bstrlib function will lead to BSTR_ERR being returned. Invoke the + bwriteallow on this struct tagbstring to make it writeable (though this + requires that s be obtained from a function compatible with malloc.) + + .......................................................................... + + void btfromblk (struct tagbstring& t, void * s, int len); + + Fill in the tagbstring t with the data buffer s with length len. This + action is purely reference oriented; no memory management is done. The + data member of t is just assigned s, and slen is assigned len. Note that + the buffer is not appended with a '\0' character. The s and len + parameters are accessed exactly once each in this macro. + + The resulting struct tagbstring is initially write protected. Attempts + to write to this struct tagbstring in a write protected state from any + bstrlib function will lead to BSTR_ERR being returned. Invoke the + bwriteallow on this struct tagbstring to make it writeable (though this + requires that s be obtained from a function compatible with malloc.) + + .......................................................................... + + void btfromblkltrimws (struct tagbstring& t, void * s, int len); + + Fill in the tagbstring t with the data buffer s with length len after it + has been left trimmed. This action is purely reference oriented; no + memory management is done. The data member of t is just assigned to a + pointer inside the buffer s. Note that the buffer is not appended with a + '\0' character. The s and len parameters are accessed exactly once each + in this macro. + + The resulting struct tagbstring is permanently write protected. Attempts + to write to this struct tagbstring from any bstrlib function will lead to + BSTR_ERR being returned. Invoking the bwriteallow macro onto this struct + tagbstring has no effect. + + .......................................................................... + + void btfromblkrtrimws (struct tagbstring& t, void * s, int len); + + Fill in the tagbstring t with the data buffer s with length len after it + has been right trimmed. This action is purely reference oriented; no + memory management is done. The data member of t is just assigned to a + pointer inside the buffer s. Note that the buffer is not appended with a + '\0' character. The s and len parameters are accessed exactly once each + in this macro. + + The resulting struct tagbstring is permanently write protected. Attempts + to write to this struct tagbstring from any bstrlib function will lead to + BSTR_ERR being returned. Invoking the bwriteallow macro onto this struct + tagbstring has no effect. 
+ + .......................................................................... + + void btfromblktrimws (struct tagbstring& t, void * s, int len); + + Fill in the tagbstring t with the data buffer s with length len after it + has been left and right trimmed. This action is purely reference + oriented; no memory management is done. The data member of t is just + assigned to a pointer inside the buffer s. Note that the buffer is not + appended with a '\0' character. The s and len parameters are accessed + exactly once each in this macro. + + The resulting struct tagbstring is permanently write protected. Attempts + to write to this struct tagbstring from any bstrlib function will lead to + BSTR_ERR being returned. Invoking the bwriteallow macro onto this struct + tagbstring has no effect. + + .......................................................................... + + void bmid2tbstr (struct tagbstring& t, bstring b, int pos, int len); + + Fill the tagbstring t with the substring from b, starting from position + pos with a length len. The segment is clamped by the boundaries of + the bstring b. This action is purely reference oriented; no memory + management is done. Note that the buffer is not appended with a '\0' + character. Note that the t parameter to this macro may be accessed + multiple times. Note that the contents of t will become undefined + if the contents of b change or are destroyed. + + The resulting struct tagbstring is permanently write protected. Attempts + to write to this struct tagbstring in a write protected state from any + bstrlib function will lead to BSTR_ERR being returned. Invoking the + bwriteallow macro on this struct tagbstring will have no effect. + + .......................................................................... + + void bvformata (int& ret, bstring b, const char * format, lastarg); + + Append the bstring b with printf like formatting with the format control + string, and the arguments taken from the ... list of arguments after + lastarg passed to the containing function. If the containing function + does not have ... parameters or lastarg is not the last named parameter + before the ... then the results are undefined. If successful, the + results are appended to b and BSTR_OK is assigned to ret. Otherwise + BSTR_ERR is assigned to ret. + + Example: + + void dbgerror (FILE * fp, const char * fmt, ...) { + int ret; + bstring b; + bvformata (ret, b = bfromcstr ("DBG: "), fmt, fmt); + if (BSTR_OK == ret) fputs ((char *) bdata (b), fp); + bdestroy (b); + } + + Note that if the BSTRLIB_NOVSNP macro was set when bstrlib had been + compiled the bvformata macro will not link properly. If the + BSTRLIB_NOVSNP macro has been set, the bvformata macro will not be + available. + + .......................................................................... + + void bwriteprotect (struct tagbstring& t); + + Disallow bstring from being written to via the bstrlib API. Attempts to + write to the resulting tagbstring from any bstrlib function will lead to + BSTR_ERR being returned. + + Note: bstrings which are write protected cannot be destroyed via bdestroy. + + Note to C++ users: Setting a CBString as write protected will not prevent + it from being destroyed by the destructor. + + .......................................................................... + + void bwriteallow (struct tagbstring& t); + + Allow bstring to be written to via the bstrlib API. Note that such an + action makes the bstring both writable and destroyable. 
If the bstring is + not legitimately writable (as is the case for struct tagbstrings + initialized with a bsStatic value), the results of this are undefined. + + Note that invoking the bwriteallow macro may increase the number of + reallocs by one more than necessary for every call to bwriteallow + interleaved with any bstring API which writes to this bstring. + + .......................................................................... + + int biswriteprotected (struct tagbstring& t); + + Returns 1 if the bstring is write protected, otherwise 0 is returned. + +=============================================================================== + +The bstest module +----------------- + +The bstest module is just a unit test for the bstrlib module. For correct +implementations of bstrlib, it should execute with 0 failures being reported. +This test should be utilized if modifications/customizations to bstrlib have +been performed. It tests each core bstrlib function with bstrings of every +mode (read-only, NULL, static and mutable) and ensures that the expected +semantics are observed (including results that should indicate an error). It +also tests for aliasing support. Passing bstest is a necessary but not a +sufficient condition for ensuring the correctness of the bstrlib module. + + +The test module +--------------- + +The test module is just a unit test for the bstrwrap module. For correct +implementations of bstrwrap, it should execute with 0 failures being +reported. This test should be utilized if modifications/customizations to +bstrwrap have been performed. It tests each core bstrwrap function with +CBStrings write protected or not and ensures that the expected semantics are +observed (including expected exceptions.) Note that exceptions cannot be +disabled to run this test. Passing test is a necessary but not a sufficient +condition for ensuring the correctness of the bstrwrap module. + +=============================================================================== + +Using Bstring and CBString as an alternative to the C library +------------------------------------------------------------- + +First let us give a table of C library functions and the alternative bstring +functions and CBString methods that should be used instead of them. + +C-library Bstring alternative CBString alternative +--------- ------------------- -------------------- +gets bgets ::gets +strcpy bassign = operator +strncpy bassignmidstr ::midstr +strcat bconcat += operator +strncat bconcat + btrunc += operator + ::trunc +strtok bsplit, bsplits ::split +sprintf b(assign)format ::format +snprintf b(assign)format + btrunc ::format + ::trunc +vsprintf bvformata bvformata + +vsnprintf bvformata + btrunc bvformata + btrunc +vfprintf bvformata + fputs use bvformata + fputs +strcmp biseq, bstrcmp comparison operators. +strncmp bstrncmp, memcmp bstrncmp, memcmp +strlen ->slen, blength ::length +strdup bstrcpy constructor +strset bpattern ::fill +strstr binstr ::find +strpbrk binchr ::findchr +stricmp bstricmp cast & use bstricmp +strlwr btolower cast & use btolower +strupr btoupper cast & use btoupper +strrev bReverse (aux module) cast & use bReverse +strchr bstrchr cast & use bstrchr +strspnp use strspn use strspn +ungetc bsunread bsunread + +The top 9 C functions listed here are troublesome in that they impose memory +management in the calling function. The Bstring and CBstring interfaces have +built-in memory management, so there is far less code with far less potential +for buffer overrun problems. 
strtok can only be reliably called as a "leaf" +calculation, since it (quite bizarrely) maintains hidden internal state. And +gets is well known to be broken no matter what. The Bstrlib alternatives do +not suffer from those sorts of problems. + +The substitute for strncat can be performed with higher performance by using +the blk2tbstr macro to create a presized second operand for bconcat. + +C-library Bstring alternative CBString alternative +--------- ------------------- -------------------- +strspn strspn acceptable strspn acceptable +strcspn strcspn acceptable strcspn acceptable +strnset strnset acceptable strnset acceptable +printf printf acceptable printf acceptable +puts puts acceptable puts acceptable +fprintf fprintf acceptable fprintf acceptable +fputs fputs acceptable fputs acceptable +memcmp memcmp acceptable memcmp acceptable + +Remember that Bstring (and CBstring) functions will automatically append the +'\0' character to the character data buffer. So by simply accessing the data +buffer directly, ordinary C string library functions can be called directly +on them. Note that bstrcmp is not the same as memcmp in exactly the same way +that strcmp is not the same as memcmp. + +C-library Bstring alternative CBString alternative +--------- ------------------- -------------------- +fread balloc + fread ::alloc + fread +fgets balloc + fgets ::alloc + fgets + +These are odd ones because of the exact sizing of the buffer required. The +Bstring and CBString alternatives requires that the buffers are forced to +hold at least the prescribed length, then just use fread or fgets directly. +However, typically the automatic memory management of Bstring and CBstring +will make the typical use of fgets and fread to read specifically sized +strings unnecessary. + +Implementation Choices +---------------------- + +Overhead: +......... + +The bstring library has more overhead versus straight char buffers for most +functions. This overhead is essentially just the memory management and +string header allocation. This overhead usually only shows up for small +string manipulations. The performance loss has to be considered in +light of the following: + +1) What would be the performance loss of trying to write this management + code in one's own application? +2) Since the bstring library source code is given, a sufficiently powerful + modern inlining globally optimizing compiler can remove function call + overhead. + +Since the data type is exposed, a developer can replace any unsatisfactory +function with their own inline implementation. And that is besides the main +point of what the better string library is mainly meant to provide. Any +overhead lost has to be compared against the value of the safe abstraction +for coupling memory management and string functionality. + +Performance of the C interface: +............................... + +The algorithms used have performance advantages versus the analogous C +library functions. For example: + +1. bfromcstr/blk2str/bstrcpy versus strcpy/strdup. By using memmove instead + of strcpy, the break condition of the copy loop is based on an independent + counter (that should be allocated in a register) rather than having to + check the results of the load. Modern out-of-order executing CPUs can + parallelize the final branch mis-predict penality with the loading of the + source string. Some CPUs will also tend to have better built-in hardware + support for counted memory moves than load-compare-store. (This is a + minor, but non-zero gain.) +2. 
biseq versus strcmp. If the strings are unequal in length, biseq will
+   return in O(1) time. If the strings are aliased, or have aliased data
+   buffers, biseq will return in O(1) time. strcmp will always be O(k),
+   where k is the length of the common prefix or the whole string if they are
+   identical.
+3. ->slen versus strlen. ->slen is obviously always O(1), while strlen is
+   always O(n) where n is the length of the string.
+4. bconcat versus strcat. Both rely on precomputing the length of the
+   destination string argument, which will favor the bstring library. On
+   iterated concatenations the performance difference can be enormous.
+5. bsreadln versus fgets. The bsreadln function reads large blocks at a time
+   from the given stream, then parses out lines from the buffers directly.
+   Some C libraries will implement fgets as a loop over single fgetc calls.
+   Testing indicates that the bsreadln approach can be several times faster
+   for fast stream devices (such as a file that has been entirely cached.)
+6. bsplits/bsplitscb versus strspn. Accelerators for the set of match
+   characters are generated only once.
+7. binstr versus strstr. The binstr implementation unrolls the loops to
+   help reduce loop overhead. This will matter if the target string is
+   long and the source string is not found very early in the target string.
+   With strstr, while it is possible to unroll the source contents, it is
+   not possible to do so with the destination contents in a way that is
+   effective because every destination character must be tested against
+   '\0' before proceeding to the next character.
+8. bReverse versus strrev. The C function must find the end of the string
+   first before swapping character pairs.
+9. bstrrchr versus no comparable C function. It's not hard to write some C
+   code to search for a character from the end going backwards. But there
+   is no way to do this without computing the length of the string with
+   strlen.
+
+ Practical testing indicates that in general Bstrlib is never significantly
+ slower than the C library for common operations, while very often having a
+ performance advantage that ranges from significant to massive. Even for
+ functions like b(n)inchr versus str(c)spn() (where, in theory, there is no
+ advantage for the Bstrlib architecture) the performance of Bstrlib is vastly
+ superior to most tested C library implementations.
+
+ Some of Bstrlib's extra functionality also leads to inevitable performance
+ advantages over typical C solutions. For example, using the blk2tbstr macro,
+ one can (in O(1) time) generate an internal substring by reference while not
+ disturbing the original string. If disturbing the original string is not an
+ option, typically, a comparable char * solution would have to make a copy of
+ the substring to provide similar functionality. Another example is reverse
+ character set scanning -- the str(c)spn functions only scan in a forward
+ direction, which can complicate some parsing algorithms.
+
+ Where high performance char * based algorithms are available, Bstrlib can
+ still leverage them by accessing the ->data field on bstrings. So
+ realistically Bstrlib can never be significantly slower than any standard
+ '\0' terminated char * based solutions.
+
+ Performance of the C++ interface:
+ .................................
+
+ The C++ interface has been designed with an emphasis on abstraction and safety
+ first. However, since it is substantially a wrapper for the C bstring
However, since it is substantially a wrapper for the C bstring +functions, for longer strings the performance comments described in the +"Performance of the C interface" section above still apply. Note that the +(CBString *) type can be directly cast to a (bstring) type, and passed as +parameters to the C functions (though a CBString must never be passed to +bdestroy.) + +Probably the most controversial choice is performing full bounds checking on +the [] operator. This decision was made because 1) the fast alternative of +not bounds checking is still available by first casting the CBString to a +(const char *) buffer or to a (struct tagbstring) then derefencing .data and +2) because the lack of bounds checking is seen as one of the main weaknesses +of C/C++ versus other languages. This check being done on every access leads +to individual character extraction being actually slower than other languages +in this one respect (other language's compilers will normally dedicate more +resources on hoisting or removing bounds checking as necessary) but otherwise +bring C++ up to the level of other languages in terms of functionality. + +It is common for other C++ libraries to leverage the abstractions provided by +C++ to use reference counting and "copy on write" policies. While these +techniques can speed up some scenarios, they impose a problem with respect to +thread safety. bstrings and CBStrings can be properly protected with +"per-object" mutexes, meaning that two bstrlib calls can be made and execute +simultaneously, so long as the bstrings and CBstrings are distinct. With a +reference count and alias before copy on write policy, global mutexes are +required that prevent multiple calls to the strings library to execute +simultaneously regardless of whether or not the strings represent the same +string. + +One interesting trade off in CBString is that the default constructor is not +trivial. I.e., it always prepares a ready to use memory buffer. The purpose +is to ensure that there is a uniform internal composition for any functioning +CBString that is compatible with bstrings. It also means that the other +methods in the class are not forced to perform "late initialization" checks. +In the end it means that construction of CBStrings are slower than other +comparable C++ string classes. Initial testing, however, indicates that +CBString outperforms std::string and MFC's CString, for example, in all other +operations. So to work around this weakness it is recommended that CBString +declarations be pushed outside of inner loops. + +Practical testing indicates that with the exception of the caveats given +above (constructors and safe index character manipulations) the C++ API for +Bstrlib generally outperforms popular standard C++ string classes. Amongst +the standard libraries and compilers, the quality of concatenation operations +varies wildly and very little care has gone into search functions. Bstrlib +dominates those performance benchmarks. + +Memory management: +.................. + +The bstring functions which write and modify bstrings will automatically +reallocate the backing memory for the char buffer whenever it is required to +grow. The algorithm for resizing chosen is to snap up to sizes that are a +power of two which are sufficient to hold the intended new size. Memory +reallocation is not performed when the required size of the buffer is +decreased. This behavior can be relied on, and is necessary to make the +behaviour of balloc deterministic. 
This trades off additional memory usage +for decreasing the frequency for required reallocations: + +1. For any bstring whose size never exceeds n, its buffer is not ever + reallocated more than log_2(n) times for its lifetime. +2. For any bstring whose size never exceeds n, its buffer is never more than + 2*(n+1) in length. (The extra characters beyond 2*n are to allow for the + implicit '\0' which is always added by the bstring modifying functions.) + +Decreasing the buffer size when the string decreases in size would violate 1) +above and in real world case lead to pathological heap thrashing. Similarly, +allocating more tightly than "least power of 2 greater than necessary" would +lead to a violation of 1) and have the same potential for heap thrashing. + +Property 2) needs emphasizing. Although the memory allocated is always a +power of 2, for a bstring that grows linearly in size, its buffer memory also +grows linearly, not exponentially. The reason is that the amount of extra +space increases with each reallocation, which decreases the frequency of +future reallocations. + +Obviously, given that bstring writing functions may reallocate the data +buffer backing the target bstring, one should not attempt to cache the data +buffer address and use it after such bstring functions have been called. +This includes making reference struct tagbstrings which alias to a writable +bstring. + +balloc or bfromcstralloc can be used to preallocate the minimum amount of +space used for a given bstring. This will reduce even further the number of +times the data portion is reallocated. If the length of the string is never +more than one less than the memory length then there will be no further +reallocations. + +Note that invoking the bwriteallow macro may increase the number of reallocs +by one more than necessary for every call to bwriteallow interleaved with any +bstring API which writes to this bstring. + +The library does not use any mechanism for automatic clean up for the C API. +Thus explicit clean up via calls to bdestroy() are required to avoid memory +leaks. + +Constant and static tagbstrings: +................................ + +A struct tagbstring can be write protected from any bstrlib function using +the bwriteprotect macro. A write protected struct tagbstring can then be +reset to being writable via the bwriteallow macro. There is, of course, no +protection from attempts to directly access the bstring members. Modifying a +bstring which is write protected by direct access has undefined behavior. + +static struct tagbstrings can be declared via the bsStatic macro. They are +considered permanently unwritable. Such struct tagbstrings's are declared +such that attempts to write to it are not well defined. Invoking either +bwriteallow or bwriteprotect on static struct tagbstrings has no effect. + +struct tagbstring's initialized via btfromcstr or blk2tbstr are protected by +default but can be made writeable via the bwriteallow macro. If bwriteallow +is called on such struct tagbstring's, it is the programmer's responsibility +to ensure that: + +1) the buffer supplied was allocated from the heap. +2) bdestroy is not called on this tagbstring (unless the header itself has + also been allocated from the heap.) +3) free is called on the buffer to reclaim its memory. + +bwriteallow and bwriteprotect can be invoked on ordinary bstrings (they have +to be dereferenced with the (*) operator to get the levels of indirection +correct) to give them write protection. 
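+
+As a minimal sketch of the memory management and write protection facilities
+described above (assuming only the bfromcstr, balloc, bcatcstr, bwriteprotect,
+bwriteallow and bdestroy calls described in this document; error checking is
+elided):
+
+    bstring b = bfromcstr ("base");
+    balloc (b, 64);          /* Preallocate so later appends need not realloc. */
+    bcatcstr (b, "/suffix"); /* Writes within the preallocated buffer. */
+    bwriteprotect (*b);      /* Subsequent Bstrlib writes to b are rejected. */
+    bwriteallow (*b);        /* Restore writability before cleaning up. */
+    bdestroy (b);            /* Explicit clean up; there is no automatic GC. */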
+ +Buffer declaration: +................... + +The memory buffer is actually declared "unsigned char *" instead of "char *". +The reason for this is to trigger compiler warnings whenever uncasted char +buffers are assigned to the data portion of a bstring. This will draw more +diligent programmers into taking a second look at the code where they +have carelessly left off the typically required cast. (Research from +AT&T/Lucent indicates that additional programmer eyeballs is one of the most +effective mechanisms at ferreting out bugs.) + +Function pointers: +.................. + +The bgets, bread and bStream functions use function pointers to obtain +strings from data streams. The function pointer declarations have been +specifically chosen to be compatible with the fgetc and fread functions. +While this may seem to be a convoluted way of implementing fgets and fread +style functionality, it has been specifically designed this way to ensure +that there is no dependency on a single narrowly defined set of device +interfaces, such as just stream I/O. In the embedded world, its quite +possible to have environments where such interfaces may not exist in the +standard C library form. Furthermore, the generalization that this opens up +allows for more sophisticated uses for these functions (performing an fgets +like function on a socket, for example.) By using function pointers, it also +allows such abstract stream interfaces to be created using the bstring library +itself while not creating a circular dependency. + +Use of int's for sizes: +....................... + +This is just a recognition that 16bit platforms with requirements for strings +that are larger than 64K and 32bit+ platforms with requirements for strings +that are larger than 4GB are pretty marginal. The main focus is for 32bit +platforms, and emerging 64bit platforms with reasonable < 4GB string +requirements. Using ints allows for negative values which has meaning +internally to bstrlib. + +Semantic consideration: +....................... + +Certain care needs to be taken when copying and aliasing bstrings. A bstring +is essentially a pointer type which points to a multipart abstract data +structure. Thus usage, and lifetime of bstrings have semantics that follow +these considerations. For example: + + bstring a, b; + struct tagbstring t; + + a = bfromcstr("Hello"); /* Create new bstring and copy "Hello" into it. */ + b = a; /* Alias b to the contents of a. */ + t = *a; /* Create a current instance pseudo-alias of a. */ + bconcat (a, b); /* Double a and b, t is now undefined. */ + bdestroy (a); /* Destroy the contents of both a and b. */ + +Variables of type bstring are really just references that point to real +bstring objects. The equal operator (=) creates aliases, and the asterisk +dereference operator (*) creates a kind of alias to the current instance (which +is generally not useful for any purpose.) Using bstrcpy() is the correct way +of creating duplicate instances. The ampersand operator (&) is useful for +creating aliases to struct tagbstrings (remembering that constructed struct +tagbstrings are not writable by default.) + +CBStrings use complete copy semantics for the equal operator (=), and thus do +not have these sorts of issues. + +Debugging: +.......... + +Bstrings have a simple, exposed definition and construction, and the library +itself is open source. So most debugging is going to be fairly straight- +forward. 
But the memory for bstrings come from the heap, which can often be +corrupted indirectly, and it might not be obvious what has happened even from +direct examination of the contents in a debugger or a core dump. There are +some tools such as Purify, Insure++ and Electric Fence which can help solve +such problems, however another common approach is to directly instrument the +calls to malloc, realloc, calloc, free, memcpy, memmove and/or other calls +by overriding them with macro definitions. + +Although the user could hack on the Bstrlib sources directly as necessary to +perform such an instrumentation, Bstrlib comes with a built-in mechanism for +doing this. By defining the macro BSTRLIB_MEMORY_DEBUG and providing an +include file named memdbg.h this will force the core Bstrlib modules to +attempt to include this file. In such a file, macros could be defined which +overrides Bstrlib's useage of the C standard library. + +Rather than calling malloc, realloc, free, memcpy or memmove directly, Bstrlib +emits the macros bstr__alloc, bstr__realloc, bstr__free, bstr__memcpy and +bstr__memmove in their place respectively. By default these macros are simply +assigned to be equivalent to their corresponding C standard library function +call. However, if they are given earlier macro definitions (via the back +door include file) they will not be given their default definition. In this +way Bstrlib's interface to the standard library can be changed but without +having to directly redefine or link standard library symbols (both of which +are not strictly ANSI C compliant.) + +An example definition might include: + + #define bstr__alloc(sz) X_malloc ((sz), __LINE__, __FILE__) + +which might help contextualize heap entries in a debugging environment. + +The NULL parameter and sanity checking of bstrings is part of the Bstrlib +API, and thus Bstrlib itself does not present any different modes which would +correspond to "Debug" or "Release" modes. Bstrlib always contains mechanisms +which one might think of as debugging features, but retains the performance +and small memory footprint one would normally associate with release mode +code. + +Integration Microsoft's Visual Studio debugger: +............................................... + +Microsoft's Visual Studio debugger has a capability of customizable mouse +float over data type descriptions. This is accomplished by editting the +AUTOEXP.DAT file to include the following: + + ; new for CBString + tagbstring =slen= mlen= + Bstrlib::CBStringList =count= + +In Visual C++ 6.0 this file is located in the directory: + + C:\Program Files\Microsoft Visual Studio\Common\MSDev98\Bin + +and in Visual Studio .NET 2003 its located here: + + C:\Program Files\Microsoft Visual Studio .NET 2003\Common7\Packages\Debugger + +This will improve the ability of debugging with Bstrlib under Visual Studio. + +Security +-------- + +Bstrlib does not come with explicit security features outside of its fairly +comprehensive error detection, coupled with its strict semantic support. +That is to say that certain common security problems, such as buffer overrun, +constant overwrite, arbitrary truncation etc, are far less likely to happen +inadvertently. Where it does help, Bstrlib maximizes its advantage by +providing developers a simple adoption path that lets them leave less secure +string mechanisms behind. 
The library will not leave developers wanting, so +they will be less likely to add new code using a less secure string library +to add functionality that might be missing from Bstrlib. + +That said there are a number of security ideas not addressed by Bstrlib: + +1. Race condition exploitation (i.e., verifying a string's contents, then +raising the privilege level and execute it as a shell command as two +non-atomic steps) is well beyond the scope of what Bstrlib can provide. It +should be noted that MFC's built-in string mutex actually does not solve this +problem either -- it just removes immediate data corruption as a possible +outcome of such exploit attempts (it can be argued that this is worse, since +it will leave no trace of the exploitation). In general race conditions have +to be dealt with by careful design and implementation; it cannot be assisted +by a string library. + +2. Any kind of access control or security attributes to prevent usage in +dangerous interfaces such as system(). Perl includes a "trust" attribute +which can be endowed upon strings that are intended to be passed to such +dangerous interfaces. However, Perl's solution reflects its own limitations +-- notably that it is not a strongly typed language. In the example code for +Bstrlib, there is a module called taint.cpp. It demonstrates how to write a +simple wrapper class for managing "untainted" or trusted strings using the +type system to prevent questionable mixing of ordinary untrusted strings with +untainted ones then passing them to dangerous interfaces. In this way the +security correctness of the code reduces to auditing the direct usages of +dangerous interfaces or promotions of tainted strings to untainted ones. + +3. Encryption of string contents is way beyond the scope of Bstrlib. +Maintaining encrypted string contents in the futile hopes of thwarting things +like using system-level debuggers to examine sensitive string data is likely +to be a wasted effort (imagine a debugger that runs at a higher level than a +virtual processor where the application runs). For more standard encryption +usages, since the bstring contents are simply binary blocks of data, this +should pose no problem for usage with other standard encryption libraries. + +Compatibility +------------- + +The Better String Library is known to compile and function correctly with the +following compilers: + + - Microsoft Visual C++ + - Watcom C/C++ + - Intel's C/C++ compiler (Windows) + - The GNU C/C++ compiler (cygwin and Linux on PPC64) + - Borland C + - Turbo C + +Setting of configuration options should be unnecessary for these compilers +(unless exceptions are being disabled or STLport has been added to WATCOM +C/C++). Bstrlib has been developed with an emphasis on portability. As such +porting it to other compilers should be straight forward. This package +includes a porting guide (called porting.txt) which explains what issues may +exist for porting Bstrlib to different compilers and environments. + +ANSI issues +----------- + +1. The function pointer types bNgetc and bNread have prototypes which are very +similar to, but not exactly the same as fgetc and fread respectively. +Basically the FILE * parameter is replaced by void *. The purpose of this +was to allow one to create other functions with fgetc and fread like +semantics without being tied to ANSI C's file streaming mechanism. 
I.e., one
+could very easily adapt it to sockets, or simply reading a block of memory,
+or procedurally generated strings (for fractal generation, for example.)
+
+The problem is that invoking the functions (bNgetc)fgetc and (bNread)fread is
+not technically legal in ANSI C. The reason is that the compiler is only
+able to coerce the function pointers themselves into the target type, but is
+unable to perform any cast (implicit or otherwise) on the parameters
+passed once invoked. I.e., if internally void * and FILE * need some kind of
+mechanical coercion, the compiler will not properly perform this conversion,
+which leads to undefined behavior.
+
+Apparently a platform from Data General called "Eclipse" and another from
+Tandem called "NonStop" have a different representation for pointers to bytes
+and pointers to words, for example, where coercion via casting is necessary.
+(Actual confirmation of the existence of such machines is hard to come by, so
+it is prudent to be skeptical about this information.) However, this is not
+an issue for any known contemporary platforms. One may conclude that such
+platforms are effectively apocryphal even if they do exist.
+
+To correctly work around this problem to the satisfaction of the ANSI
+limitations, one needs to create wrapper functions for fgetc and/or
+fread with the prototypes of bNgetc and/or bNread respectively, which perform
+no action other than to explicitly cast the void * parameter to a
+FILE *, and simply pass the remaining parameters straight to the function
+pointer call.
+
+The wrappers themselves are trivial:
+
+  size_t freadWrap (void * buff, size_t esz, size_t eqty, void * parm) {
+      return fread (buff, esz, eqty, (FILE *) parm);
+  }
+
+  int fgetcWrap (void * parm) {
+      return fgetc ((FILE *) parm);
+  }
+
+These have not been supplied in bstrlib or bstraux to prevent unnecessary
+linking with file I/O functions.
+
+2. vsnprintf is not available on all compilers. Because of this, the bformat
+and bformata functions (and format and formata methods) are not guaranteed to
+work properly. For those compilers that don't have vsnprintf, the
+BSTRLIB_NOVSNP macro should be set before compiling bstrlib, and the format
+functions/methods will be disabled.
+
+The more recent ANSI C standards have specified the required inclusion of a
+vsnprintf function.
+
+3. The bstrlib function names are not unique in the first 6 characters. This
+is only an issue for older C compiler environments which do not store more
+than 6 characters for function names.
+
+4. The bsafe module defines macros and function names which are part of the
+C library. This simply overrides the definition as expected on all platforms
+tested, however it is not sanctioned by the ANSI standard. This module is
+clearly optional and should be omitted on platforms which disallow its
+undefined semantics.
+
+In practice the real issue is that some compilers in some modes of operation
+can/will inline these standard library functions on a module by module basis
+as they appear in each. The linker will thus have no opportunity to override
+the implementation of these functions for those cases. This can lead to
+inconsistent behaviour of the bsafe module on different platforms and
+compilers. 
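+
+As a usage sketch for the wrappers shown in item 1 above (assuming only the
+bread and bgets interfaces; fopen error checking is elided), note that no
+(bNread) or (bNgetc) casts are needed, since the wrappers already carry the
+expected prototypes:
+
+    FILE * fp = fopen ("bstrlib.txt", "rb");
+    bstring whole = bread (freadWrap, fp);        /* Read the entire stream. */
+    rewind (fp);
+    bstring line = bgets (fgetcWrap, fp, '\n');   /* Read one '\n' terminated line. */
+    fclose (fp);
+    bdestroy (whole);
+    bdestroy (line);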
+ +=============================================================================== + +Comparison with Microsoft's CString class +----------------------------------------- + +Although developed independently, CBStrings have very similar functionality to +Microsoft's CString class. However, the bstring library has significant +advantages over CString: + +1. Bstrlib is a C-library as well as a C++ library (using the C++ wrapper). + + - Thus it is compatible with more programming environments and + available to a wider population of programmers. + +2. The internal structure of a bstring is considered exposed. + + - A single contiguous block of data can be cut into read-only pieces by + simply creating headers, without allocating additional memory to create + reference copies of each of these sub-strings. + - In this way, using bstrings in a totally abstracted way becomes a choice + rather than an imposition. Further this choice can be made differently + at different layers of applications that use it. + +3. Static declaration support precludes the need for constructor + invocation. + + - Allows for static declarations of constant strings that has no + additional constructor overhead. + +4. Bstrlib is not attached to another library. + + - Bstrlib is designed to be easily plugged into any other library + collection, without dependencies on other libraries or paradigms (such + as "MFC".) + +The bstring library also comes with a few additional functions that are not +available in the CString class: + + - bsetstr + - bsplit + - bread + - breplace (this is different from CString::Replace()) + - Writable indexed characters (for example a[i]='x') + +Interestingly, although Microsoft did implement mid$(), left$() and right$() +functional analogues (these are functions from GWBASIC) they seem to have +forgotten that mid$() could be also used to write into the middle of a string. +This functionality exists in Bstrlib with the bsetstr() and breplace() +functions. + +Among the disadvantages of Bstrlib is that there is no special support for +localization or wide characters. Such things are considered beyond the scope +of what bstrings are trying to deliver. CString essentially supports the +older UCS-2 version of Unicode via widechar_t as an application-wide compile +time switch. + +CString's also use built-in mechanisms for ensuring thread safety under all +situations. While this makes writing thread safe code that much easier, this +built-in safety feature has a price -- the inner loops of each CString method +runs in its own critical section (grabbing and releasing a light weight mutex +on every operation.) The usual way to decrease the impact of a critical +section performance penalty is to amortize more operations per critical +section. But since the implementation of CStrings is fixed as a one critical +section per-operation cost, there is no way to leverage this common +performance enhancing idea. + +The search facilities in Bstrlib are comparable to those in MFC's CString +class, though it is missing locale specific collation. But because Bstrlib +is interoperable with C's char buffers, it will allow programmers to write +their own string searching mechanism (such as Boyer-Moore), or be able to +choose from a variety of available existing string searching libraries (such +as those for regular expressions) without difficulty. + +Microsoft used a very non-ANSI conforming trick in its implementation to +allow printf() to use the "%s" specifier to output a CString correctly. 
This +can be convenient, but it is inherently not portable. CBString requires an +explicit cast, while bstring requires the data member to be dereferenced. +Microsoft's own documentation recommends casting, instead of relying on this +feature. + +Comparison with C++'s std::string +--------------------------------- + +This is the C++ language's standard STL based string class. + +1. There is no C implementation. +2. The [] operator is not bounds checked. +3. Missing a lot of useful functions like printf-like formatting. +4. Some sub-standard std::string implementations (SGI) are necessarily unsafe + to use with multithreading. +5. Limited by STL's std::iostream which in turn is limited by ifstream which + can only take input from files. (Compare to CBStream's API which can take + abstracted input.) +6. Extremely uneven performance across implementations. + +Comparison with ISO C TR 24731 proposal +--------------------------------------- + +Following the ISO C99 standard, Microsoft has proposed a group of C library +extensions which are supposedly "safer and more secure". This proposal is +expected to be adopted by the ISO C standard which follows C99. + +The proposal reveals itself to be very similar to Microsoft's "StrSafe" +library. The functions are basically the same as other standard C library +string functions except that destination parameters are paired with an +additional length parameter of type rsize_t. rsize_t is the same as size_t, +however, the range is checked to make sure its between 1 and RSIZE_MAX. Like +Bstrlib, the functions perform a "parameter check". Unlike Bstrlib, when a +parameter check fails, rather than simply outputing accumulatable error +statuses, they call a user settable global error function handler, and upon +return of control performs no (additional) detrimental action. The proposal +covers basic string functions as well as a few non-reenterable functions +(asctime, ctime, and strtok). + +1. Still based solely on char * buffers (and therefore strlen() and strcat() + is still O(n), and there are no faster streq() comparison functions.) +2. No growable string semantics. +3. Requires manual buffer length synchronization in the source code. +4. No attempt to enhance functionality of the C library. +5. Introduces a new error scenario (strings exceeding RSIZE_MAX length). + +The hope is that by exposing the buffer length requirements there will be +fewer buffer overrun errors. However, the error modes are really just +transformed, rather than removed. The real problem of buffer overflows is +that they all happen as a result of erroneous programming. So forcing +programmers to manually deal with buffer limits, will make them more aware of +the problem but doesn't remove the possibility of erroneous programming. So +a programmer that erroneously mixes up the rsize_t parameters is no better off +from a programmer that introduces potential buffer overflows through other +more typical lapses. So at best this may reduce the rate of erroneous +programming, rather than making any attempt at removing failure modes. + +The error handler can discriminate between types of failures, but does not +take into account any callsite context. So the problem is that the error is +going to be manifest in a piece of code, but there is no pointer to that +code. 
It would seem that passing in the call site __FILE__, __LINE__ as +parameters would be very useful, but the API clearly doesn't support such a +thing (it would increase code bloat even more than the extra length +parameter does, and would require macro tricks to implement). + +The Bstrlib C API takes the position that error handling needs to be done at +the callsite, and just tries to make it as painless as possible. Furthermore, +error modes are removed by supporting auto-growing strings and aliasing. For +capturing errors in more central code fragments, Bstrlib's C++ API uses +exception handling extensively, which is superior to the leaf-only error +handler approach. + +Comparison with Managed String Library CERT proposal +---------------------------------------------------- + +The main webpage for the managed string library: +http://www.cert.org/secure-coding/managedstring.html + +Robert Seacord at CERT has proposed a C string library that he calls the +"Managed String Library" for C. Like Bstrlib, it introduces a new type +which is called a managed string. The structure of a managed string +(string_m) is like a struct tagbstring but missing the length field. This +internal structure is considered opaque. The length is, like the C standard +library, always computed on the fly by searching for a terminating NUL on +every operation that requires it. So it suffers from every performance +problem that the C standard library suffers from. Interoperating with C +string APIs (like printf, fopen, or anything else that takes a string +parameter) requires copying to additionally allocating buffers that have to +be manually freed -- this makes this library probably slower and more +cumbersome than any other string library in existence. + +The library gives a fully populated error status as the return value of every +string function. The hope is to be able to diagnose all problems +specifically from the return code alone. Comparing this to Bstrlib, which +aways returns one consistent error message, might make it seem that Bstrlib +would be harder to debug; but this is not true. With Bstrlib, if an error +occurs there is always enough information from just knowing there was an error +and examining the parameters to deduce exactly what kind of error has +happened. The managed string library thus gives up nested function calls +while achieving little benefit, while Bstrlib does not. + +One interesting feature that "managed strings" has is the idea of data +sanitization via character set whitelisting. That is to say, a globally +definable filter that makes any attempt to put invalid characters into strings +lead to an error and not modify the string. 
The author gives the following +example: + + // create valid char set + if (retValue = strcreate_m(&str1, "abc") ) { + fprintf( + stderr, + "Error %d from strcreate_m.\n", + retValue + ); + } + if (retValue = setcharset(str1)) { + fprintf( + stderr, + "Error %d from setcharset().\n", + retValue + ); + } + if (retValue = strcreate_m(&str1, "aabbccabc")) { + fprintf( + stderr, + "Error %d from strcreate_m.\n", + retValue + ); + } + // create string with invalid char set + if (retValue = strcreate_m(&str1, "abbccdabc")) { + fprintf( + stderr, + "Error %d from strcreate_m.\n", + retValue + ); + } + +Which we can compare with a more Bstrlib way of doing things: + + bstring bCreateWithFilter (const char * cstr, const_bstring filter) { + bstring b = bfromcstr (cstr); + if (BSTR_ERR != bninchr (b, filter) && NULL != b) { + fprintf (stderr, "Filter violation.\n"); + bdestroy (b); + b = NULL; + } + return b; + } + + struct tagbstring charFilter = bsStatic ("abc"); + bstring str1 = bCreateWithFilter ("aabbccabc", &charFilter); + bstring str2 = bCreateWithFilter ("aabbccdabc", &charFilter); + +The first thing we should notice is that with the Bstrlib approach you can +have different filters for different strings if necessary. Furthermore, +selecting a charset filter in the Managed String Library is uni-contextual. +That is to say, there can only be one such filter active for the entire +program, which means its usage is not well defined for intermediate library +usage (a library that uses it will interfere with user code that uses it, and +vice versa.) It is also likely to be poorly defined in multi-threading +environments. + +There is also a question as to whether the data sanitization filter is checked +on every operation, or just on creation operations. Since the charset can be +set arbitrarily at run time, it might be set *after* some managed strings have +been created. This would seem to imply that all functions should run this +additional check every time if there is an attempt to enforce this. This +would make things tremendously slow. On the other hand, if it is assumed that +only creates and other operations that take char *'s as input need be checked +because the charset was only supposed to be called once at and before any +other managed string was created, then one can see that its easy to cover +Bstrlib with equivalent functionality via a few wrapper calls such as the +example given above. + +And finally we have to question the value of sanitation in the first place. +For example, for httpd servers, there is generally a requirement that the +URLs parsed have some form that avoids undesirable translation to local file +system filenames or resources. The problem is that the way URLs can be +encoded, it must be completely parsed and translated to know if it is using +certain invalid character combinations. That is to say, merely filtering +each character one at a time is not necessarily the right way to ensure that +a string has safe contents. + +In the article that describes this proposal, it is claimed that it fairly +closely approximates the existing C API semantics. On this point we should +compare this "closeness" with Bstrlib: + + Bstrlib Managed String Library + ------- ---------------------- + +Pointer arithmetic Segment arithmetic N/A + +Use in C Std lib ->data, or bdata{e} getstr_m(x,*) ... 
free(x)
+
+String literals bsStatic, bsStaticBlk strcreate_m()
+
+Transparency Complete None
+
+It's pretty clear that the semantic mapping from C strings to Bstrlib is fairly
+straightforward, and that in general semantic capabilities are the same or
+superior in Bstrlib. On the other hand the Managed String Library is either
+missing semantics or changes things fairly significantly.
+
+Comparison with Annexia's c2lib library
+---------------------------------------
+
+This library is available at:
+http://www.annexia.org/freeware/c2lib
+
+1. Still based solely on char * buffers (and therefore strlen() and strcat()
+ are still O(n), and there are no faster streq() comparison functions.)
+ Their suggestion that alternatives which wrap the string data type (such as
+ bstring does) impose a difficulty in interoperating with the C language's
+ ordinary C string library is not founded.
+2. Introduction of memory (and vector?) abstractions imposes a learning
+ curve, and some kind of memory usage policy that is outside of the strings
+ themselves (and therefore must be maintained by the developer.)
+3. The API is massive, and filled with all sorts of trivial (pjoin) and
+ controversial (pmatch -- regular expressions are not sufficiently
+ standardized, and there is a very large difference in performance between
+ compiled and non-compiled REs) functions. Bstrlib takes a decidedly
+ minimal approach -- none of the functionality in c2lib is difficult or
+ challenging to implement on top of Bstrlib (except the regex stuff, which
+ is going to be difficult, and controversial no matter what.)
+4. Understanding why c2lib is the way it is pretty much requires a working
+ knowledge of Perl. bstrlib requires only knowledge of the C string library
+ while providing just a very select few worthwhile extras.
+5. It is attached to a lot of cruft like a matrix math library (that doesn't
+ include any functions for getting the determinant, eigenvectors,
+ eigenvalues, the matrix inverse, test for singularity, test for
+ orthogonality, a Gram-Schmidt orthogonalization, LU decomposition ... I
+ mean why bother?)
+
+Convincing a development house to use c2lib is likely quite difficult. It
+introduces too much, while not being part of any kind of standards body. The
+code must therefore be trusted, or maintained by those that use it. While
+bstring offers nothing more on this front, since it is so much smaller, covers
+far less in terms of scope, and will typically improve string performance,
+the barrier to usage should be much smaller.
+
+Comparison with stralloc/qmail
+------------------------------
+
+More information about this library can be found here:
+http://www.canonical.org/~kragen/stralloc.html or here:
+http://cr.yp.to/lib/stralloc.html
+
+1. Library is very, very minimal. A little too minimal.
+2. Untargeted source parameters are not declared const.
+3. Slightly different expected emphasis (like the _cats function which takes an
+ ordinary C string char buffer as a parameter.) It's clear that the
+ remainder of the C string library is still required to perform more
+ useful string operations.
+
+The struct declaration for their string header is essentially the same as that
+for bstring. But it's clear that this was a quickly written hack whose goals
+are clearly a subset of what Bstrlib supplies. For anyone who is served by
+stralloc, Bstrlib is a complete substitute that just adds more functionality.
+
+stralloc actually uses the interesting policy that a NULL data pointer
+indicates an empty string. 
In this way, non-static empty strings can be +declared without construction. This advantage is minimal, since static empty +bstrings can be declared inline without construction, and if the string needs +to be written to it should be constructed from an empty string (or its first +initializer) in any event. + +wxString class +-------------- + +This is the string class used in the wxWindows project. A description of +wxString can be found here: +http://www.wxwindows.org/manuals/2.4.2/wx368.htm#wxstring + +This C++ library is similar to CBString. However, it is littered with +trivial functions (IsAscii, UpperCase, RemoveLast etc.) + +1. There is no C implementation. +2. The memory management strategy is to allocate a bounded fixed amount of + additional space on each resize, meaning that it does not have the + log_2(n) property that Bstrlib has (it will thrash very easily, cause + massive fragmentation in common heap implementations, and can easily be a + common source of performance problems). +3. The library uses a "copy on write" strategy, meaning that it has to deal + with multithreading problems. + +Vstr +---- + +This is a highly orthogonal C string library with an emphasis on +networking/realtime programming. It can be found here: +http://www.and.org/vstr/ + +1. The convoluted internal structure does not contain a '\0' char * compatible + buffer, so interoperability with the C library a non-starter. +2. The API and implementation is very large (owing to its orthogonality) and + can lead to difficulty in understanding its exact functionality. +3. An obvious dependency on gnu tools (confusing make configure step) +4. Uses a reference counting system, meaning that it is not likely to be + thread safe. + +The implementation has an extreme emphasis on performance for nontrivial +actions (adds, inserts and deletes are all constant or roughly O(#operations) +time) following the "zero copy" principle. This trades off performance of +trivial functions (character access, char buffer access/coersion, alias +detection) which becomes significantly slower, as well as incremental +accumulative costs for its searching/parsing functions. Whether or not Vstr +wins any particular performance benchmark will depend a lot on the benchmark, +but it should handily win on some, while losing dreadfully on others. + +The learning curve for Vstr is very steep, and it doesn't come with any +obvious way to build for Windows or other platforms without gnu tools. At +least one mechanism (the iterator) introduces a new undefined scenario +(writing to a Vstr while iterating through it.) Vstr has a very large +footprint, and is very ambitious in its total functionality. Vstr has no C++ +API. + +Vstr usage requires context initialization via vstr_init() which must be run +in a thread-local context. Given the totally reference based architecture +this means that sharing Vstrings across threads is not well defined, or at +least not safe from race conditions. This API is clearly geared to the older +standard of fork() style multitasking in UNIX, and is not safely transportable +to modern shared memory multithreading available in Linux and Windows. There +is no portable external solution making the library thread safe (since it +requires a mutex around each Vstr context -- not each string.) + +In the documentation for this library, a big deal is made of its self hosted +s(n)printf-like function. 
This is an issue for older compilers that don't +include vsnprintf(), but also an issue because Vstr has a slow conversion to +'\0' terminated char * mechanism. That is to say, using "%s" to format data +that originates from Vstr would be slow without some sort of native function +to do so. Bstrlib sidesteps the issue by relying on what snprintf-like +functionality does exist and having a high performance conversion to a char * +compatible string so that "%s" can be used directly. + +Str Library +----------- + +This is a fairly extensive string library, that includes full unicode support +and targetted at the goal of out performing MFC and STL. The architecture, +similarly to MFC's CStrings, is a copy on write reference counting mechanism. + +http://www.utilitycode.com/str/default.aspx + +1. Commercial. +2. C++ only. + +This library, like Vstr, uses a ref counting system. There is only so deeply +I can analyze it, since I don't have a license for it. However, performance +improvements over MFC's and STL, doesn't seem like a sufficient reason to +move your source base to it. For example, in the future, Microsoft may +improve the performance CString. + +It should be pointed out that performance testing of Bstrlib has indicated +that its relative performance advantage versus MFC's CString and STL's +std::string is at least as high as that for the Str library. + +libmib astrings +--------------- + +A handful of functional extensions to the C library that add dynamic string +functionality. +http://www.mibsoftware.com/libmib/astring/ + +This package basically references strings through char ** pointers and assumes +they are pointing to the top of an allocated heap entry (or NULL, in which +case memory will be newly allocated from the heap.) So its still up to user +to mix and match the older C string functions with these functions whenever +pointer arithmetic is used (i.e., there is no leveraging of the type system +to assert semantic differences between references and base strings as Bstrlib +does since no new types are introduced.) Unlike Bstrlib, exact string length +meta data is not stored, thus requiring a strlen() call on *every* string +writing operation. The library is very small, covering only a handful of C's +functions. + +While this is better than nothing, it is clearly slower than even the +standard C library, less safe and less functional than Bstrlib. + +To explain the advantage of using libmib, their website shows an example of +how dangerous C code: + + char buf[256]; + char *pszExtraPath = ";/usr/local/bin"; + + strcpy(buf,getenv("PATH")); /* oops! could overrun! */ + strcat(buf,pszExtraPath); /* Could overrun as well! */ + + printf("Checking...%s\n",buf); /* Some printfs overrun too! */ + +is avoided using libmib: + + char *pasz = 0; /* Must initialize to 0 */ + char *paszOut = 0; + char *pszExtraPath = ";/usr/local/bin"; + + if (!astrcpy(&pasz,getenv("PATH"))) /* malloc error */ exit(-1); + if (!astrcat(&pasz,pszExtraPath)) /* malloc error */ exit(-1); + + /* Finally, a "limitless" printf! we can use */ + asprintf(&paszOut,"Checking...%s\n",pasz);fputs(paszOut,stdout); + + astrfree(&pasz); /* Can use free(pasz) also. 
*/ + astrfree(&paszOut); + +However, compare this to Bstrlib: + + bstring b, out; + + bcatcstr (b = bfromcstr (getenv ("PATH")), ";/usr/local/bin"); + out = bformat ("Checking...%s\n", bdatae (b, "")); + /* if (out && b) */ fputs (bdatae (out, ""), stdout); + bdestroy (b); + bdestroy (out); + +Besides being shorter, we can see that error handling can be deferred right +to the very end. Also, unlike the above two versions, if getenv() returns +with NULL, the Bstrlib version will not exhibit undefined behavior. +Initialization starts with the relevant content rather than an extra +autoinitialization step. + +libclc +------ + +An attempt to add to the standard C library with a number of common useful +functions, including additional string functions. +http://libclc.sourceforge.net/ + +1. Uses standard char * buffer, and adopts C 99's usage of "restrict" to pass + the responsibility to guard against aliasing to the programmer. +2. Adds no safety or memory management whatsoever. +3. Most of the supplied string functions are completely trivial. + +The goals of libclc and Bstrlib are clearly quite different. + +fireString +---------- + +http://firestuff.org/ + +1. Uses standard char * buffer, and adopts C 99's usage of "restrict" to pass + the responsibility to guard against aliasing to the programmer. +2. Mixes char * and length wrapped buffers (estr) functions, doubling the API + size, with safety limited to only half of the functions. + +Firestring was originally just a wrapper of char * functionality with extra +length parameters. However, it has been augmented with the inclusion of the +estr type which has similar functionality to stralloc. But firestring does +not nearly cover the functional scope of Bstrlib. + +Safe C String Library +--------------------- + +A library written for the purpose of increasing safety and power to C's string +handling capabilities. +http://www.zork.org/safestr/safestr.html + +1. While the safestr_* functions are safe in of themselves, interoperating + with char * string has dangerous unsafe modes of operation. +2. The architecture of safestr's causes the base pointer to change. Thus, + its not practical/safe to store a safestr in multiple locations if any + single instance can be manipulated. +3. Dependent on an additional error handling library. +4. Uses reference counting, meaning that it is either not thread safe or + slow and not portable. + +I think the idea of reallocating (and hence potentially changing) the base +pointer is a serious design flaw that is fatal to this architecture. True +safety is obtained by having automatic handling of all common scenarios +without creating implicit constraints on the user. + +Because of its automatic temporary clean up system, it cannot use "const" +semantics on input arguments. Interesting anomolies such as: + + safestr_t s, t; + s = safestr_replace (t = SAFESTR_TEMP ("This is a test"), + SAFESTR_TEMP (" "), SAFESTR_TEMP (".")); + /* t is now undefined. */ + +are possible. If one defines a function which takes a safestr_t as a +parameter, then the function would not know whether or not the safestr_t is +defined after it passes it to a safestr library function. The author +recommended method for working around this problem is to examine the +attributes of the safestr_t within the function which is to modify any of +its parameters and play games with its reference count. I think, therefore, +that the whole SAFESTR_TEMP idea is also fatally broken. 
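+
+For contrast, a rough Bstrlib equivalent of the replace shown above (a sketch
+assuming the bfindreplace function and the bsStatic macro) leaves every input
+well defined, since the find and replace arguments are const and only the
+target bstring is modified:
+
+    struct tagbstring space = bsStatic (" ");
+    struct tagbstring dot = bsStatic (".");
+    bstring s = bfromcstr ("This is a test");
+    bfindreplace (s, &space, &dot, 0);  /* s now holds "This.is.a.test". */
+    bdestroy (s);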
+ +The library implements immutability, optional non-resizability, and a "trust" +flag. This trust flag is interesting, and suggests that applying any +arbitrary sequence of safestr_* function calls on any set of trusted strings +will result in a trusted string. It seems to me, however, that if one wanted +to implement a trusted string semantic, one might do so by actually creating +a different *type* and only implement the subset of string functions that are +deemed safe (i.e., user input would be excluded, for example.) This, in +essence, would allow the compiler to enforce trust propogation at compile +time rather than run time. Non-resizability is also interesting, however, +it seems marginal (i.e., to want a string that cannot be resized, yet can be +modified and yet where a fixed sized buffer is undesirable.) + +=============================================================================== + +Examples +-------- + + Dumping a line numbered file: + + FILE * fp; + int i, ret; + struct bstrList * lines; + struct tagbstring prefix = bsStatic ("-> "); + + if (NULL != (fp = fopen ("bstrlib.txt", "rb"))) { + bstring b = bread ((bNread) fread, fp); + fclose (fp); + if (NULL != (lines = bsplit (b, '\n'))) { + for (i=0; i < lines->qty; i++) { + binsert (lines->entry[i], 0, &prefix, '?'); + printf ("%04d: %s\n", i, bdatae (lines->entry[i], "NULL")); + } + bstrListDestroy (lines); + } + bdestroy (b); + } + +For numerous other examples, see bstraux.c, bstraux.h and the example archive. + +=============================================================================== + +License +------- + +This is a fork of The Better String Library, licensed under the 3-clause BSD +license (see the accompanying license.txt). The original work is available under +either the 3 clause BSD license or the Gnu Public License version 2 at the option +of the user. + +=============================================================================== + +Acknowledgements +---------------- + +The following individuals have made significant contributions to the design +and testing of the Better String Library: + +Bjorn Augestad +Clint Olsen +Darryl Bleau +Fabian Cenedese +Graham Wideman +Ignacio Burgueno +International Business Machines Corporation +Ira Mica +John Kortink +Manuel Woelker +Marcel van Kervinck +Michael Hsieh +Richard A. Smith +Simon Ekstrom +Wayne Scott + +=============================================================================== diff --git a/third_party/HLSLcc/src/cbstring/license.txt b/third_party/HLSLcc/src/cbstring/license.txt new file mode 100644 index 0000000..cf78a98 --- /dev/null +++ b/third_party/HLSLcc/src/cbstring/license.txt @@ -0,0 +1,29 @@ +Copyright (c) 2002-2008 Paul Hsieh +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + + Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + + Redistributions in binary form must reproduce the above copyright notice, + this list of conditions and the following disclaimer in the documentation + and/or other materials provided with the distribution. + + Neither the name of bstrlib nor the names of its contributors may be used + to endorse or promote products derived from this software without + specific prior written permission. 
+ +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" +AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE +LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE +POSSIBILITY OF SUCH DAMAGE. + diff --git a/third_party/HLSLcc/src/cbstring/porting.txt b/third_party/HLSLcc/src/cbstring/porting.txt new file mode 100644 index 0000000..11d8d13 --- /dev/null +++ b/third_party/HLSLcc/src/cbstring/porting.txt @@ -0,0 +1,172 @@ +Better String library Porting Guide +----------------------------------- + +by Paul Hsieh + +The bstring library is an attempt to provide improved string processing +functionality to the C and C++ language. At the heart of the bstring library +is the management of "bstring"s which are a significant improvement over '\0' +terminated char buffers. See the accompanying documenation file bstrlib.txt +for more information. + +=============================================================================== + +Identifying the Compiler +------------------------ + +Bstrlib has been tested on the following compilers: + + Microsoft Visual C++ + Watcom C/C++ (32 bit flat) + Intel's C/C++ compiler (on Windows) + The GNU C/C++ compiler (on Windows/Linux on x86 and PPC64) + Borland C++ + Turbo C + +There are slight differences in these compilers which requires slight +differences in the implementation of Bstrlib. These are accomodated in the +same sources using #ifdef/#if defined() on compiler specific macros. To +port Bstrlib to a new compiler not listed above, it is recommended that the +same strategy be followed. If you are unaware of the compiler specific +identifying preprocessor macro for your compiler you might find it here: + +http://predef.sourceforge.net/precomp.html + +Note that Intel C/C++ on Windows sets the Microsoft identifier: _MSC_VER. + +16-bit vs. 32-bit vs. 64-bit Systems +------------------------------------ + +Bstrlib has been architected to deal with strings of length between 0 and +INT_MAX (inclusive). Since the values of int are never higher than size_t +there will be no issue here. Note that on most 64-bit systems int is 32-bit. + +Dependency on The C-Library +--------------------------- + +Bstrlib uses the functions memcpy, memmove, malloc, realloc, free and +vsnprintf. Many free standing C compiler implementations that have a mode in +which the C library is not available will typically not include these +functions which will make porting Bstrlib to it onerous. Bstrlib is not +designed for such bare bones compiler environments. This usually includes +compilers that target ROM environments. + +Porting Issues +-------------- + +Bstrlib has been written completely in ANSI/ISO C and ISO C++, however, there +are still a few porting issues. These are described below. + +1. The vsnprintf () function. + +Unfortunately, the earlier ANSI/ISO C standards did not include this function. 
+If the compiler of interest does not support this function then the +BSTRLIB_NOVSNP should be defined via something like: + + #if !defined (BSTRLIB_VSNP_OK) && !defined (BSTRLIB_NOVSNP) + # if defined (__TURBOC__) || defined (__COMPILERVENDORSPECIFICMACRO__) + # define BSTRLIB_NOVSNP + # endif + #endif + +which appears at the top of bstrlib.h. Note that the bformat(a) functions +will not be declared or implemented if the BSTRLIB_NOVSNP macro is set. If +the compiler has renamed vsnprintf() to some other named function, then +search for the definition of the exvsnprintf macro in bstrlib.c file and be +sure its defined appropriately: + + #if defined (__COMPILERVENDORSPECIFICMACRO__) + # define exvsnprintf(r,b,n,f,a) {r=__compiler_specific_vsnprintf(b,n,f,a);} + #else + # define exvsnprintf(r,b,n,f,a) {r=vsnprintf(b,n,f,a);} + #endif + +Take notice of the return value being captured in the variable r. It is +assumed that r exceeds n if and only if the underlying vsnprintf function has +determined what the true maximal output length would be for output if the +buffer were large enough to hold it. Non-modern implementations must output a +lesser number (the macro can and should be modified to ensure this). + +2. Weak C++ compiler. + +C++ is a much more complicated language to implement than C. This has lead +to varying quality of compiler implementations. The weaknesses isolated in +the initial ports are inclusion of the Standard Template Library, +std::iostream and exception handling. By default it is assumed that the C++ +compiler supports all of these things correctly. If your compiler does not +support one or more of these define the corresponding macro: + + BSTRLIB_CANNOT_USE_STL + BSTRLIB_CANNOT_USE_IOSTREAM + BSTRLIB_DOESNT_THROW_EXCEPTIONS + +The compiler specific detected macro should be defined at the top of +bstrwrap.h in the Configuration defines section. Note that these disabling +macros can be overrided with the associated enabling macro if a subsequent +version of the compiler gains support. (For example, its possible to rig +up STLport to provide STL support for WATCOM C/C++, so -DBSTRLIB_CAN_USE_STL +can be passed in as a compiler option.) + +3. The bsafe module, and reserved words. + +The bsafe module is in gross violation of the ANSI/ISO C standard in the +sense that it redefines what could be implemented as reserved words on a +given compiler. The typical problem is that a compiler may inline some of the +functions and thus not be properly overridden by the definitions in the bsafe +module. It is also possible that a compiler may prohibit the redefinitions in +the bsafe module. Compiler specific action will be required to deal with +these situations. + +Platform Specific Files +----------------------- + +The makefiles for the examples are basically setup of for particular +environments for each platform. In general these makefiles are not portable +and should be constructed as necessary from scratch for each platform. + +Testing a port +-------------- + +To test that a port compiles correctly do the following: + +1. Build a sample project that includes the bstrlib, bstraux, bstrwrap, and + bsafe modules. +2. Compile bstest against the bstrlib module. +3. Run bstest and ensure that 0 errors are reported. +4. Compile test against the bstrlib and bstrwrap modules. +5. Run test and ensure that 0 errors are reported. +6. 
Compile each of the examples (except for the "re" example, which may be + complicated and is not a real test of bstrlib and except for the mfcbench + example which is Windows specific.) +7. Run each of the examples. + +The builds must have 0 errors, and should have the absolute minimum number of +warnings (in most cases can be reduced to 0.) The result of execution should +be essentially identical on each platform. + +Performance +----------- + +Different CPU and compilers have different capabilities in terms of +performance. It is possible for Bstrlib to assume performance +characteristics that a platform doesn't have (since it was primarily +developed on just one platform). The goal of Bstrlib is to provide very good +performance on all platforms regardless of this but without resorting to +extreme measures (such as using assembly language, or non-portable intrinsics +or library extensions.) + +There are two performance benchmarks that can be found in the example/ +directory. They are: cbench.c and cppbench.cpp. These are variations and +expansions of a benchmark for another string library. They don't cover all +string functionality, but do include the most basic functions which will be +common in most string manipulation kernels. + +............................................................................... + +Feedback +-------- + +In all cases, you may email issues found to the primary author of Bstrlib at +the email address: websnarf@users.sourceforge.net + +=============================================================================== diff --git a/third_party/HLSLcc/src/cbstring/security.txt b/third_party/HLSLcc/src/cbstring/security.txt new file mode 100644 index 0000000..9761409 --- /dev/null +++ b/third_party/HLSLcc/src/cbstring/security.txt @@ -0,0 +1,221 @@ +Better String library Security Statement +---------------------------------------- + +by Paul Hsieh + +=============================================================================== + +Introduction +------------ + +The Better String library (hereafter referred to as Bstrlib) is an attempt to +provide improved string processing functionality to the C and C++ languages. +At the heart of the Bstrlib is the management of "bstring"s which are a +significant improvement over '\0' terminated char buffers. See the +accompanying documenation file bstrlib.txt for more information. + +DISCLAIMER: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND +CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT +NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Like any software, there is always a possibility of failure due to a flawed +implementation. Nevertheless a good faith effort has been made to minimize +such flaws in Bstrlib. Also, use of Bstrlib by itself will not make an +application secure or free from implementation failures. 
However, it is the +author's conviction that use of Bstrlib can greatly facilitate the creation +of software meeting the highest possible standards of security. + +Part of the reason why this document has been created, is for the purpose of +security auditing, or the creation of further "Statements on Security" for +software that is created that uses Bstrlib. An auditor may check the claims +below against Bstrlib, and use this as a basis for analysis of software which +uses Bstrlib. + +=============================================================================== + +Statement on Security +--------------------- + +This is a document intended to give consumers of the Better String Library +who are interested in security an idea of where the Better String Library +stands on various security issues. Any deviation observed in the actual +library itself from the descriptions below should be considered an +implementation error, not a design flaw. + +This statement is not an analytical proof of correctness or an outline of one +but rather an assertion similar to a scientific claim or hypothesis. By use, +testing and open independent examination (otherwise known as scientific +falsifiability), the credibility of the claims made below can rise to the +level of an established theory. + +Common security issues: +....................... + +1. Buffer Overflows + +The Bstrlib API allows the programmer a way to deal with strings without +having to deal with the buffers containing them. Ordinary usage of the +Bstrlib API itself makes buffer overflows impossible. + +Furthermore, the Bstrlib API has a superset of basic string functionality as +compared to the C library's char * functions, C++'s std::string class and +Microsoft's MFC based CString class. It also has abstracted mechanisms for +dealing with IO. This is important as it gives developers a way of migrating +all their code from a functionality point of view. + +2. Memory size overflow/wrap around attack + +Bstrlib is, by design, impervious to memory size overflow attacks. The +reason is it is resiliant to length overflows is that bstring lengths are +bounded above by INT_MAX, instead of ~(size_t)0. So length addition +overflows cause a wrap around of the integer value making them negative +causing balloc() to fail before an erroneous operation can occurr. Attempted +conversions of char * strings which may have lengths greater than INT_MAX are +detected and the conversion is aborted. + +It is unknown if this property holds on machines that don't represent +integers as 2s complement. It is recommended that Bstrlib be carefully +auditted by anyone using a system which is not 2s complement based. + +3. Constant string protection + +Bstrlib implements runtime enforced constant and read-only string semantics. +I.e., bstrings which are declared as constant via the bsStatic() macro cannot +be modified or deallocated directly through the Bstrlib API, and this cannot +be subverted by casting or other type coercion. This is independent of the +use of the const_bstring data type. + +The Bstrlib C API uses the type const_bstring to specify bstring parameters +whose contents do not change. Although the C language cannot enforce this, +this is nevertheless guaranteed by the implementation of the Bstrlib library +of C functions. The C++ API enforces the const attribute on CBString types +correctly. + +4. Aliased bstring support + +Bstrlib detects and supports aliased parameter management throughout the API. 
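For example, a call that passes the same bstring as both the destination and
the source parameter behaves as if the source had been copied to a temporary
first. A minimal sketch using the core bstrlib API (bfromcstr, bconcat and
bdestroy):

    bstring b = bfromcstr ("abc");
    bconcat (b, b);            /* behaves as if the right-hand operand   */
                               /* were snapshotted first: b ends up as   */
                               /* "abcabc", not an endlessly growing     */
                               /* string                                 */
    bdestroy (b);
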
+The kind of aliasing that is allowed is the one where pointers of the same +basic type may be pointing to overlapping objects (this is the assumption the +ANSI C99 specification makes.) Each function behaves as if all read-only +parameters were copied to temporaries which are used in their stead before +the function is enacted (it rarely actually does this). No function in the +Bstrlib uses the "restrict" parameter attribute from the ANSI C99 +specification. + +5. Information leaking + +In bstraux.h, using the semantically equivalent macros bSecureDestroy() and +bSecureWriteProtect() in place of bdestroy() and bwriteprotect() respectively +will ensure that stale data does not linger in the heap's free space after +strings have been released back to memory. Created bstrings or CBStrings +are not linked to anything external to themselves, and thus cannot expose +deterministic data leaking. If a bstring is resized, the preimage may exist +as a copy that is released to the heap. Thus for sensitive data, the bstring +should be sufficiently presized before manipulated so that it is not resized. +bSecureInput() has been supplied in bstraux.c, which can be used to obtain +input securely without any risk of leaving any part of the input image in the +heap except for the allocated bstring that is returned. + +6. Memory leaking + +Bstrlib can be built using memdbg.h enabled via the BSTRLIB_MEMORY_DEBUG +macro. User generated definitions for malloc, realloc and free can then be +supplied which can implement special strategies for memory corruption +detection or memory leaking. Otherwise, bstrlib does not do anything out of +the ordinary to attempt to deal with the standard problem of memory leaking +(i.e., losing references to allocated memory) when programming in the C and +C++ languages. However, it does not compound the problem any more than exists +either, as it doesn't have any intrinsic inescapable leaks in it. Bstrlib +does not preclude the use of automatic garbage collection mechanisms such as +the Boehm garbage collector. + +7. Encryption + +Bstrlib does not present any built-in encryption mechanism. However, it +supports full binary contents in its data buffers, so any standard block +based encryption mechanism can make direct use of bstrings/CBStrings for +buffer management. + +8. Double freeing + +Freeing a pointer that is already free is an extremely rare, but nevertheless +a potentially ruthlessly corrupting operation (its possible to cause Win 98 to +reboot, by calling free mulitiple times on already freed data using the WATCOM +CRT.) Bstrlib invalidates the bstring header data before freeing, so that in +many cases a double free will be detected and an error will be reported +(though this behaviour is not guaranteed and should not be relied on). + +Using bstrFree pervasively (instead of bdestroy) can lead to somewhat +improved invalid free avoidance (it is completely safe whenever bstring +instances are only stored in unique variables). For example: + + struct tagbstring hw = bsStatic ("Hello, world"); + bstring cpHw = bstrcpy (&hw); + + #ifdef NOT_QUITE_AS_SAFE + bdestroy (cpHw); /* Never fail */ + bdestroy (cpHw); /* Error sometimes detected at runtime */ + bdestroy (&hw); /* Error detected at run time */ + #else + bstrFree (cpHw); /* Never fail */ + bstrFree (cpHw); /* Will do nothing */ + bstrFree (&hw); /* Will lead to a compile time error */ + #endif + +9. Resource based denial of service + +bSecureInput() has been supplied in bstraux.c. 
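A minimal usage sketch follows; the exact bSecureInput() prototype should be
checked against bstraux.h, as the parameter order assumed here (a length cap,
a terminating character, a bNgetc-style reader callback and its context) and
the use of <stdio.h> for the reader are illustrative assumptions only:

    static int readch (void * f) { return fgetc ((FILE *) f); }

    bstring pw = bSecureInput (64, '\n', readch, stdin);
    /* ... authenticate with pw ... */
    bSecureDestroy (pw);       /* wipe the contents before the memory is */
                               /* released back to the heap              */

Here 64 is the upper bound handed to bSecureInput().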
It has an optional upper limit +for input length. But unlike fgets(), it is also easily determined if the +buffer has been truncated early. In this way, a program can set an upper limit +on input sizes while still allowing for implementing context specific +truncation semantics (i.e., does the program consume but dump the extra +input, or does it consume it in later inputs?) + +10. Mixing char *'s and bstrings + +The bstring and char * representations are not identical. So there is a risk +when converting back and forth that data may lost. Essentially bstrings can +contain '\0' as a valid non-terminating character, while char * strings +cannot and in fact must use the character as a terminator. The risk of data +loss is very low, since: + + A) the simple method of only using bstrings in a char * semantically + compatible way is both easy to achieve and pervasively supported. + B) obtaining '\0' content in a string is either deliberate or indicative + of another, likely more serious problem in the code. + C) the library comes with various functions which deal with this issue + (namely: bfromcstr(), bstr2cstr (), and bSetCstrChar ()) + +Marginal security issues: +......................... + +11. 8-bit versus 9-bit portability + +Bstrlib uses CHAR_BIT and other limits.h constants to the maximum extent +possible to avoid portability problems. However, Bstrlib has not been tested +on any system that does not represent char as 8-bits. So whether or not it +works on 9-bit systems is an open question. It is recommended that Bstrlib be +carefully auditted by anyone using a system in which CHAR_BIT is not 8. + +12. EBCDIC/ASCII/UTF-8 data representation attacks. + +Bstrlib uses ctype.h functions to ensure that it remains portable to non- +ASCII systems. It also checks range to make sure it is well defined even for +data that ANSI does not define for the ctype functions. + +Obscure issues: +............... + +13. Data attributes + +There is no support for a Perl-like "taint" attribute, however, an example of +how to do this using C++'s type system is given as an example. + diff --git a/third_party/HLSLcc/src/decode.cpp b/third_party/HLSLcc/src/decode.cpp new file mode 100644 index 0000000..b0622b3 --- /dev/null +++ b/third_party/HLSLcc/src/decode.cpp @@ -0,0 +1,1635 @@ +#include "internal_includes/tokens.h" +#include "internal_includes/decode.h" +#include "stdlib.h" +#include "stdio.h" +#include "internal_includes/reflect.h" +#include "internal_includes/debug.h" +#include "internal_includes/toGLSLOperand.h" +#include "internal_includes/Shader.h" +#include "internal_includes/Instruction.h" +#include "internal_includes/Declaration.h" + +#define FOURCC(a, b, c, d) ((uint32_t)(uint8_t)(a) | ((uint32_t)(uint8_t)(b) << 8) | ((uint32_t)(uint8_t)(c) << 16) | ((uint32_t)(uint8_t)(d) << 24 )) +enum { FOURCC_DXBC = FOURCC('D', 'X', 'B', 'C') }; //DirectX byte code +enum { FOURCC_SHDR = FOURCC('S', 'H', 'D', 'R') }; //Shader model 4 code +enum { FOURCC_SHEX = FOURCC('S', 'H', 'E', 'X') }; //Shader model 5 code +enum { FOURCC_RDEF = FOURCC('R', 'D', 'E', 'F') }; //Resource definition (e.g. 
constant buffers) +enum { FOURCC_ISGN = FOURCC('I', 'S', 'G', 'N') }; //Input signature +enum { FOURCC_IFCE = FOURCC('I', 'F', 'C', 'E') }; //Interface (for dynamic linking) +enum { FOURCC_OSGN = FOURCC('O', 'S', 'G', 'N') }; //Output signature +enum { FOURCC_PSGN = FOURCC('P', 'C', 'S', 'G') }; //Patch-constant signature + +enum { FOURCC_ISG1 = FOURCC('I', 'S', 'G', '1') }; //Input signature with Stream and MinPrecision +enum { FOURCC_OSG1 = FOURCC('O', 'S', 'G', '1') }; //Output signature with Stream and MinPrecision +enum { FOURCC_OSG5 = FOURCC('O', 'S', 'G', '5') }; //Output signature with Stream +enum { FOURCC_PSG1 = FOURCC('P', 'S', 'G', '1') }; //Patch constant signature with MinPrecision + +enum { FOURCC_STAT = FOURCC('S', 'T', 'A', 'T') }; // Chunks that we ignore +enum { FOURCC_SFI0 = FOURCC('S', 'F', 'I', '0') }; // Chunks that we ignore + + +typedef struct DXBCContainerHeaderTAG +{ + unsigned fourcc; + uint32_t unk[4]; + uint32_t one; + uint32_t totalSize; + uint32_t chunkCount; +} DXBCContainerHeader; + +typedef struct DXBCChunkHeaderTAG +{ + unsigned fourcc; + unsigned size; +} DXBCChunkHeader; + +#ifdef _DEBUG +static uint64_t operandID = 0; +static uint64_t instructionID = 0; +#endif + +void DecodeNameToken(const uint32_t* pui32NameToken, Operand* psOperand) +{ + psOperand->eSpecialName = DecodeOperandSpecialName(*pui32NameToken); + switch (psOperand->eSpecialName) + { + case NAME_UNDEFINED: + { + psOperand->specialName = "undefined"; + break; + } + case NAME_POSITION: + { + psOperand->specialName = "position"; + break; + } + case NAME_CLIP_DISTANCE: + { + psOperand->specialName = "clipDistance"; + break; + } + case NAME_CULL_DISTANCE: + { + psOperand->specialName = "cullDistance"; + break; + } + case NAME_RENDER_TARGET_ARRAY_INDEX: + { + psOperand->specialName = "renderTargetArrayIndex"; + break; + } + case NAME_VIEWPORT_ARRAY_INDEX: + { + psOperand->specialName = "viewportArrayIndex"; + break; + } + case NAME_VERTEX_ID: + { + psOperand->specialName = "vertexID"; + break; + } + case NAME_PRIMITIVE_ID: + { + psOperand->specialName = "primitiveID"; + break; + } + case NAME_INSTANCE_ID: + { + psOperand->specialName = "instanceID"; + break; + } + case NAME_IS_FRONT_FACE: + { + psOperand->specialName = "isFrontFace"; + break; + } + case NAME_SAMPLE_INDEX: + { + psOperand->specialName = "sampleIndex"; + break; + } + //For the quadrilateral domain, there are 6 factors (4 sides, 2 inner). + case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: + case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: + case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: + case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: + + //For the triangular domain, there are 4 factors (3 sides, 1 inner) + case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_INSIDE_TESSFACTOR: + + //For the isoline domain, there are 2 factors (detail and density). + case NAME_FINAL_LINE_DETAIL_TESSFACTOR: + case NAME_FINAL_LINE_DENSITY_TESSFACTOR: + { + psOperand->specialName = "tessFactor"; + break; + } + default: + { + ASSERT(0); + break; + } + } +} + +// Find the declaration of the texture described by psTextureOperand and +// mark it as a shadow type. (e.g. 
accessed via sampler2DShadow rather than sampler2D) +static void MarkTextureAsShadow(ShaderInfo* psShaderInfo, std::vector &declarations, const Operand* psTextureOperand) +{ + ASSERT(psTextureOperand->eType == OPERAND_TYPE_RESOURCE); + + for (std::vector::iterator psDecl = declarations.begin(); psDecl != declarations.end(); psDecl++) + { + if (psDecl->eOpcode == OPCODE_DCL_RESOURCE) + { + if (psDecl->asOperands[0].eType == OPERAND_TYPE_RESOURCE && + psDecl->asOperands[0].ui32RegisterNumber == psTextureOperand->ui32RegisterNumber) + { + psDecl->ui32IsShadowTex = 1; + break; + } + } + } +} + +static void MarkTextureSamplerPair(ShaderInfo* psShaderInfo, std::vector & declarations, const Operand* psTextureOperand, const Operand* psSamplerOperand, TextureSamplerPairs& samplers) +{ + ASSERT(psTextureOperand->eType == OPERAND_TYPE_RESOURCE); + ASSERT(psSamplerOperand->eType == OPERAND_TYPE_SAMPLER); + + for (std::vector::iterator psDecl = declarations.begin(); psDecl != declarations.end(); psDecl++) + { + if (psDecl->eOpcode == OPCODE_DCL_RESOURCE) + { + if (psDecl->asOperands[0].eType == OPERAND_TYPE_RESOURCE && + psDecl->asOperands[0].ui32RegisterNumber == psTextureOperand->ui32RegisterNumber) + { + // psDecl is the texture resource referenced by psTextureOperand + + // add psSamplerOperand->ui32RegisterNumber to list of samplers that use this texture + // set::insert returns a pair of which .second tells whether a new element was actually added + if (psDecl->samplersUsed.insert(psSamplerOperand->ui32RegisterNumber).second) + { + // Record the TEX_with_SMP string in the TextureSamplerPair array that we return to the client + std::string combinedname = TextureSamplerName(psShaderInfo, psTextureOperand->ui32RegisterNumber, psSamplerOperand->ui32RegisterNumber, psDecl->ui32IsShadowTex); + samplers.push_back(combinedname); + } + break; + } + } + } +} + +uint32_t DecodeOperand(const uint32_t *pui32Tokens, Operand* psOperand) +{ + int i; + uint32_t ui32NumTokens = 1; + OPERAND_NUM_COMPONENTS eNumComponents; + +#ifdef _DEBUG + psOperand->id = operandID++; +#endif + + //Some defaults + psOperand->iWriteMaskEnabled = 1; + psOperand->iGSInput = 0; + psOperand->iPSInOut = 0; + psOperand->aeDataType[0] = SVT_FLOAT; + psOperand->aeDataType[1] = SVT_FLOAT; + psOperand->aeDataType[2] = SVT_FLOAT; + psOperand->aeDataType[3] = SVT_FLOAT; + + psOperand->iExtended = DecodeIsOperandExtended(*pui32Tokens); + + + psOperand->eModifier = OPERAND_MODIFIER_NONE; + psOperand->m_SubOperands[0].reset(); + psOperand->m_SubOperands[1].reset(); + psOperand->m_SubOperands[2].reset(); + + psOperand->eMinPrecision = OPERAND_MIN_PRECISION_DEFAULT; + + /* Check if this instruction is extended. 
If it is, + * we need to print the information first */ + if (psOperand->iExtended) + { + /* OperandToken1 is the second token */ + ui32NumTokens++; + + if (DecodeExtendedOperandType(pui32Tokens[1]) == EXTENDED_OPERAND_MODIFIER) + { + psOperand->eModifier = DecodeExtendedOperandModifier(pui32Tokens[1]); + psOperand->eMinPrecision = (OPERAND_MIN_PRECISION)DecodeOperandMinPrecision(pui32Tokens[1]); + } + } + + psOperand->iIndexDims = DecodeOperandIndexDimension(*pui32Tokens); + psOperand->eType = DecodeOperandType(*pui32Tokens); + + psOperand->ui32RegisterNumber = 0; + + eNumComponents = DecodeOperandNumComponents(*pui32Tokens); + + if (psOperand->eType == OPERAND_TYPE_INPUT_GS_INSTANCE_ID) + { + eNumComponents = OPERAND_1_COMPONENT; + psOperand->aeDataType[0] = SVT_UINT; + } + + switch (eNumComponents) + { + case OPERAND_1_COMPONENT: + { + psOperand->iNumComponents = 1; + break; + } + case OPERAND_4_COMPONENT: + { + psOperand->iNumComponents = 4; + break; + } + default: + { + psOperand->iNumComponents = 0; + break; + } + } + + if (psOperand->iWriteMaskEnabled && + psOperand->iNumComponents == 4) + { + psOperand->eSelMode = DecodeOperand4CompSelMode(*pui32Tokens); + + if (psOperand->eSelMode == OPERAND_4_COMPONENT_MASK_MODE) + { + psOperand->ui32CompMask = DecodeOperand4CompMask(*pui32Tokens); + } + else if (psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + psOperand->ui32Swizzle = DecodeOperand4CompSwizzle(*pui32Tokens); + + if (psOperand->ui32Swizzle != NO_SWIZZLE) + { + psOperand->aui32Swizzle[0] = DecodeOperand4CompSwizzleSource(*pui32Tokens, 0); + psOperand->aui32Swizzle[1] = DecodeOperand4CompSwizzleSource(*pui32Tokens, 1); + psOperand->aui32Swizzle[2] = DecodeOperand4CompSwizzleSource(*pui32Tokens, 2); + psOperand->aui32Swizzle[3] = DecodeOperand4CompSwizzleSource(*pui32Tokens, 3); + } + else + { + psOperand->aui32Swizzle[0] = OPERAND_4_COMPONENT_X; + psOperand->aui32Swizzle[1] = OPERAND_4_COMPONENT_Y; + psOperand->aui32Swizzle[2] = OPERAND_4_COMPONENT_Z; + psOperand->aui32Swizzle[3] = OPERAND_4_COMPONENT_W; + } + } + else if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) + { + psOperand->aui32Swizzle[0] = DecodeOperand4CompSel1(*pui32Tokens); + } + } + + if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32) + { + for (i = 0; i < psOperand->iNumComponents; ++i) + { + psOperand->afImmediates[i] = *((float*)(&pui32Tokens[ui32NumTokens])); + ui32NumTokens++; + } + } + else if (psOperand->eType == OPERAND_TYPE_IMMEDIATE64) + { + for (i = 0; i < psOperand->iNumComponents; ++i) + { + psOperand->adImmediates[i] = *((double*)(&pui32Tokens[ui32NumTokens])); + ui32NumTokens += 2; + } + } + + // Used only for Metal + if (psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL || psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL || psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH) + { + psOperand->ui32RegisterNumber = 0; + psOperand->ui32CompMask = 1; + } + + for (i = 0; i < psOperand->iIndexDims; ++i) + { + OPERAND_INDEX_REPRESENTATION eRep = DecodeOperandIndexRepresentation(i , *pui32Tokens); + + psOperand->eIndexRep[i] = eRep; + + psOperand->aui32ArraySizes[i] = 0; + psOperand->ui32RegisterNumber = 0; + + switch (eRep) + { + case OPERAND_INDEX_IMMEDIATE32: + { + psOperand->ui32RegisterNumber = *(pui32Tokens + ui32NumTokens); + psOperand->aui32ArraySizes[i] = psOperand->ui32RegisterNumber; + break; + } + case OPERAND_INDEX_RELATIVE: + { + psOperand->m_SubOperands[i].reset(new Operand()); + DecodeOperand(pui32Tokens + ui32NumTokens, 
psOperand->m_SubOperands[i].get()); + + ui32NumTokens++; + break; + } + case OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: + { + psOperand->ui32RegisterNumber = *(pui32Tokens + ui32NumTokens); + psOperand->aui32ArraySizes[i] = psOperand->ui32RegisterNumber; + + ui32NumTokens++; + + psOperand->m_SubOperands[i].reset(new Operand()); + DecodeOperand(pui32Tokens + ui32NumTokens, psOperand->m_SubOperands[i].get()); + + ui32NumTokens++; + break; + } + default: + { + ASSERT(0); + break; + } + } + + // Indices should be ints + switch (eRep) + { + case OPERAND_INDEX_IMMEDIATE32: + case OPERAND_INDEX_RELATIVE: + case OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: + { + int j = 0; + for (; j < psOperand->iNumComponents; j++) + { + psOperand->aeDataType[j] = SVT_INT; + } + break; + } + default: + { + break; + } + } + ui32NumTokens++; + } + + psOperand->specialName = ""; + + return ui32NumTokens; +} + +const uint32_t* DecodeDeclaration(Shader* psShader, const uint32_t* pui32Token, Declaration* psDecl, ShaderPhase *psPhase) +{ + uint32_t ui32TokenLength = DecodeInstructionLength(*pui32Token); + const uint32_t bExtended = DecodeIsOpcodeExtended(*pui32Token); + const OPCODE_TYPE eOpcode = DecodeOpcodeType(*pui32Token); + uint32_t ui32OperandOffset = 1; + + if (eOpcode < NUM_OPCODES && eOpcode >= 0) + { + psShader->aiOpcodeUsed[eOpcode] = 1; + } + + psDecl->eOpcode = eOpcode; + + psDecl->ui32IsShadowTex = 0; + + if (bExtended) + { + ui32OperandOffset = 2; + } + + switch (eOpcode) + { + case OPCODE_DCL_RESOURCE: // DCL* opcodes have + { + psDecl->value.eResourceDimension = DecodeResourceDimension(*pui32Token); + psDecl->ui32NumOperands = 1; + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); + break; + } + case OPCODE_DCL_CONSTANT_BUFFER: // custom operand formats. + { + psDecl->ui32NumOperands = 1; + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); + break; + } + case OPCODE_DCL_SAMPLER: + { + psDecl->ui32NumOperands = 1; + psDecl->value.eSamplerMode = DecodeSamplerMode(*pui32Token); + + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); + break; + } + case OPCODE_DCL_INDEX_RANGE: + { + int regSpace = 0; + psDecl->ui32NumOperands = 1; + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); + psDecl->value.ui32IndexRange = pui32Token[ui32OperandOffset]; + + regSpace = psDecl->asOperands[0].GetRegisterSpace(psShader->eShaderType, psPhase->ePhase); + if (psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT) + { + uint32_t i; + const uint32_t indexRange = psDecl->value.ui32IndexRange; + const uint32_t reg = psDecl->asOperands[0].ui32RegisterNumber; + + psShader->aIndexedInput[regSpace][reg] = indexRange; + psShader->aIndexedInputParents[regSpace][reg] = reg; + + //-1 means don't declare this input because it falls in + //the range of an already declared array. 
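                // Illustrative example (hypothetical values): for a
                // "dcl_indexrange v3.xyzw, 4" declaration, reg == 3 and
                // indexRange == 4, so the loop below tags v4..v6 with -1 and
                // points their parent entries back at v3; only v3 is then
                // declared, as an array, in the generated code.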
+ for (i = reg + 1; i < reg + indexRange; ++i) + { + psShader->aIndexedInput[regSpace][i] = -1; + psShader->aIndexedInputParents[regSpace][i] = reg; + } + } + + if (psDecl->asOperands[0].eType == OPERAND_TYPE_OUTPUT) + { + psShader->aIndexedOutput[regSpace][psDecl->asOperands[0].ui32RegisterNumber] = true; + } + break; + } + case OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: + { + psDecl->value.eOutputPrimitiveTopology = DecodeGSOutputPrimitiveTopology(*pui32Token); + break; + } + case OPCODE_DCL_GS_INPUT_PRIMITIVE: + { + psDecl->value.eInputPrimitive = DecodeGSInputPrimitive(*pui32Token); + break; + } + case OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: + { + psDecl->value.ui32MaxOutputVertexCount = pui32Token[1]; + break; + } + case OPCODE_DCL_TESS_PARTITIONING: + { + psDecl->value.eTessPartitioning = DecodeTessPartitioning(*pui32Token); + break; + } + case OPCODE_DCL_TESS_DOMAIN: + { + psDecl->value.eTessDomain = DecodeTessDomain(*pui32Token); + break; + } + case OPCODE_DCL_TESS_OUTPUT_PRIMITIVE: + { + psDecl->value.eTessOutPrim = DecodeTessOutPrim(*pui32Token); + break; + } + case OPCODE_DCL_THREAD_GROUP: + { + psDecl->value.aui32WorkGroupSize[0] = pui32Token[1]; + psDecl->value.aui32WorkGroupSize[1] = pui32Token[2]; + psDecl->value.aui32WorkGroupSize[2] = pui32Token[3]; + break; + } + case OPCODE_DCL_INPUT: + { + psDecl->ui32NumOperands = 1; + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); + break; + } + case OPCODE_DCL_INPUT_SIV: + { + psDecl->ui32NumOperands = 1; + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); + if (psShader->eShaderType == PIXEL_SHADER) + { + psDecl->value.eInterpolation = DecodeInterpolationMode(*pui32Token); + } + break; + } + case OPCODE_DCL_INPUT_PS: + { + psDecl->ui32NumOperands = 1; + psDecl->value.eInterpolation = DecodeInterpolationMode(*pui32Token); + Operand* psOperand = &psDecl->asOperands[0]; + DecodeOperand(pui32Token + ui32OperandOffset, psOperand); + + ShaderInfo::InOutSignature *psSig = NULL; + psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, (const ShaderInfo::InOutSignature**)&psSig); + + /* UNITY_FRAMEBUFFER_FETCH_AVAILABLE + special case mapping for inout color. + + In the fragment shader, setting inout var : SV_Target would result to + compiler error, unless SV_Target is defined to COLOR semantic for compatibility + reasons. Unfortunately, we still need to have a clear distinction between + vertex shader COLOR output and SV_Target, so the following workaround abuses + the fact that semantic names are case insensitive and preprocessor macros + are not. 
The resulting HLSL bytecode has semantics in case preserving form, + helps code generator to do extra work required for framebuffer fetch + + See also HLSLSupport.cginc + */ + if (psSig->eSystemValueType == NAME_UNDEFINED && + psSig->semanticName.size() == 5 && !strncmp(psSig->semanticName.c_str(), "CoLoR", 5)) + { + // Rename into something more readable, matches output + psSig->semanticName.replace(0, 9, "SV_Target"); + psOperand->iPSInOut = 1; + } + + break; + } + case OPCODE_DCL_INPUT_SGV: + case OPCODE_DCL_INPUT_PS_SGV: + { + psDecl->ui32NumOperands = 1; + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); + DecodeNameToken(pui32Token + 3, &psDecl->asOperands[0]); + break; + } + case OPCODE_DCL_INPUT_PS_SIV: + { + psDecl->ui32NumOperands = 1; + psDecl->value.eInterpolation = DecodeInterpolationMode(*pui32Token); + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); + DecodeNameToken(pui32Token + 3, &psDecl->asOperands[0]); + break; + } + case OPCODE_DCL_OUTPUT: + { + psDecl->ui32NumOperands = 1; + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); + break; + } + case OPCODE_DCL_OUTPUT_SGV: + { + break; + } + case OPCODE_DCL_OUTPUT_SIV: + { + psDecl->ui32NumOperands = 1; + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); + DecodeNameToken(pui32Token + 3, &psDecl->asOperands[0]); + break; + } + case OPCODE_DCL_TEMPS: + { + psDecl->value.ui32NumTemps = *(pui32Token + ui32OperandOffset); + break; + } + case OPCODE_DCL_INDEXABLE_TEMP: + { + psDecl->sIdxTemp.ui32RegIndex = *(pui32Token + ui32OperandOffset); + psDecl->sIdxTemp.ui32RegCount = *(pui32Token + ui32OperandOffset + 1); + psDecl->sIdxTemp.ui32RegComponentSize = *(pui32Token + ui32OperandOffset + 2); + break; + } + case OPCODE_DCL_GLOBAL_FLAGS: + { + psDecl->value.ui32GlobalFlags = DecodeGlobalFlags(*pui32Token); + break; + } + case OPCODE_DCL_INTERFACE: + { + uint32_t func = 0, numClassesImplementingThisInterface, arrayLen, interfaceID; + interfaceID = pui32Token[ui32OperandOffset]; + ui32OperandOffset++; + psDecl->ui32TableLength = pui32Token[ui32OperandOffset]; + ui32OperandOffset++; + + numClassesImplementingThisInterface = DecodeInterfaceTableLength(*(pui32Token + ui32OperandOffset)); + arrayLen = DecodeInterfaceArrayLength(*(pui32Token + ui32OperandOffset)); + + ui32OperandOffset++; + + psDecl->value.iface.ui32InterfaceID = interfaceID; + psDecl->value.iface.ui32NumFuncTables = numClassesImplementingThisInterface; + psDecl->value.iface.ui32ArraySize = arrayLen; + + psShader->funcPointer[interfaceID].ui32NumBodiesPerTable = psDecl->ui32TableLength; + + for (; func < numClassesImplementingThisInterface; ++func) + { + uint32_t ui32FuncTable = *(pui32Token + ui32OperandOffset); + psShader->aui32FuncTableToFuncPointer[ui32FuncTable] = interfaceID; + + psShader->funcPointer[interfaceID].aui32FuncTables[func] = ui32FuncTable; + ui32OperandOffset++; + } + + break; + } + case OPCODE_DCL_FUNCTION_BODY: + { + psDecl->ui32NumOperands = 1; + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); + break; + } + case OPCODE_DCL_FUNCTION_TABLE: + { + uint32_t ui32Func; + const uint32_t ui32FuncTableID = pui32Token[ui32OperandOffset++]; + const uint32_t ui32NumFuncsInTable = pui32Token[ui32OperandOffset++]; + + for (ui32Func = 0; ui32Func < ui32NumFuncsInTable; ++ui32Func) + { + const uint32_t ui32FuncBodyID = pui32Token[ui32OperandOffset++]; + + psShader->aui32FuncBodyToFuncTable[ui32FuncBodyID] = ui32FuncTableID; + + 
psShader->funcTable[ui32FuncTableID].aui32FuncBodies[ui32Func] = ui32FuncBodyID; + } + +// OpcodeToken0 is followed by a DWORD that represents the function table +// identifier and another DWORD (TableLength) that gives the number of +// functions in the table. +// +// This is followed by TableLength DWORDs which are function body indices. +// + + break; + } + case OPCODE_DCL_INPUT_CONTROL_POINT_COUNT: + { + psDecl->value.ui32MaxOutputVertexCount = DecodeOutputControlPointCount(*pui32Token); + break; + } + case OPCODE_HS_DECLS: + { + break; + } + case OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT: + { + psDecl->value.ui32MaxOutputVertexCount = DecodeOutputControlPointCount(*pui32Token); + break; + } + case OPCODE_HS_JOIN_PHASE: + case OPCODE_HS_FORK_PHASE: + case OPCODE_HS_CONTROL_POINT_PHASE: + { + break; + } + case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: + case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: + { + psDecl->value.ui32HullPhaseInstanceCount = pui32Token[1]; + psPhase->ui32InstanceCount = psDecl->value.ui32HullPhaseInstanceCount; + break; + } + case OPCODE_CUSTOMDATA: + { + ui32TokenLength = pui32Token[1]; + { +// int iTupleSrc = 0, iTupleDest = 0; + //const uint32_t ui32ConstCount = pui32Token[1] - 2; + //const uint32_t ui32TupleCount = (ui32ConstCount / 4); + + const uint32_t ui32NumVec4 = (ui32TokenLength - 2) / 4; + + ICBVec4 const *pVec4Array = (ICBVec4 const *)(void*)(pui32Token + 2); + + /* must be a multiple of 4 */ + ASSERT(((ui32TokenLength - 2) % 4) == 0); + + psDecl->asImmediateConstBuffer.assign(pVec4Array, pVec4Array + ui32NumVec4); + + psDecl->ui32NumOperands = ui32NumVec4; + } + break; + } + case OPCODE_DCL_HS_MAX_TESSFACTOR: + { + psDecl->value.fMaxTessFactor = *((float*)&pui32Token[1]); + break; + } + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED: + { + psDecl->ui32NumOperands = 2; + psDecl->value.eResourceDimension = DecodeResourceDimension(*pui32Token); + psDecl->sUAV.ui32GloballyCoherentAccess = DecodeAccessCoherencyFlags(*pui32Token); + psDecl->sUAV.bCounter = 0; + psDecl->ui32BufferStride = 4; + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); + psDecl->sUAV.Type = DecodeResourceReturnType(0, pui32Token[ui32OperandOffset]); + break; + } + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: + { + psDecl->ui32NumOperands = 1; + psDecl->sUAV.ui32GloballyCoherentAccess = DecodeAccessCoherencyFlags(*pui32Token); + psDecl->sUAV.bCounter = 0; + psDecl->ui32BufferStride = 4; + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); + //This should be a RTYPE_UAV_RWBYTEADDRESS buffer. It is memory backed by + //a shader storage buffer whose is unknown at compile time. 
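            // Raw views are byte addressed; the fixed ui32BufferStride of 4
            // set above reflects the 32-bit word granularity that the
            // translated ld_raw / store_raw accesses operate on.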
+ break; + } + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: + { + const ResourceBinding* psBinding = NULL; + const ConstantBuffer* psBuffer = NULL; + + psDecl->ui32NumOperands = 1; + psDecl->sUAV.ui32GloballyCoherentAccess = DecodeAccessCoherencyFlags(*pui32Token); + psDecl->sUAV.bCounter = 0; + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); + + psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, &psBinding); + psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_UAV, psBinding->ui32BindPoint, &psBuffer); + psDecl->ui32BufferStride = psBuffer->ui32TotalSizeInBytes; + + switch (psBinding->eType) + { + case RTYPE_UAV_RWSTRUCTURED_WITH_COUNTER: + case RTYPE_UAV_APPEND_STRUCTURED: + case RTYPE_UAV_CONSUME_STRUCTURED: + psDecl->sUAV.bCounter = 1; + break; + default: + break; + } + break; + } + case OPCODE_DCL_RESOURCE_STRUCTURED: + { + const ResourceBinding* psBinding = NULL; + const ConstantBuffer* psBuffer = NULL; + psDecl->ui32NumOperands = 1; + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); + + psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, &psBinding); + psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_TEXTURE, psBinding->ui32BindPoint, &psBuffer); + psDecl->ui32BufferStride = psBuffer->ui32TotalSizeInBytes; + break; + } + case OPCODE_DCL_RESOURCE_RAW: + { + psDecl->ui32NumOperands = 1; + psDecl->ui32BufferStride = 4; + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); + break; + } + case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED: + { + psDecl->ui32NumOperands = 1; + psDecl->sUAV.ui32GloballyCoherentAccess = 0; + + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); + + psDecl->sTGSM.ui32Stride = pui32Token[ui32OperandOffset++]; + psDecl->sTGSM.ui32Count = pui32Token[ui32OperandOffset++]; + break; + } + case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW: + { + psDecl->ui32NumOperands = 1; + psDecl->sUAV.ui32GloballyCoherentAccess = 0; + + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); + + psDecl->sTGSM.ui32Stride = 4; + psDecl->sTGSM.ui32Count = pui32Token[ui32OperandOffset++]; + break; + } + case OPCODE_DCL_STREAM: + { + psDecl->ui32NumOperands = 1; + DecodeOperand(pui32Token + ui32OperandOffset, &psDecl->asOperands[0]); + break; + } + case OPCODE_DCL_GS_INSTANCE_COUNT: + { + psDecl->ui32NumOperands = 0; + psDecl->value.ui32GSInstanceCount = pui32Token[1]; + break; + } + default: + { + //Reached end of declarations + return 0; + } + } + + return pui32Token + ui32TokenLength; +} + +const uint32_t* DecodeInstruction(const uint32_t* pui32Token, Instruction* psInst, Shader* psShader, ShaderPhase *psPhase) +{ + uint32_t ui32TokenLength = DecodeInstructionLength(*pui32Token); + const uint32_t bExtended = DecodeIsOpcodeExtended(*pui32Token); + const OPCODE_TYPE eOpcode = DecodeOpcodeType(*pui32Token); + uint32_t ui32OperandOffset = 1; + +#ifdef _DEBUG + psInst->id = instructionID++; +#endif + + psInst->eOpcode = eOpcode; + + psInst->bSaturate = DecodeInstructionSaturate(*pui32Token); + psInst->ui32PreciseMask = DecodeInstructionPreciseMask(*pui32Token); + + psInst->bAddressOffset = 0; + + psInst->ui32FirstSrc = 1; + + psInst->iCausedSplit = 0; + + if (bExtended) + { + do + { + const uint32_t ui32ExtOpcodeToken = pui32Token[ui32OperandOffset]; + const EXTENDED_OPCODE_TYPE eExtType = 
DecodeExtendedOpcodeType(ui32ExtOpcodeToken); + + if (eExtType == EXTENDED_OPCODE_SAMPLE_CONTROLS) + { + struct {int i4 : 4;} sU; + struct {int i4 : 4;} sV; + struct {int i4 : 4;} sW; + + psInst->bAddressOffset = 1; + + sU.i4 = DecodeImmediateAddressOffset( + IMMEDIATE_ADDRESS_OFFSET_U, ui32ExtOpcodeToken); + sV.i4 = DecodeImmediateAddressOffset( + IMMEDIATE_ADDRESS_OFFSET_V, ui32ExtOpcodeToken); + sW.i4 = DecodeImmediateAddressOffset( + IMMEDIATE_ADDRESS_OFFSET_W, ui32ExtOpcodeToken); + + psInst->iUAddrOffset = sU.i4; + psInst->iVAddrOffset = sV.i4; + psInst->iWAddrOffset = sW.i4; + } + else if (eExtType == EXTENDED_OPCODE_RESOURCE_RETURN_TYPE) + { + psInst->xType = DecodeExtendedResourceReturnType(0, ui32ExtOpcodeToken); + psInst->yType = DecodeExtendedResourceReturnType(1, ui32ExtOpcodeToken); + psInst->zType = DecodeExtendedResourceReturnType(2, ui32ExtOpcodeToken); + psInst->wType = DecodeExtendedResourceReturnType(3, ui32ExtOpcodeToken); + } + else if (eExtType == EXTENDED_OPCODE_RESOURCE_DIM) + { + psInst->eResDim = DecodeExtendedResourceDimension(ui32ExtOpcodeToken); + } + + ui32OperandOffset++; + } + while (DecodeIsOpcodeExtended(pui32Token[ui32OperandOffset - 1])); + } + + if (eOpcode < NUM_OPCODES && eOpcode >= 0) + { + psShader->aiOpcodeUsed[eOpcode] = 1; + } + + switch (eOpcode) + { + //no operands + case OPCODE_CUT: + case OPCODE_EMIT: + case OPCODE_EMITTHENCUT: + case OPCODE_RET: + case OPCODE_LOOP: + case OPCODE_ENDLOOP: + case OPCODE_BREAK: + case OPCODE_ELSE: + case OPCODE_ENDIF: + case OPCODE_CONTINUE: + case OPCODE_DEFAULT: + case OPCODE_ENDSWITCH: + case OPCODE_NOP: + case OPCODE_HS_CONTROL_POINT_PHASE: + case OPCODE_HS_FORK_PHASE: + case OPCODE_HS_JOIN_PHASE: + { + psInst->ui32NumOperands = 0; + psInst->ui32FirstSrc = 0; + break; + } + case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: + case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: + { + psInst->ui32NumOperands = 0; + psInst->ui32FirstSrc = 0; + break; + } + case OPCODE_SYNC: + { + psInst->ui32NumOperands = 0; + psInst->ui32FirstSrc = 0; + psInst->ui32SyncFlags = DecodeSyncFlags(*pui32Token); + break; + } + + //1 operand + case OPCODE_EMIT_STREAM: + case OPCODE_CUT_STREAM: + case OPCODE_EMITTHENCUT_STREAM: + case OPCODE_CASE: + case OPCODE_SWITCH: + case OPCODE_LABEL: + { + psInst->ui32NumOperands = 1; + psInst->ui32FirstSrc = 0; + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + break; + } + + case OPCODE_INTERFACE_CALL: + { + psInst->ui32NumOperands = 1; + psInst->ui32FirstSrc = 0; + psInst->ui32FuncIndexWithinInterface = pui32Token[ui32OperandOffset]; + ui32OperandOffset++; + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + + break; + } + + /* Floating point instruction decodes */ + + //Instructions with two operands go here + case OPCODE_MOV: + { + psInst->ui32NumOperands = 2; + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); + break; + } + case OPCODE_LOG: + case OPCODE_RSQ: + case OPCODE_EXP: + case OPCODE_SQRT: + case OPCODE_ROUND_PI: + case OPCODE_ROUND_NI: + case OPCODE_ROUND_Z: + case OPCODE_ROUND_NE: + case OPCODE_FRC: + case OPCODE_FTOU: + case OPCODE_FTOI: + case OPCODE_UTOF: + case OPCODE_ITOF: + case OPCODE_INEG: + case OPCODE_IMM_ATOMIC_ALLOC: + case OPCODE_IMM_ATOMIC_CONSUME: + case OPCODE_DMOV: + case OPCODE_DTOF: + case OPCODE_FTOD: + case OPCODE_DRCP: + case OPCODE_COUNTBITS: + 
case OPCODE_FIRSTBIT_HI: + case OPCODE_FIRSTBIT_LO: + case OPCODE_FIRSTBIT_SHI: + case OPCODE_BFREV: + case OPCODE_F32TOF16: + case OPCODE_F16TOF32: + case OPCODE_RCP: + case OPCODE_DERIV_RTX: + case OPCODE_DERIV_RTY: + case OPCODE_DERIV_RTX_COARSE: + case OPCODE_DERIV_RTX_FINE: + case OPCODE_DERIV_RTY_COARSE: + case OPCODE_DERIV_RTY_FINE: + case OPCODE_NOT: + case OPCODE_BUFINFO: + { + psInst->ui32NumOperands = 2; + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); + break; + } + + //Instructions with three operands go here + case OPCODE_SINCOS: + { + psInst->ui32FirstSrc = 2; + //Intentional fall-through + } + case OPCODE_IMIN: + case OPCODE_UMIN: + case OPCODE_UMAX: + case OPCODE_MIN: + case OPCODE_IMAX: + case OPCODE_MAX: + case OPCODE_MUL: + case OPCODE_DIV: + case OPCODE_ADD: + case OPCODE_DP2: + case OPCODE_DP3: + case OPCODE_DP4: + case OPCODE_NE: + case OPCODE_OR: + case OPCODE_XOR: + case OPCODE_LT: + case OPCODE_IEQ: + case OPCODE_IADD: + case OPCODE_AND: + case OPCODE_GE: + case OPCODE_IGE: + case OPCODE_EQ: + case OPCODE_USHR: + case OPCODE_ISHL: + case OPCODE_ISHR: + case OPCODE_LD: + case OPCODE_ILT: + case OPCODE_INE: + case OPCODE_UGE: + case OPCODE_ULT: + case OPCODE_ATOMIC_AND: + case OPCODE_ATOMIC_IADD: + case OPCODE_ATOMIC_OR: + case OPCODE_ATOMIC_XOR: + case OPCODE_ATOMIC_IMAX: + case OPCODE_ATOMIC_IMIN: + case OPCODE_ATOMIC_UMAX: + case OPCODE_ATOMIC_UMIN: + case OPCODE_DADD: + case OPCODE_DMAX: + case OPCODE_DMIN: + case OPCODE_DMUL: + case OPCODE_DEQ: + case OPCODE_DGE: + case OPCODE_DLT: + case OPCODE_DNE: + case OPCODE_DDIV: + { + psInst->ui32NumOperands = 3; + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[2]); + break; + } + //Instructions with four operands go here + case OPCODE_MAD: + case OPCODE_MOVC: + case OPCODE_IMAD: + case OPCODE_UDIV: + case OPCODE_LOD: + case OPCODE_SAMPLE: + case OPCODE_GATHER4: + case OPCODE_LD_MS: + case OPCODE_UBFE: + case OPCODE_IBFE: + case OPCODE_ATOMIC_CMP_STORE: + case OPCODE_IMM_ATOMIC_IADD: + case OPCODE_IMM_ATOMIC_AND: + case OPCODE_IMM_ATOMIC_OR: + case OPCODE_IMM_ATOMIC_XOR: + case OPCODE_IMM_ATOMIC_EXCH: + case OPCODE_IMM_ATOMIC_IMAX: + case OPCODE_IMM_ATOMIC_IMIN: + case OPCODE_IMM_ATOMIC_UMAX: + case OPCODE_IMM_ATOMIC_UMIN: + case OPCODE_DMOVC: + case OPCODE_DFMA: + case OPCODE_IMUL: + { + psInst->ui32NumOperands = 4; + + if (eOpcode == OPCODE_IMUL || eOpcode == OPCODE_UDIV) + { + psInst->ui32FirstSrc = 2; + } + + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[2]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[3]); + break; + } + case OPCODE_GATHER4_PO: + case OPCODE_SAMPLE_L: + case OPCODE_BFI: + case OPCODE_SWAPC: + case OPCODE_IMM_ATOMIC_CMP_EXCH: + { + psInst->ui32NumOperands = 5; + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); + ui32OperandOffset += 
DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[2]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[3]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[4]); + break; + } + case OPCODE_GATHER4_C: + case OPCODE_SAMPLE_C: + case OPCODE_SAMPLE_C_LZ: + case OPCODE_SAMPLE_B: + { + psInst->ui32NumOperands = 5; + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[2]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[3]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[4]); + + /* sample_b is not a shadow sampler, others need flagging */ + if (eOpcode != OPCODE_SAMPLE_B) + { + MarkTextureAsShadow(&psShader->sInfo, psPhase->psDecl, &psInst->asOperands[2]); + } + + break; + } + case OPCODE_GATHER4_PO_C: + case OPCODE_SAMPLE_D: + { + psInst->ui32NumOperands = 6; + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[2]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[3]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[4]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[5]); + + /* sample_d is not a shadow sampler, others need flagging */ + if (eOpcode != OPCODE_SAMPLE_D) + { + MarkTextureAsShadow(&psShader->sInfo, + psPhase->psDecl, + &psInst->asOperands[2]); + } + break; + } + case OPCODE_IF: + case OPCODE_BREAKC: + case OPCODE_CONTINUEC: + case OPCODE_RETC: + case OPCODE_DISCARD: + { + psInst->eBooleanTestType = DecodeInstrTestBool(*pui32Token); + psInst->ui32NumOperands = 1; + psInst->ui32FirstSrc = 0; // no destination registers + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + break; + } + case OPCODE_CALLC: + { + psInst->eBooleanTestType = DecodeInstrTestBool(*pui32Token); + psInst->ui32NumOperands = 2; + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); + break; + } + case OPCODE_CUSTOMDATA: + { + psInst->ui32NumOperands = 0; + ui32TokenLength = pui32Token[1]; + break; + } + case OPCODE_EVAL_CENTROID: + { + psInst->ui32NumOperands = 2; + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); + break; + } + case OPCODE_EVAL_SAMPLE_INDEX: + case OPCODE_EVAL_SNAPPED: + case OPCODE_STORE_UAV_TYPED: + case OPCODE_LD_UAV_TYPED: + case OPCODE_LD_RAW: + case OPCODE_STORE_RAW: + { + psInst->ui32NumOperands = 3; + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[2]); + break; + } + case OPCODE_STORE_STRUCTURED: + case 
OPCODE_LD_STRUCTURED: + { + psInst->ui32NumOperands = 4; + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[2]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[3]); + break; + } + case OPCODE_RESINFO: + { + psInst->ui32NumOperands = 3; + + psInst->eResInfoReturnType = DecodeResInfoReturnType(pui32Token[0]); + + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[2]); + break; + } + case OPCODE_SAMPLE_INFO: + { + psInst->ui32NumOperands = 2; + + psInst->eResInfoReturnType = DecodeResInfoReturnType(pui32Token[0]); + + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[0]); + ui32OperandOffset += DecodeOperand(pui32Token + ui32OperandOffset, &psInst->asOperands[1]); + break; + } + case OPCODE_MSAD: + default: + { + ASSERT(0); + break; + } + } + + // For opcodes that sample textures, mark which samplers are used by each texture + { + uint32_t ui32TextureRegisterNumber = 0; + uint32_t ui32SamplerRegisterNumber = 0; + uint32_t bTextureSampleInstruction = 0; + switch (eOpcode) + { + case OPCODE_GATHER4: + // dest, coords, tex, sampler + ui32TextureRegisterNumber = 2; + ui32SamplerRegisterNumber = 3; + bTextureSampleInstruction = 1; + break; + case OPCODE_GATHER4_PO: + //dest, coords, offset, tex, sampler + ui32TextureRegisterNumber = 3; + ui32SamplerRegisterNumber = 4; + bTextureSampleInstruction = 1; + break; + case OPCODE_GATHER4_C: + //dest, coords, tex, sampler srcReferenceValue + ui32TextureRegisterNumber = 2; + ui32SamplerRegisterNumber = 3; + bTextureSampleInstruction = 1; + break; + case OPCODE_GATHER4_PO_C: + //dest, coords, offset, tex, sampler, srcReferenceValue + ui32TextureRegisterNumber = 3; + ui32SamplerRegisterNumber = 4; + bTextureSampleInstruction = 1; + break; + case OPCODE_SAMPLE: + case OPCODE_SAMPLE_L: + case OPCODE_SAMPLE_C: + case OPCODE_SAMPLE_C_LZ: + case OPCODE_SAMPLE_B: + case OPCODE_SAMPLE_D: + // dest, coords, tex, sampler [, reference] + ui32TextureRegisterNumber = 2; + ui32SamplerRegisterNumber = 3; + bTextureSampleInstruction = 1; + break; + default: + break; + } + + if (bTextureSampleInstruction) + { + MarkTextureSamplerPair(&psShader->sInfo, + psPhase->psDecl, + &psInst->asOperands[ui32TextureRegisterNumber], + &psInst->asOperands[ui32SamplerRegisterNumber], + psShader->textureSamplers); + } + } + + return pui32Token + ui32TokenLength; +} + +const uint32_t* DecodeShaderPhase(const uint32_t* pui32Tokens, + Shader* psShader, + const SHADER_PHASE_TYPE ePhaseType, + ShaderPhase *psPhase) +{ + const uint32_t* pui32CurrentToken = pui32Tokens; + const uint32_t ui32ShaderLength = psShader->ui32ShaderLength; + + psPhase->ePhase = ePhaseType; + //Using ui32ShaderLength as the declaration and instruction count + //will allocate more than enough memory. Avoids having to + //traverse the entire shader just to get the real counts. + + psPhase->psDecl.clear(); + psPhase->psDecl.reserve(ui32ShaderLength); + + while (1) //Keep going until we reach the first non-declaration token, or the end of the shader. 
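    // (DecodeDeclaration() returns 0 at the first token that is not a
    // declaration; at that point the speculatively pushed Declaration below is
    // popped again and decoding falls through to the instruction loop.)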
+ { + psPhase->psDecl.push_back(Declaration()); + const uint32_t* pui32Result = DecodeDeclaration(psShader, pui32CurrentToken, &psPhase->psDecl[psPhase->psDecl.size() - 1], psPhase); + + if (pui32Result) + { + pui32CurrentToken = pui32Result; + + if (pui32CurrentToken >= (psShader->pui32FirstToken + ui32ShaderLength)) + { + break; + } + } + else + { + psPhase->psDecl.pop_back(); // Remove the last one, it wasn't needed after all + break; + } + } + + +//Instructions + psPhase->psInst.clear(); + psPhase->psInst.reserve(ui32ShaderLength); + + while (pui32CurrentToken < (psShader->pui32FirstToken + ui32ShaderLength)) + { + psPhase->psInst.push_back(Instruction()); + const uint32_t* nextInstr = DecodeInstruction(pui32CurrentToken, &psPhase->psInst[psPhase->psInst.size() - 1], psShader, psPhase); + +#ifdef _DEBUG + if (nextInstr == pui32CurrentToken) + { + ASSERT(0); + break; + } +#endif + + if (psPhase->psInst[psPhase->psInst.size() - 1].eOpcode == OPCODE_HS_FORK_PHASE || psPhase->psInst[psPhase->psInst.size() - 1].eOpcode == OPCODE_HS_JOIN_PHASE) + { + psPhase->psInst.pop_back(); + return pui32CurrentToken; + } + pui32CurrentToken = nextInstr; + } + + return pui32CurrentToken; +} + +const void AllocateHullPhaseArrays(const uint32_t* pui32Tokens, + Shader* psShader) +{ + const uint32_t* pui32CurrentToken = pui32Tokens; + const uint32_t ui32ShaderLength = psShader->ui32ShaderLength; + uint32_t ui32PhaseCount = 2; // Always the main phase and the HS global declarations + uint32_t i; + + while (1) //Keep going until we reach the first non-declaration token, or the end of the shader. + { + uint32_t ui32TokenLength = DecodeInstructionLength(*pui32CurrentToken); + const OPCODE_TYPE eOpcode = DecodeOpcodeType(*pui32CurrentToken); + + if (eOpcode == OPCODE_CUSTOMDATA) + { + ui32TokenLength = pui32CurrentToken[1]; + } + + pui32CurrentToken = pui32CurrentToken + ui32TokenLength; + + switch (eOpcode) + { + case OPCODE_HS_CONTROL_POINT_PHASE: + case OPCODE_HS_JOIN_PHASE: + case OPCODE_HS_FORK_PHASE: + ui32PhaseCount++; + break; + default: + break; + } + + if (pui32CurrentToken >= (psShader->pui32FirstToken + ui32ShaderLength)) + { + break; + } + } + + psShader->asPhases.clear(); + psShader->asPhases.resize(ui32PhaseCount); + for (i = 0; i < ui32PhaseCount; i++) + psShader->asPhases[i].ui32InstanceCount = 1; +} + +const uint32_t* DecodeHullShader(const uint32_t* pui32Tokens, Shader* psShader) +{ + const uint32_t* pui32CurrentToken = pui32Tokens; + const uint32_t ui32ShaderLength = psShader->ui32ShaderLength; + ShaderPhase *psPhase; + + AllocateHullPhaseArrays(pui32Tokens, psShader); + + // Index 1 is HS_GLOBAL_DECL + psShader->asPhases[1].psInst.clear(); + psShader->asPhases[1].psDecl.clear(); + psShader->asPhases[1].ePhase = HS_GLOBAL_DECL_PHASE; + psShader->asPhases[1].ui32InstanceCount = 1; + + // The next phase to parse in. + psPhase = &psShader->asPhases[2]; + + //Keep going until we have done all phases or the end of the shader. 
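    // Hull-shader phase layout, as set up by AllocateHullPhaseArrays above:
    //   asPhases[0]  - the main phase,
    //   asPhases[1]  - HS global declarations (filled by the else branch below),
    //   asPhases[2+] - one entry per control point / fork / join phase, in the
    //                  order the hs_*_phase markers appear in the token stream.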
+ while (1) + { + Declaration newDecl; + const uint32_t* pui32Result = DecodeDeclaration(psShader, pui32CurrentToken, &newDecl, psPhase); + + if (pui32Result) + { + pui32CurrentToken = pui32Result; + + if (newDecl.eOpcode == OPCODE_HS_CONTROL_POINT_PHASE) + { + pui32CurrentToken = DecodeShaderPhase(pui32CurrentToken, psShader, HS_CTRL_POINT_PHASE, psPhase); + psPhase++; + } + else if (newDecl.eOpcode == OPCODE_HS_FORK_PHASE) + { + pui32CurrentToken = DecodeShaderPhase(pui32CurrentToken, psShader, HS_FORK_PHASE, psPhase++); + } + else if (newDecl.eOpcode == OPCODE_HS_JOIN_PHASE) + { + pui32CurrentToken = DecodeShaderPhase(pui32CurrentToken, psShader, HS_JOIN_PHASE, psPhase++); + } + else + { + psShader->asPhases[1].psDecl.push_back(newDecl); + } + + if (pui32CurrentToken >= (psShader->pui32FirstToken + ui32ShaderLength)) + { + break; + } + } + else + { + break; + } + } + + return pui32CurrentToken; +} + +void Decode(const uint32_t* pui32Tokens, Shader* psShader) +{ + const uint32_t* pui32CurrentToken = pui32Tokens; + const uint32_t ui32ShaderLength = pui32Tokens[1]; + + psShader->ui32MajorVersion = DecodeProgramMajorVersion(*pui32CurrentToken); + psShader->ui32MinorVersion = DecodeProgramMinorVersion(*pui32CurrentToken); + psShader->eShaderType = DecodeShaderType(*pui32CurrentToken); + + pui32CurrentToken++;//Move to shader length + psShader->ui32ShaderLength = ui32ShaderLength; + pui32CurrentToken++;//Move to after shader length (usually a declaration) + + psShader->pui32FirstToken = pui32Tokens; + + if (psShader->eShaderType == HULL_SHADER) + { + // DecodeHullShader will allocate psShader->asPhases array. + pui32CurrentToken = DecodeHullShader(pui32CurrentToken, psShader); + return; + } + else + { + psShader->asPhases.clear(); + psShader->asPhases.resize(1); + } + + // Phase 0 is always the main phase + psShader->asPhases[0].ui32InstanceCount = 1; + + DecodeShaderPhase(pui32CurrentToken, psShader, MAIN_PHASE, &psShader->asPhases[0]); +} + +Shader* DecodeDXBC(uint32_t* data, uint32_t decodeFlags) +{ + Shader* psShader; + DXBCContainerHeader* header = (DXBCContainerHeader*)data; + uint32_t i; + uint32_t chunkCount; + uint32_t* chunkOffsets; + ReflectionChunks refChunks; + uint32_t* shaderChunk = 0; + + if (header->fourcc != FOURCC_DXBC) + { + ASSERT(0 && "Invalid shader type (DX9 shaders no longer supported)!"); + } + + refChunks.pui32Inputs = NULL; + refChunks.pui32Interfaces = NULL; + refChunks.pui32Outputs = NULL; + refChunks.pui32Resources = NULL; + refChunks.pui32Inputs11 = NULL; + refChunks.pui32Outputs11 = NULL; + refChunks.pui32OutputsWithStreams = NULL; + refChunks.pui32PatchConstants = NULL; + refChunks.pui32PatchConstants11 = NULL; + + chunkOffsets = (uint32_t*)(header + 1); + + chunkCount = header->chunkCount; + + for (i = 0; i < chunkCount; ++i) + { + uint32_t offset = chunkOffsets[i]; + + DXBCChunkHeader* chunk = (DXBCChunkHeader*)((char*)data + offset); + + switch (chunk->fourcc) + { + case FOURCC_ISGN: + { + refChunks.pui32Inputs = (uint32_t*)(chunk + 1); + break; + } + case FOURCC_ISG1: + { + refChunks.pui32Inputs11 = (uint32_t*)(chunk + 1); + break; + } + case FOURCC_RDEF: + { + refChunks.pui32Resources = (uint32_t*)(chunk + 1); + break; + } + case FOURCC_IFCE: + { + refChunks.pui32Interfaces = (uint32_t*)(chunk + 1); + break; + } + case FOURCC_OSGN: + { + refChunks.pui32Outputs = (uint32_t*)(chunk + 1); + break; + } + case FOURCC_OSG1: + { + refChunks.pui32Outputs11 = (uint32_t*)(chunk + 1); + break; + } + case FOURCC_OSG5: + { + refChunks.pui32OutputsWithStreams = 
(uint32_t*)(chunk + 1); + break; + } + case FOURCC_SHDR: + case FOURCC_SHEX: + { + shaderChunk = (uint32_t*)(chunk + 1); + break; + } + case FOURCC_PSGN: + { + refChunks.pui32PatchConstants = (uint32_t*)(chunk + 1); + break; + } + case FOURCC_PSG1: + { + refChunks.pui32PatchConstants11 = (uint32_t*)(chunk + 1); + break; + } + case FOURCC_STAT: + case FOURCC_SFI0: + { + break; // Ignored + } + default: + { +// ASSERT(0); // Uncomment this to hunt for unknown chunks later on. + break; + } + } + } + + if (shaderChunk) + { + uint32_t ui32MajorVersion; + uint32_t ui32MinorVersion; + + psShader = new Shader(); + + ui32MajorVersion = DecodeProgramMajorVersion(*shaderChunk); + ui32MinorVersion = DecodeProgramMinorVersion(*shaderChunk); + + LoadShaderInfo(ui32MajorVersion, + ui32MinorVersion, + &refChunks, + &psShader->sInfo, decodeFlags); + + Decode(shaderChunk, psShader); + + return psShader; + } + + return 0; +} diff --git a/third_party/HLSLcc/src/internal_includes/ControlFlowGraph.h b/third_party/HLSLcc/src/internal_includes/ControlFlowGraph.h new file mode 100644 index 0000000..b9263cf --- /dev/null +++ b/third_party/HLSLcc/src/internal_includes/ControlFlowGraph.h @@ -0,0 +1,151 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include + +struct Instruction; +class Operand; + +namespace HLSLcc +{ + using namespace std; + +namespace ControlFlow +{ + class BasicBlock; + + class ControlFlowGraph + { + friend class BasicBlock; + public: + ControlFlowGraph() + : m_BlockMap() + , m_BlockStorage() + {} + + typedef std::vector > BasicBlockStorage; + + const BasicBlock &Build(const Instruction* firstInstruction, const Instruction* endInstruction); + + // Only works for instructions that start the basic block + const BasicBlock *GetBasicBlockForInstruction(const Instruction *instruction) const; + + // non-const version for BasicBlock + BasicBlock *GetBasicBlockForInstruction(const Instruction *instruction); + + const BasicBlockStorage &AllBlocks() const { return m_BlockStorage; } + private: + + // Map for storing the created basic blocks. Map key is the pointer to the first instruction in the block + typedef std::map BasicBlockMap; + + BasicBlockMap m_BlockMap; + + // auto_ptr -type storage for multiple BasicBlocks. BlockMap above only has pointers into these + BasicBlockStorage m_BlockStorage; + }; + + + class BasicBlock + { + friend class ControlFlowGraph; + public: + // A set of register indices, one per each vec4 component per register + typedef std::set RegisterSet; + // The connections (either incoming or outgoing) from this block. 
The instruction is the same one as the key in ControlFlowGraph to that basic block + typedef std::set ConnectionSet; + + struct Definition + { + Definition(const Instruction* i = nullptr, const Operand* o = nullptr) + : m_Instruction(i) + , m_Operand(o) + {} + + Definition(const Definition& a) = default; + Definition(Definition&& a) = default; + ~Definition() = default; + + Definition& operator=(const Definition& a) = default; + Definition& operator=(Definition&& a) = default; + + bool operator==(const Definition& a) const + { + if (a.m_Instruction != m_Instruction) + return false; + return a.m_Operand == m_Operand; + } + + bool operator!=(const Definition& a) const + { + if (a.m_Instruction == m_Instruction) + return false; + return a.m_Operand != m_Operand; + } + + bool operator<(const Definition& a) const + { + if (m_Instruction != a.m_Instruction) + return m_Instruction < a.m_Instruction; + return m_Operand < a.m_Operand; + } + + const Instruction *m_Instruction; + const Operand *m_Operand; + }; + + typedef std::set ReachableDefinitionsPerVariable; // A set of possibly visible definitions for one component of one vec4 variable + typedef std::map ReachableVariables; // A VisibleDefinitionSet for each variable*component. + + const Instruction *First() const { return m_First; } + const Instruction *Last() const { return m_Last; } + + const RegisterSet &UEVar() const { return m_UEVar; } + const RegisterSet &VarKill() const { return m_VarKill; } + + const ConnectionSet &Preceding() const { return m_Preceding; } + const ConnectionSet &Succeeding() const { return m_Succeeding; } + + const ReachableVariables &DEDef() const { return m_DEDef; } + const ReachableVariables &Reachable() const { return m_Reachable; } + + // Helper function: Do union of 2 ReachableVariables, store result in a. + static void RVarUnion(ReachableVariables &a, const ReachableVariables &b); + + private: + + // Generate a basic block. Private constructor, can only be constructed from ControlFlowGraph::Build() + BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, const Instruction *psPrecedingBlockHead, const Instruction* psEnd); + + // Walk through the instructions and build UEVar and VarKill sets, create succeeding nodes if they don't exist already. + void Build(); + + bool RebuildReachable(); // Rebuild m_Reachable from preceding blocks and this one. Returns true if current value changed. + + + BasicBlock * AddChildBasicBlock(const Instruction *psFirst); + + private: + ControlFlowGraph &m_Graph; // The graph object containing this block + + const Instruction *m_First; // The first instruction in the basic block + const Instruction *m_Last; // The last instruction in the basic block. Either OPCODE_RET or a branch/jump/loop instruction + const Instruction *m_End; // past-the-end pointer + + RegisterSet m_UEVar; // Upwards-exposed variables (temps that need definition from upstream and are used in this basic block) + RegisterSet m_VarKill; // Set of variables that are defined in this block. + + ConnectionSet m_Preceding; // Set of blocks that immediately precede this block in the CFG + ConnectionSet m_Succeeding; // Set of blocks that follow this block in the CFG + + ReachableVariables m_DEDef; // Downward-exposed definitions from this basic block. Always only one item per set. + + ReachableVariables m_Reachable; // The set of variable definitions that are visible at the end of this block. 
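+
+        // A minimal usage sketch (illustrative only; 'phase' is a hypothetical,
+        // already-decoded ShaderPhase and not something this header guarantees):
+        //
+        //     HLSLcc::ControlFlow::ControlFlowGraph cfg;
+        //     const BasicBlock &entry = cfg.Build(phase.psInst.data(),
+        //                                         phase.psInst.data() + phase.psInst.size());
+        //     for (const auto &block : cfg.AllBlocks())
+        //     {
+        //         // UEVar():    components read before any write in the block
+        //         // VarKill():  components written inside the block
+        //         // Reachable(): definitions still visible at the end of the block
+        //     }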
+ }; +} +} diff --git a/third_party/HLSLcc/src/internal_includes/ControlFlowGraphUtils.h b/third_party/HLSLcc/src/internal_includes/ControlFlowGraphUtils.h new file mode 100644 index 0000000..69ad807 --- /dev/null +++ b/third_party/HLSLcc/src/internal_includes/ControlFlowGraphUtils.h @@ -0,0 +1,30 @@ +#pragma once + +struct Instruction; + +namespace HLSLcc +{ +namespace ControlFlow +{ + class Utils + { + public: + // For a given flow-control instruction, find the corresponding jump location: + // If the input is OPCODE_IF, then find the next same-level ELSE or ENDIF +1 + // For ELSE, find same level ENDIF + 1 + // For BREAK/BREAKC, find next ENDLOOP or ENDSWITCH + 1 + // For SWITCH, find next same-level CASE/DEFAULT (skip multiple consecutive case/default labels) or ENDSWITCH + 1 + // For ENDLOOP, find previous same-level LOOP + 1 + // For CASE/DEFAULT, find next same-level CASE/DEFAULT or ENDSWITCH + 1, skip multiple consecutive case/default labels + // For CONTINUE/C the previous LOOP + 1 + // Note that LOOP/ENDSWITCH itself is nothing but a label but it still starts a new basic block. + // Note that CASE labels fall through. + // Always returns the beginning of the next block, so skip multiple CASE/DEFAULT labels etc. + // If sawEndSwitch != null, will bet set to true if the label skipping saw past ENDSWITCH + // If needConnectToParent != null, will be set to true if sawEndSwitch == true and there are one or more case labels directly before it. + static const Instruction * GetJumpPoint(const Instruction *psStart, bool *sawEndSwitch = 0, bool *needConnectToParent = 0); + + static const Instruction *GetNextNonLabelInstruction(const Instruction *psStart, bool *sawEndSwitch = 0); + }; +} +} diff --git a/third_party/HLSLcc/src/internal_includes/DataTypeAnalysis.h b/third_party/HLSLcc/src/internal_includes/DataTypeAnalysis.h new file mode 100644 index 0000000..e01eb18 --- /dev/null +++ b/third_party/HLSLcc/src/internal_includes/DataTypeAnalysis.h @@ -0,0 +1,15 @@ +#pragma once + +#include "include/ShaderInfo.h" +#include + +class HLSLCrossCompilerContext; +struct Instruction; + +namespace HLSLcc +{ +namespace DataTypeAnalysis +{ + void SetDataTypes(HLSLCrossCompilerContext* psContext, std::vector &instructions, uint32_t ui32TempCount, std::vector &results); +} +} diff --git a/third_party/HLSLcc/src/internal_includes/Declaration.h b/third_party/HLSLcc/src/internal_includes/Declaration.h new file mode 100644 index 0000000..0586a22 --- /dev/null +++ b/third_party/HLSLcc/src/internal_includes/Declaration.h @@ -0,0 +1,118 @@ +#pragma once + +#include +#include +#include "internal_includes/tokens.h" +#include "internal_includes/Operand.h" + +typedef struct ICBVec4_TAG +{ + uint32_t a; + uint32_t b; + uint32_t c; + uint32_t d; +} ICBVec4; + +#define ACCESS_FLAG_READ 0x1 +#define ACCESS_FLAG_WRITE 0x2 +#define ACCESS_FLAG_ATOMIC 0x4 + +struct Declaration +{ + Declaration() : + eOpcode(OPCODE_INVALID), + ui32NumOperands(0), + ui32BufferStride(0), + ui32TableLength(0), + ui32IsShadowTex(0) + {} + + OPCODE_TYPE eOpcode; + + uint32_t ui32NumOperands; + + Operand asOperands[2]; + + std::vector asImmediateConstBuffer; + //The declaration can set one of these + //values depending on the opcode. 
+ union + { + uint32_t ui32GlobalFlags; + uint32_t ui32NumTemps; + RESOURCE_DIMENSION eResourceDimension; + INTERPOLATION_MODE eInterpolation; + PRIMITIVE_TOPOLOGY eOutputPrimitiveTopology; + PRIMITIVE eInputPrimitive; + uint32_t ui32MaxOutputVertexCount; + TESSELLATOR_DOMAIN eTessDomain; + TESSELLATOR_PARTITIONING eTessPartitioning; + TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim; + uint32_t aui32WorkGroupSize[3]; + uint32_t ui32HullPhaseInstanceCount; + float fMaxTessFactor; + uint32_t ui32IndexRange; + uint32_t ui32GSInstanceCount; + SB_SAMPLER_MODE eSamplerMode; // For sampler declarations, the sampler mode. + + struct Interface_TAG + { + uint32_t ui32InterfaceID; + uint32_t ui32NumFuncTables; + uint32_t ui32ArraySize; + } iface; + } value; + + uint32_t ui32BufferStride; + + struct UAV_TAG + { + UAV_TAG() : + ui32GloballyCoherentAccess(0), + bCounter(0), + Type(RETURN_TYPE_UNORM), + ui32NumComponents(0), + ui32AccessFlags(0) + { + } + + uint32_t ui32GloballyCoherentAccess; + uint8_t bCounter; + RESOURCE_RETURN_TYPE Type; + uint32_t ui32NumComponents; + uint32_t ui32AccessFlags; + } sUAV; + + struct TGSM_TAG + { + uint32_t ui32Stride; + uint32_t ui32Count; + + TGSM_TAG() : + ui32Stride(0), + ui32Count(0) + { + } + } sTGSM; + + struct IndexableTemp_TAG + { + uint32_t ui32RegIndex; + uint32_t ui32RegCount; + uint32_t ui32RegComponentSize; + + IndexableTemp_TAG() : + ui32RegIndex(0), + ui32RegCount(0), + ui32RegComponentSize(0) + { + } + } sIdxTemp; + + uint32_t ui32TableLength; + + uint32_t ui32IsShadowTex; + + // Set indexed by sampler register number. + std::set samplersUsed; +}; diff --git a/third_party/HLSLcc/src/internal_includes/HLSLCrossCompilerContext.h b/third_party/HLSLcc/src/internal_includes/HLSLCrossCompilerContext.h new file mode 100644 index 0000000..fa2af76 --- /dev/null +++ b/third_party/HLSLcc/src/internal_includes/HLSLCrossCompilerContext.h @@ -0,0 +1,81 @@ +#pragma once + +#include +#include +#include +#include "bstrlib.h" + +class Shader; +class GLSLCrossDependencyData; +class ShaderPhase; +class Translator; +class Operand; +class HLSLccReflection; + +class HLSLCrossCompilerContext +{ +public: + HLSLCrossCompilerContext(HLSLccReflection &refl) : + glsl(nullptr), + extensions(nullptr), + beforeMain(nullptr), + currentGLSLString(nullptr), + currentPhase(0), + indent(0), + flags(0), + psShader(nullptr), + psDependencies(nullptr), + inputPrefix(nullptr), + outputPrefix(nullptr), + psTranslator(nullptr), + m_Reflection(refl) + {} + + bstring glsl; + bstring extensions; + bstring beforeMain; + + bstring* currentGLSLString;//either glsl or earlyMain of current phase + + uint32_t currentPhase; + + int indent; + unsigned int flags; + + // Helper functions for checking flags + // Returns true if VULKAN_BINDINGS flag is set + bool IsVulkan() const; + + // Helper functions for checking flags + // Returns true if HLSLCC_FLAG_NVN_TARGET flag is set + bool IsSwitch() const; + + Shader* psShader; + GLSLCrossDependencyData* psDependencies; + const char *inputPrefix; // Prefix for shader inputs + const char *outputPrefix; // Prefix for shader outputs + + void DoDataTypeAnalysis(ShaderPhase *psPhase); + void ReserveFramebufferFetchInputs(); + + void ClearDependencyData(); + + void AddIndentation(); + + // Currently active translator + Translator *psTranslator; + + HLSLccReflection &m_Reflection; // Callbacks for bindings and diagnostic info + + // Retrieve the name for which the input or output is declared as. Takes into account possible redirections. 
+ std::string GetDeclaredInputName(const Operand* psOperand, int *piRebase, int iIgnoreRedirect, uint32_t *puiIgnoreSwizzle) const; + std::string GetDeclaredOutputName(const Operand* psOperand, int* stream, uint32_t *puiIgnoreSwizzle, int *piRebase, int iIgnoreRedirect) const; + + bool OutputNeedsDeclaring(const Operand* psOperand, const int count); + + bool RequireExtension(const std::string &extName); + bool EnableExtension(const std::string &extName); + +private: + std::set m_EnabledExtensions; +}; diff --git a/third_party/HLSLcc/src/internal_includes/HLSLccToolkit.h b/third_party/HLSLcc/src/internal_includes/HLSLccToolkit.h new file mode 100644 index 0000000..6d7604e --- /dev/null +++ b/third_party/HLSLcc/src/internal_includes/HLSLccToolkit.h @@ -0,0 +1,134 @@ +#pragma once +#include "hlslcc.h" +#include "bstrlib.h" +#include +#include +#include + +#include "internal_includes/Instruction.h" +#include "internal_includes/Operand.h" + +class HLSLCrossCompilerContext; +struct ConstantBuffer; + +namespace HLSLcc +{ + uint32_t GetNumberBitsSet(uint32_t a); + + uint32_t SVTTypeToFlag(const SHADER_VARIABLE_TYPE eType); + + SHADER_VARIABLE_TYPE TypeFlagsToSVTType(const uint32_t typeflags); + + const char * GetConstructorForType(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision = true); + + const char * GetConstructorForTypeGLSL(const HLSLCrossCompilerContext *context, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision); + + const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType, const int components); + + std::string GetMatrixTypeName(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eBaseType, const int columns, const int rows); + + void AddSwizzleUsingElementCount(bstring dest, uint32_t count); + + int WriteMaskToComponentCount(uint32_t writeMask); + + uint32_t BuildComponentMaskFromElementCount(int count); + + // Returns true if we can do direct assignment between types (mostly for mediump<->highp floats etc) + bool DoAssignmentDataTypesMatch(SHADER_VARIABLE_TYPE dest, SHADER_VARIABLE_TYPE src); + + // Convert resource return type to SVT_ flags + uint32_t ResourceReturnTypeToFlag(const RESOURCE_RETURN_TYPE eType); + + SHADER_VARIABLE_TYPE ResourceReturnTypeToSVTType(const RESOURCE_RETURN_TYPE eType, const REFLECT_RESOURCE_PRECISION ePrec); + + RESOURCE_RETURN_TYPE SVTTypeToResourceReturnType(SHADER_VARIABLE_TYPE type); + + REFLECT_RESOURCE_PRECISION SVTTypeToPrecision(SHADER_VARIABLE_TYPE type); + + uint32_t ElemCountToAutoExpandFlag(uint32_t elemCount); + + bool IsOperationCommutative(int /* OPCODE_TYPE */ eOpCode); + + bool AreTempOperandsIdentical(const Operand * psA, const Operand * psB); + + int GetNumTextureDimensions(int /* RESOURCE_DIMENSION */ eResDim); + + SHADER_VARIABLE_TYPE SelectHigherType(SHADER_VARIABLE_TYPE a, SHADER_VARIABLE_TYPE b); + + // Returns true if the instruction adds 1 to the destination temp register + bool IsAddOneInstruction(const Instruction *psInst); + + bool CanDoDirectCast(const HLSLCrossCompilerContext *context, SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest); + + bool IsUnityFlexibleInstancingBuffer(const ConstantBuffer* psCBuf); + + // Helper function to print floats with full precision + void PrintFloat(bstring b, float f); + + bstring GetEarlyMain(HLSLCrossCompilerContext *psContext); + bstring GetPostShaderCode(HLSLCrossCompilerContext *psContext); + + // Flags for ForeachOperand + // Process suboperands +#define 
FEO_FLAG_SUBOPERAND 1 + // Process src operands +#define FEO_FLAG_SRC_OPERAND 2 + // Process destination operands +#define FEO_FLAG_DEST_OPERAND 4 + // Convenience: Process all operands, both src and dest, and all suboperands +#define FEO_FLAG_ALL (FEO_FLAG_SUBOPERAND | FEO_FLAG_SRC_OPERAND | FEO_FLAG_DEST_OPERAND) + + // For_each for all operands within a range of instructions. Flags above. + template void ForEachOperand(ItrType _begin, ItrType _end, int flags, F callback) + { + ItrType inst = _begin; + while (inst != _end) + { + uint32_t i, k; + + if ((flags & FEO_FLAG_DEST_OPERAND) || (flags & FEO_FLAG_SUBOPERAND)) + { + for (i = 0; i < inst->ui32FirstSrc; i++) + { + if (flags & FEO_FLAG_SUBOPERAND) + { + for (k = 0; k < MAX_SUB_OPERANDS; k++) + { + if (inst->asOperands[i].m_SubOperands[k].get()) + { + callback(inst, inst->asOperands[i].m_SubOperands[k].get(), FEO_FLAG_SUBOPERAND); + } + } + } + if (flags & FEO_FLAG_DEST_OPERAND) + { + callback(inst, &inst->asOperands[i], FEO_FLAG_DEST_OPERAND); + } + } + } + + if ((flags & FEO_FLAG_SRC_OPERAND) || (flags & FEO_FLAG_SUBOPERAND)) + { + for (i = inst->ui32FirstSrc; i < inst->ui32NumOperands; i++) + { + if (flags & FEO_FLAG_SUBOPERAND) + { + for (k = 0; k < MAX_SUB_OPERANDS; k++) + { + if (inst->asOperands[i].m_SubOperands[k].get()) + { + callback(inst, inst->asOperands[i].m_SubOperands[k].get(), FEO_FLAG_SUBOPERAND); + } + } + } + if (flags & FEO_FLAG_SRC_OPERAND) + { + callback(inst, &inst->asOperands[i], FEO_FLAG_SRC_OPERAND); + } + } + } + + inst++; + } + } +} diff --git a/third_party/HLSLcc/src/internal_includes/Instruction.h b/third_party/HLSLcc/src/internal_includes/Instruction.h new file mode 100644 index 0000000..a2826aa --- /dev/null +++ b/third_party/HLSLcc/src/internal_includes/Instruction.h @@ -0,0 +1,184 @@ +#pragma once + +#include "internal_includes/Operand.h" +#include "internal_includes/tokens.h" +#include "include/ShaderInfo.h" +#include + +#define ATOMIC_ADDRESS_BASIC 0 +#define ATOMIC_ADDRESS_ARRAY_DYNAMIC 1 +#define ATOMIC_ADDRESS_STRUCT_DYNAMIC 2 + +#define TEXSMP_FLAG_NONE 0x0 +#define TEXSMP_FLAG_LOD 0x1 //LOD comes from operand +#define TEXSMP_FLAG_DEPTHCOMPARE 0x2 +#define TEXSMP_FLAG_FIRSTLOD 0x4 //LOD is 0 +#define TEXSMP_FLAG_BIAS 0x8 +#define TEXSMP_FLAG_GRAD 0x10 +//Gather specific flags +#define TEXSMP_FLAG_GATHER 0x20 +#define TEXSMP_FLAG_PARAMOFFSET 0x40 //Offset comes from operand + +struct Instruction +{ + Instruction() : + eOpcode(OPCODE_NOP), + eBooleanTestType(INSTRUCTION_TEST_ZERO), + ui32NumOperands(0), + ui32FirstSrc(0), + m_Uses(), + m_SkipTranslation(false), + m_InductorRegister(0), + bSaturate(0), + ui32SyncFlags(0), + ui32PreciseMask(0), + ui32FuncIndexWithinInterface(0), + eResInfoReturnType(RESINFO_INSTRUCTION_RETURN_FLOAT), + bAddressOffset(0), + iUAddrOffset(0), + iVAddrOffset(0), + iWAddrOffset(0), + xType(RETURN_TYPE_UNUSED), + yType(RETURN_TYPE_UNUSED), + zType(RETURN_TYPE_UNUSED), + wType(RETURN_TYPE_UNUSED), + eResDim(RESOURCE_DIMENSION_UNKNOWN), + iCausedSplit(0), + id(0) + { + m_LoopInductors[0] = m_LoopInductors[1] = m_LoopInductors[2] = m_LoopInductors[3] = 0; + } + + // For creating unit tests only. 
Create an instruction with temps (unless reg is 0xffffffff in which case use OPERAND_TYPE_INPUT/OUTPUT) + Instruction(uint64_t _id, OPCODE_TYPE opcode, uint32_t reg1 = 0, uint32_t reg1Mask = 0, uint32_t reg2 = 0, uint32_t reg2Mask = 0, uint32_t reg3 = 0, uint32_t reg3Mask = 0, uint32_t reg4 = 0, uint32_t reg4Mask = 0) : + ui32SyncFlags(0), + bSaturate(0), + ui32PreciseMask(0), + ui32FuncIndexWithinInterface(0), + eResInfoReturnType(RESINFO_INSTRUCTION_RETURN_FLOAT), + bAddressOffset(0), + iUAddrOffset(0), + iVAddrOffset(0), + iWAddrOffset(0), + xType(RETURN_TYPE_UNUSED), + yType(RETURN_TYPE_UNUSED), + zType(RETURN_TYPE_UNUSED), + wType(RETURN_TYPE_UNUSED), + eResDim(RESOURCE_DIMENSION_UNKNOWN), + iCausedSplit(0) + { + id = _id; + eOpcode = opcode; + eBooleanTestType = INSTRUCTION_TEST_ZERO; + ui32FirstSrc = 0; + ui32NumOperands = 0; + m_LoopInductors[0] = m_LoopInductors[1] = m_LoopInductors[2] = m_LoopInductors[3] = 0; + m_SkipTranslation = false; + m_InductorRegister = 0; + + if (reg1Mask == 0) + return; + + ui32NumOperands++; + asOperands[0].eType = reg1 == 0xffffffff ? OPERAND_TYPE_OUTPUT : OPERAND_TYPE_TEMP; + asOperands[0].ui32RegisterNumber = reg1 == 0xffffffff ? 0 : reg1; + asOperands[0].ui32CompMask = reg1Mask; + asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; + + if (reg2Mask == 0) + return; + + ui32FirstSrc = 1; + ui32NumOperands++; + + asOperands[1].eType = reg2 == 0xffffffff ? OPERAND_TYPE_INPUT : OPERAND_TYPE_TEMP; + asOperands[1].ui32RegisterNumber = reg2 == 0xffffffff ? 0 : reg2; + asOperands[1].ui32CompMask = reg2Mask; + asOperands[1].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; + + if (reg3Mask == 0) + return; + ui32NumOperands++; + + asOperands[2].eType = reg3 == 0xffffffff ? OPERAND_TYPE_INPUT : OPERAND_TYPE_TEMP; + asOperands[2].ui32RegisterNumber = reg3 == 0xffffffff ? 0 : reg3; + asOperands[2].ui32CompMask = reg3Mask; + asOperands[2].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; + + if (reg4Mask == 0) + return; + ui32NumOperands++; + + asOperands[3].eType = reg4 == 0xffffffff ? OPERAND_TYPE_INPUT : OPERAND_TYPE_TEMP; + asOperands[3].ui32RegisterNumber = reg4 == 0xffffffff ? 0 : reg4; + asOperands[3].ui32CompMask = reg4Mask; + asOperands[3].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; + } + + // Returns true if this instruction is a conditional branch + bool IsConditionalBranchInstruction() const + { + switch (eOpcode) + { + case OPCODE_IF: + case OPCODE_BREAKC: + case OPCODE_CONTINUEC: + case OPCODE_RETC: + return true; + default: + return false; + } + } + + bool IsPartialPrecisionSamplerInstruction(const ShaderInfo &info, OPERAND_MIN_PRECISION *pType) const; + + // Flags for ChangeOperandTempRegister +#define UD_CHANGE_SUBOPERANDS 1 +#define UD_CHANGE_MAIN_OPERAND 2 +#define UD_CHANGE_ALL 3 + + void ChangeOperandTempRegister(Operand *psOperand, uint32_t oldReg, uint32_t newReg, uint32_t compMask, uint32_t flags, uint32_t rebase); + + + OPCODE_TYPE eOpcode; + INSTRUCTION_TEST_BOOLEAN eBooleanTestType; + uint32_t ui32SyncFlags; + uint32_t ui32NumOperands; + uint32_t ui32FirstSrc; + Operand asOperands[6]; + uint32_t bSaturate; + uint32_t ui32PreciseMask; + uint32_t ui32FuncIndexWithinInterface; + RESINFO_RETURN_TYPE eResInfoReturnType; + + int bAddressOffset; + int8_t iUAddrOffset; + int8_t iVAddrOffset; + int8_t iWAddrOffset; + RESOURCE_RETURN_TYPE xType, yType, zType, wType; + RESOURCE_DIMENSION eResDim; + int8_t iCausedSplit; // Nonzero if has caused a temp split. 
Later used by sampler datatype tweaking + + struct Use + { + Use() : m_Inst(0), m_Op(0) {} + Use(const Use& a) = default; + Use(Use&& a) = default; + Use(Instruction* inst, Operand* op) : m_Inst(inst), m_Op(op) {} + ~Use() = default; + + Use& operator=(const Use& a) = default; + Use& operator=(Use&& a) = default; + + Instruction* m_Inst; // The instruction that references the result of this instruction + Operand* m_Op; // The operand within the instruction above. Note: can also be suboperand. + }; + + std::vector m_Uses; // Array of use sites for the result(s) of this instruction, if any of the results is a temp reg. + + Instruction* m_LoopInductors[4]; // If OPCODE_LOOP and is suitable for transforming into for-loop, contains pointers to for initializer, end condition, breakc, and increment. + bool m_SkipTranslation; // If true, don't emit this instruction (currently used by the for loop translation) + uint32_t m_InductorRegister; // If non-zero, the inductor variable can be declared in the for statement, and this register number has been allocated for it + + uint64_t id; +}; diff --git a/third_party/HLSLcc/src/internal_includes/LoopTransform.h b/third_party/HLSLcc/src/internal_includes/LoopTransform.h new file mode 100644 index 0000000..dacec4b --- /dev/null +++ b/third_party/HLSLcc/src/internal_includes/LoopTransform.h @@ -0,0 +1,8 @@ +#pragma once + +class ShaderPhase; +class HLSLCrossCompilerContext; +namespace HLSLcc +{ + void DoLoopTransform(HLSLCrossCompilerContext *psContext, ShaderPhase &phase); +} diff --git a/third_party/HLSLcc/src/internal_includes/Operand.h b/third_party/HLSLcc/src/internal_includes/Operand.h new file mode 100644 index 0000000..59bae49 --- /dev/null +++ b/third_party/HLSLcc/src/internal_includes/Operand.h @@ -0,0 +1,150 @@ +#pragma once + +#include "internal_includes/tokens.h" +#include +#include + +enum { MAX_SUB_OPERANDS = 3 }; +class Operand; +class HLSLCrossCompilerContext; +struct Instruction; + +#if _MSC_VER +// We want to disable the "array will be default-initialized" warning, as that's exactly what we want +#pragma warning(disable: 4351) +#endif + +class Operand +{ +public: + typedef std::shared_ptr SubOperandPtr; + + Operand() + : + iExtended(), + eType(), + eModifier(), + eMinPrecision(), + iIndexDims(), + iWriteMask(), + iGSInput(), + iPSInOut(), + iWriteMaskEnabled(), + iArrayElements(), + iNumComponents(), + eSelMode(), + ui32CompMask(), + ui32Swizzle(), + aui32Swizzle(), + aui32ArraySizes(), + ui32RegisterNumber(), + afImmediates(), + adImmediates(), + eSpecialName(), + specialName(), + eIndexRep(), + m_SubOperands(), + aeDataType(), + m_Rebase(0), + m_Size(0), + m_Defines(), + m_ForLoopInductorName(0) +#ifdef _DEBUG + , id(0) +#endif + {} + + // Retrieve the mask of all the components this operand accesses (either reads from or writes to). + // Note that destination writemask does affect the effective access mask. + uint32_t GetAccessMask() const; + + // Returns the index of the highest accessed component, based on component mask + int GetMaxComponent() const; + + bool IsSwizzleReplicated() const; + + // Get the number of elements returned by operand, taking additional component mask into account + //e.g. + //.z = 1 + //.x = 1 + //.yw = 2 + uint32_t GetNumSwizzleElements(uint32_t ui32CompMask = OPERAND_4_COMPONENT_MASK_ALL) const; + + // When this operand is used as an input declaration, how many components does it have? + int GetNumInputElements(const HLSLCrossCompilerContext *psContext) const; + + // Retrieve the operand data type. 
+ SHADER_VARIABLE_TYPE GetDataType(HLSLCrossCompilerContext* psContext, SHADER_VARIABLE_TYPE ePreferredTypeForImmediates = SVT_INT) const; + + // Returns 0 if the register used by the operand is per-vertex, or 1 if per-patch + int GetRegisterSpace(const HLSLCrossCompilerContext *psContext) const; + // Same as above but with explicit shader type and phase + int GetRegisterSpace(SHADER_TYPE eShaderType, SHADER_PHASE_TYPE eShaderPhaseType) const; + + // Find the operand that contains the dynamic index for this operand (array in constant buffer). + // When isAoS is true, we'll try to find the original index var to avoid additional calculations. + // needsIndexCalcRevert output will tell if we need to divide the value to get the correct index. + Operand* GetDynamicIndexOperand(HLSLCrossCompilerContext *psContext, const ShaderVarType* psVar, bool isAoS, bool *needsIndexCalcRevert) const; + + // Maps REFLECT_RESOURCE_PRECISION into OPERAND_MIN_PRECISION as much as possible + static OPERAND_MIN_PRECISION ResourcePrecisionToOperandPrecision(REFLECT_RESOURCE_PRECISION ePrec); + + int iExtended; + OPERAND_TYPE eType; + OPERAND_MODIFIER eModifier; + OPERAND_MIN_PRECISION eMinPrecision; + int iIndexDims; + int iWriteMask; + int iGSInput; + int iPSInOut; + int iWriteMaskEnabled; + int iArrayElements; + int iNumComponents; + + OPERAND_4_COMPONENT_SELECTION_MODE eSelMode; + uint32_t ui32CompMask; + uint32_t ui32Swizzle; + uint32_t aui32Swizzle[4]; + + uint32_t aui32ArraySizes[3]; + uint32_t ui32RegisterNumber; + //If eType is OPERAND_TYPE_IMMEDIATE32 + float afImmediates[4]; + //If eType is OPERAND_TYPE_IMMEDIATE64 + double adImmediates[4]; + + SPECIAL_NAME eSpecialName; + std::string specialName; + + OPERAND_INDEX_REPRESENTATION eIndexRep[3]; + + SubOperandPtr m_SubOperands[MAX_SUB_OPERANDS]; + + //One type for each component. + SHADER_VARIABLE_TYPE aeDataType[4]; + + uint32_t m_Rebase; // Rebase value, for constant array accesses. + uint32_t m_Size; // Component count, only for constant array access. + + struct Define + { + Define() : m_Inst(0), m_Op(0) {} + Define(const Define& a) = default; + Define(Define&& a) = default; + Define(Instruction* inst, Operand* op) : m_Inst(inst), m_Op(op) {} + ~Define() = default; + + Define& operator=(const Define& other) = default; + Define& operator=(Define&& other) = default; + + Instruction* m_Inst; // Instruction that writes to the temp + Operand* m_Op; // The (destination) operand within that instruction. + }; + + std::vector m_Defines; // Array of instructions whose results this operand can use. 
(only if eType == OPERAND_TYPE_TEMP) + uint32_t m_ForLoopInductorName; // If non-zero, this (eType==OPERAND_TYPE_TEMP) is an inductor variable used in for loop, and it has a special number as given here (overrides ui32RegisterNumber) + +#ifdef _DEBUG + uint64_t id; +#endif +}; diff --git a/third_party/HLSLcc/src/internal_includes/Shader.h b/third_party/HLSLcc/src/internal_includes/Shader.h new file mode 100644 index 0000000..98a716b --- /dev/null +++ b/third_party/HLSLcc/src/internal_includes/Shader.h @@ -0,0 +1,255 @@ +#pragma once + +#include +#include +#include + +#include "growing_array.h" +#include "internal_includes/tokens.h" +#include "internal_includes/reflect.h" +#include "include/ShaderInfo.h" +#include "internal_includes/Instruction.h" +#include "internal_includes/Declaration.h" +#include "internal_includes/ControlFlowGraph.h" +#include "bstrlib.h" + +struct ConstantArrayChunk +{ + ConstantArrayChunk() : m_Size(0), m_AccessMask(0), m_Rebase(0), m_ComponentCount(0) {} + ConstantArrayChunk(uint32_t sz, uint32_t mask, Operand *firstUse) + : m_Size(sz), m_AccessMask(mask), m_Rebase(0), m_ComponentCount(0) + { + m_UseSites.push_back(firstUse); + } + + uint32_t m_Size; + uint32_t m_AccessMask; + uint32_t m_Rebase; + uint32_t m_ComponentCount; + + std::vector m_UseSites; +}; +typedef std::multimap ChunkMap; + +struct ConstantArrayInfo +{ + ConstantArrayInfo() : m_OrigDeclaration(0), m_Chunks() {} + + Declaration *m_OrigDeclaration; // Pointer to the original declaration of the const array + ChunkMap m_Chunks; // map of , same start offset might have multiple entries for different access masks +}; + +class ShaderPhase +{ +public: + ShaderPhase() + : + ePhase(MAIN_PHASE), + ui32InstanceCount(0), + postShaderCode(), + hasPostShaderCode(0), + earlyMain(), + ui32OrigTemps(0), + ui32TotalTemps(0), + psTempDeclaration(NULL), + pui32SplitInfo(), + peTempTypes(), + acInputNeedsRedirect(), + acOutputNeedsRedirect(), + acPatchConstantsNeedsRedirect(), + m_CFG(), + m_CFGInitialized(false), + m_NextFreeTempRegister(1), + m_NextTexCoordTemp(0) + {} + + void ResolveUAVProperties(const ShaderInfo& sInfo); + + void UnvectorizeImmMoves(); // Transform MOV tX.xyz, (0, 1, 2) into MOV tX.x, 0; MOV tX.y, 1; MOV tX.z, 2 to make datatype analysis easier + + void PruneConstArrays(); // Walk through everything that accesses a const array to see if we could make it smaller + + void ExpandSWAPCs(); // Expand all SWAPC opcodes into a bunch of MOVCs. Must be done first! + + ConstantArrayInfo m_ConstantArrayInfo; + + std::vector psDecl; + std::vector psInst; + + SHADER_PHASE_TYPE ePhase; + uint32_t ui32InstanceCount; // In case of hull shaders, how many instances this phase needs to have. Defaults to 1. + bstring postShaderCode;//End of main or before emit() + int hasPostShaderCode; + + bstring earlyMain;//Code to be inserted at the start of phase + + uint32_t ui32OrigTemps; // The number of temporaries this phase originally declared + uint32_t ui32TotalTemps; // The number of temporaries this phase has now + Declaration *psTempDeclaration; // Shortcut to the OPCODE_DCL_TEMPS opcode + + // The split table is a table containing the index of the original register this register was split out from, or 0xffffffff + // Format: lowest 16 bits: original register. bits 16-23: rebase (eg value of 1 means .yzw was changed to .xyz): bits 24-31: component count + std::vector pui32SplitInfo; + std::vector peTempTypes; + + // These are needed in cases we have 2 vec2 texcoords combined into one vec4 and they are accessed together. 
+ std::vector acInputNeedsRedirect; // If 0xff, requires re-routing all reads via a combined vec4. If 0xfe, the same but the vec4 has already been declared. + std::vector acOutputNeedsRedirect; // Same for outputs + std::vector acPatchConstantsNeedsRedirect; // Same for patch constants + + // Get the Control Flow Graph for this phase, build it if necessary. + HLSLcc::ControlFlow::ControlFlowGraph &GetCFG(); + + uint32_t m_NextFreeTempRegister; // A counter for creating new temporaries for for-loops. + uint32_t m_NextTexCoordTemp; // A counter for creating tex coord temps for driver issue workarounds + +private: + bool m_CFGInitialized; + HLSLcc::ControlFlow::ControlFlowGraph m_CFG; +}; + +class Shader +{ +public: + + Shader() + : + ui32MajorVersion(0), + ui32MinorVersion(0), + eShaderType(INVALID_SHADER), + eTargetLanguage(LANG_DEFAULT), + extensions(0), + fp64(0), + ui32ShaderLength(0), + aui32FuncTableToFuncPointer(), + aui32FuncBodyToFuncTable(), + funcTable(), + funcPointer(), + ui32NextClassFuncName(), + pui32FirstToken(NULL), + asPhases(), + sInfo(), + abScalarInput(), + abScalarOutput(), + aIndexedInput(), + aIndexedOutput(), + aIndexedInputParents(), + aeResourceDims(), + acInputDeclared(), + acOutputDeclared(), + aiOpcodeUsed(NUM_OPCODES, 0), + ui32CurrentVertexOutputStream(0), + textureSamplers(), + m_DummySamplerDeclared(false), + maxSemanticIndex(0) + { + } + + // Retrieve the number of components the temp register has. + uint32_t GetTempComponentCount(SHADER_VARIABLE_TYPE eType, uint32_t ui32Reg) const; + + //Hull shaders have multiple phases. + //Each phase has its own temps. + //Convert from per-phase temps to global temps. + void ConsolidateHullTempVars(); + + // Detect temp registers per data type that are actually used. + void PruneTempRegisters(); + + // Check if inputs and outputs are accessed across semantic boundaries + // as in, 2x texcoord vec2's are packed together as vec4 but still accessed together. + void AnalyzeIOOverlap(); + + // Compute maxSemanticIndex based on the results of AnalyzeIOOverlap + void SetMaxSemanticIndex(); + + // Change all references to vertex position to always be highp, having them be mediump causes problems on Metal and Vivante GPUs. + void ForcePositionToHighp(); + + void FindUnusedGlobals(uint32_t flags); // Finds the DCL_CONSTANT_BUFFER with name "$Globals" and searches through all usages for each member of it and mark if they're actually ever used. + + void ExpandSWAPCs(); + + uint32_t ui32MajorVersion; + uint32_t ui32MinorVersion; + SHADER_TYPE eShaderType; + + GLLang eTargetLanguage; + const struct GlExtensions *extensions; + + int fp64; + + //DWORDs in program code, including version and length tokens. + uint32_t ui32ShaderLength; + + + //Instruction* functions;//non-main subroutines + HLSLcc::growing_vector aui32FuncTableToFuncPointer; // dynamic alloc? + HLSLcc::growing_vector aui32FuncBodyToFuncTable; + + struct FuncTableEntry + { + HLSLcc::growing_vector aui32FuncBodies; + }; + HLSLcc::growing_vector funcTable; + + struct FuncPointerEntry + { + HLSLcc::growing_vector aui32FuncTables; + uint32_t ui32NumBodiesPerTable; + }; + + HLSLcc::growing_vector funcPointer; + + HLSLcc::growing_vector ui32NextClassFuncName; + + const uint32_t* pui32FirstToken;//Reference for calculating current position in token stream. + + std::vector asPhases; + + ShaderInfo sInfo; + + // There are 2 input/output register spaces in DX bytecode: one for per-patch data and one for per-vertex. 
+ // Which one is used depends on the context: + // per-vertex space is used in vertex/pixel/geom shaders always + // hull shader control point phase uses per-vertex by default, other phases are per-patch by default (can access per-vertex with OPERAND_TYPE_I/O_CONTROL_POINT) + // domain shader is per-patch by default, can access per-vertex with OPERAND_TYPE_I/O_CONTROL_POINT + + // Below, the [2] is accessed with 0 == per-vertex, 1 == per-patch + // Note that these ints are component masks + HLSLcc::growing_vector abScalarInput[2]; + HLSLcc::growing_vector abScalarOutput[2]; + + HLSLcc::growing_vector aIndexedInput[2]; + HLSLcc::growing_vector aIndexedOutput[2]; + + HLSLcc::growing_vector aIndexedInputParents[2]; + + HLSLcc::growing_vector aeResourceDims; + + HLSLcc::growing_vector acInputDeclared[2]; + HLSLcc::growing_vector acOutputDeclared[2]; + + std::vector aiOpcodeUsed; // Initialized to NUM_OPCODES elements above. + + uint32_t ui32CurrentVertexOutputStream; + + TextureSamplerPairs textureSamplers; + + std::vector psIntTempSizes; // Array for whether this temp register needs declaration as int temp + std::vector psInt16TempSizes; // min16ints + std::vector psInt12TempSizes; // min12ints + std::vector psUIntTempSizes; // Same for uints + std::vector psUInt16TempSizes; // ... and for uint16's + std::vector psFloatTempSizes; // ...and for floats + std::vector psFloat16TempSizes; // ...and for min16floats + std::vector psFloat10TempSizes; // ...and for min10floats + std::vector psDoubleTempSizes; // ...and for doubles + std::vector psBoolTempSizes; // ... and for bools + + bool m_DummySamplerDeclared; // If true, the shader doesn't declare any samplers but uses texelFetch and we have added a dummy sampler for Vulkan for that. + uint32_t maxSemanticIndex; // Highest semantic index found by SignatureAnalysis + +private: + void DoIOOverlapOperand(ShaderPhase *psPhase, Operand *psOperand); +}; diff --git a/third_party/HLSLcc/src/internal_includes/Translator.h b/third_party/HLSLcc/src/internal_includes/Translator.h new file mode 100644 index 0000000..ae5224a --- /dev/null +++ b/third_party/HLSLcc/src/internal_includes/Translator.h @@ -0,0 +1,32 @@ +#pragma once +#include "HLSLCrossCompilerContext.h" +#include "Shader.h" + +struct Declaration; +// Base class for translator backend implenentations. +class Translator +{ +protected: + HLSLCrossCompilerContext *psContext; +public: + explicit Translator(HLSLCrossCompilerContext *ctx) : psContext(ctx) {} + virtual ~Translator() {} + + virtual bool Translate() = 0; + + virtual void TranslateDeclaration(const Declaration *psDecl) = 0; + + // Translate system value type to name, return true if succeeded and no further translation is necessary + virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL, int *iIgnoreRedirect = NULL) = 0; + + // In GLSL, the input and output names cannot clash. + // Also, the output name of previous stage must match the input name of the next stage. + // So, do gymnastics depending on which shader we're running on and which other shaders exist in this program. 
+ // + virtual void SetIOPrefixes() = 0; + + void SetExtensions(const struct GlExtensions *ext) + { + psContext->psShader->extensions = ext; + } +}; diff --git a/third_party/HLSLcc/src/internal_includes/UseDefineChains.h b/third_party/HLSLcc/src/internal_includes/UseDefineChains.h new file mode 100644 index 0000000..9c2b582 --- /dev/null +++ b/third_party/HLSLcc/src/internal_includes/UseDefineChains.h @@ -0,0 +1,138 @@ +#pragma once + +#include +#include +#include +#include +#include + +#include +#include + +struct DefineUseChainEntry; +struct UseDefineChainEntry; + +typedef std::set DefineSet; +typedef std::set UsageSet; + +struct Instruction; +class Operand; +class ShaderInfo; +namespace HLSLcc +{ +namespace ControlFlow +{ + class ControlFlowGraph; +} +} + + +// Def-Use chain per temp component +struct DefineUseChainEntry +{ + DefineUseChainEntry() + : psInst(0) + , psOp(0) + , usages() + , writeMask(0) + , index(0) + , isStandalone(0) + { + memset(psSiblings, 0, 4 * sizeof(DefineUseChainEntry *)); + } + + Instruction *psInst; // The declaration (write to this temp component) + Operand *psOp; // The operand within this instruction for the write target + UsageSet usages; // List of usages that are dependent on this write + uint32_t writeMask; // Access mask; which all components were written to in the same op + uint32_t index; // For which component was this definition created for? + uint32_t isStandalone; // A shortcut for analysis: if nonzero, all siblings of all usages for both this and all this siblings + struct DefineUseChainEntry *psSiblings[4]; // In case of vectorized op, contains pointer to this define's corresponding entries for the other components. + +#if _DEBUG + bool operator==(const DefineUseChainEntry &a) const + { + if (psInst != a.psInst) + return false; + if (psOp != a.psOp) + return false; + if (writeMask != a.writeMask) + return false; + if (index != a.index) + return false; + if (isStandalone != a.isStandalone) + return false; + + // Just check that each one has the same amount of usages + if (usages.size() != a.usages.size()) + return false; + + return true; + } + +#endif +}; + +typedef std::list DefineUseChain; + +struct UseDefineChainEntry +{ + UseDefineChainEntry() + : psInst(0) + , psOp(0) + , defines() + , accessMask(0) + , index(0) + { + memset(psSiblings, 0, 4 * sizeof(UseDefineChainEntry *)); + } + + Instruction *psInst; // The use (read from this temp component) + Operand *psOp; // The operand within this instruction for the read + DefineSet defines; // List of writes that are visible to this read + uint32_t accessMask; // Which all components were read together with this one + uint32_t index; // For which component was this usage created for? + struct UseDefineChainEntry *psSiblings[4]; // In case of vectorized op, contains pointer to this usage's corresponding entries for the other components. 
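+
+    // Illustrative example of what a single entry records (assumes decoded DXBC
+    // along these lines; the register numbers are hypothetical):
+    //
+    //     mov r0.x, l(1.0)        // definition A of r0.x
+    //     mov r0.x, l(2.0)        // definition B of r0.x
+    //     add r1.x, r0.x, r0.x    // a use of r0.x
+    //
+    // The UseDefineChainEntry built for the 'add' source read of r0.x has psInst
+    // pointing at the add, psOp at that source operand, index == 0 (component x),
+    // and 'defines' containing only definition B, because A is overwritten before
+    // the read and therefore never reaches it.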
+ +#if _DEBUG + bool operator==(const UseDefineChainEntry &a) const + { + if (psInst != a.psInst) + return false; + if (psOp != a.psOp) + return false; + if (accessMask != a.accessMask) + return false; + if (index != a.index) + return false; + + // Just check that each one has the same amount of usages + if (defines.size() != a.defines.size()) + return false; + + return true; + } + +#endif +}; + +typedef std::list UseDefineChain; + +typedef std::map UseDefineChains; +typedef std::map DefineUseChains; +typedef std::vector ActiveDefinitions; + +// Do flow control analysis on the instructions and build the define-use and use-define chains +void BuildUseDefineChains(std::vector &instructions, uint32_t ui32NumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, HLSLcc::ControlFlow::ControlFlowGraph &cfg); + +// Do temp splitting based on use-define chains +void UDSplitTemps(uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector &pui32SplitTable); + +// Based on the sampler precisions, downgrade the definitions if possible. +void UpdateSamplerPrecisions(const ShaderInfo &psContext, DefineUseChains &psDUChains, uint32_t ui32NumTemps); + +// Optimization pass for successive passes: Mark Operand->isStandalone for definitions that are "standalone": all usages (and all their sibligns) of this and all its siblings only see this definition. +void CalculateStandaloneDefinitions(DefineUseChains &psDUChains, uint32_t ui32NumTemps); + +// Write the uses and defines back to Instruction and Operand member lists. +void WriteBackUsesAndDefines(DefineUseChains &psDUChains); diff --git a/third_party/HLSLcc/src/internal_includes/debug.h b/third_party/HLSLcc/src/internal_includes/debug.h new file mode 100644 index 0000000..bc201c0 --- /dev/null +++ b/third_party/HLSLcc/src/internal_includes/debug.h @@ -0,0 +1,21 @@ +#ifndef DEBUG_H_ +#define DEBUG_H_ + +#ifdef _DEBUG +#include "assert.h" +#define ASSERT(expr) CustomAssert(expr) +static void CustomAssert(int expression) +{ + if (!expression) + { + assert(0); + } +} + +#else +#define UNUSED(EXPR_) \ + do { if (false) (void)(EXPR_); } while(0) +#define ASSERT(expr) UNUSED(expr) +#endif + +#endif diff --git a/third_party/HLSLcc/src/internal_includes/decode.h b/third_party/HLSLcc/src/internal_includes/decode.h new file mode 100644 index 0000000..331cca4 --- /dev/null +++ b/third_party/HLSLcc/src/internal_includes/decode.h @@ -0,0 +1,10 @@ +#ifndef DECODE_H +#define DECODE_H + +#include "internal_includes/Shader.h" + +Shader* DecodeDXBC(uint32_t* data, uint32_t decodeFlags); + +void UpdateOperandReferences(Shader* psShader, SHADER_PHASE_TYPE eShaderPhaseType, Instruction* psInst); + +#endif diff --git a/third_party/HLSLcc/src/internal_includes/languages.h b/third_party/HLSLcc/src/internal_includes/languages.h new file mode 100644 index 0000000..eefbeca --- /dev/null +++ b/third_party/HLSLcc/src/internal_includes/languages.h @@ -0,0 +1,328 @@ +#ifndef LANGUAGES_H +#define LANGUAGES_H + +#include "hlslcc.h" +#include "HLSLCrossCompilerContext.h" +#include "Shader.h" + +static int InOutSupported(const GLLang eLang) +{ + if (eLang == LANG_ES_100 || eLang == LANG_120) + { + return 0; + } + return 1; +} + +static int WriteToFragData(const GLLang eLang) +{ + if (eLang == LANG_ES_100 || eLang == LANG_120) + { + return 1; + } + return 0; +} + +static int ShaderBitEncodingSupported(const GLLang eLang) +{ + if (eLang != LANG_ES_300 && + eLang != LANG_ES_310 && + eLang < LANG_330) + { + return 0; + } + return 1; +} + +static int 
HaveOverloadedTextureFuncs(const GLLang eLang) +{ + if (eLang == LANG_ES_100 || eLang == LANG_120) + { + return 0; + } + return 1; +} + +static bool IsMobileTarget(const HLSLCrossCompilerContext *psContext) +{ + if ((psContext->flags & HLSLCC_FLAG_MOBILE_TARGET) != 0) + return true; + + return false; +} + +//Only enable for ES. Vulkan and Switch. +//Not present in 120, ignored in other desktop languages. Specifically enabled on Vulkan. +static int HavePrecisionQualifiers(const HLSLCrossCompilerContext *psContext) +{ + if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0 || (psContext->flags & HLSLCC_FLAG_NVN_TARGET) != 0) + return 1; + + const GLLang eLang = psContext->psShader->eTargetLanguage; + if (eLang >= LANG_ES_100 && eLang <= LANG_ES_310) + { + return 1; + } + return 0; +} + +static int EmitLowp(const HLSLCrossCompilerContext *psContext) +{ + const GLLang eLang = psContext->psShader->eTargetLanguage; + return eLang == LANG_ES_100 ? 1 : 0; +} + +static int HaveCubemapArray(const GLLang eLang) +{ + if (eLang >= LANG_400 && eLang <= LANG_GL_LAST) + return 1; + return 0; +} + +static bool IsESLanguage(const GLLang eLang) +{ + return (eLang >= LANG_ES_FIRST && eLang <= LANG_ES_LAST); +} + +static bool IsDesktopGLLanguage(const GLLang eLang) +{ + return (eLang >= LANG_GL_FIRST && eLang <= LANG_GL_LAST); +} + +//Only on vertex inputs and pixel outputs. +static int HaveLimitedInOutLocationQualifier(const GLLang eLang, const struct GlExtensions *extensions) +{ + if (eLang >= LANG_330 || eLang == LANG_ES_300 || eLang == LANG_ES_310 || (extensions && ((struct GlExtensions*)extensions)->ARB_explicit_attrib_location)) + { + return 1; + } + return 0; +} + +static int HaveInOutLocationQualifier(const GLLang eLang) +{ + if (eLang >= LANG_410 || eLang == LANG_ES_310) + { + return 1; + } + return 0; +} + +//layout(binding = X) uniform {uniformA; uniformB;} +//layout(location = X) uniform uniform_name; +static int HaveUniformBindingsAndLocations(const GLLang eLang, const struct GlExtensions *extensions, unsigned int flags) +{ + if (flags & HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS) + return 0; + + if (eLang >= LANG_430 || eLang == LANG_ES_310 || + (extensions && ((struct GlExtensions*)extensions)->ARB_explicit_uniform_location && ((struct GlExtensions*)extensions)->ARB_shading_language_420pack)) + { + return 1; + } + return 0; +} + +static int DualSourceBlendSupported(const GLLang eLang) +{ + if (eLang >= LANG_330) + { + return 1; + } + return 0; +} + +static int SubroutinesSupported(const GLLang eLang) +{ + if (eLang >= LANG_400) + { + return 1; + } + return 0; +} + +//Before 430, flat/smooth/centroid/noperspective must match +//between fragment and its previous stage. +//HLSL bytecode only tells us the interpolation in pixel shader. 
+static int PixelInterpDependency(const GLLang eLang) +{ + if (eLang < LANG_430) + { + return 1; + } + return 0; +} + +static int HaveUnsignedTypes(const GLLang eLang) +{ + switch (eLang) + { + case LANG_ES_100: + case LANG_120: + return 0; + default: + break; + } + return 1; +} + +static int HaveBitEncodingOps(const GLLang eLang) +{ + switch (eLang) + { + case LANG_ES_100: + case LANG_120: + return 0; + default: + break; + } + return 1; +} + +static int HaveNativeBitwiseOps(const GLLang eLang) +{ + switch (eLang) + { + case LANG_ES_100: + case LANG_120: + return 0; + default: + break; + } + return 1; +} + +static int HaveDynamicIndexing(HLSLCrossCompilerContext *psContext, const Operand* psOperand = NULL) +{ + // WebGL only allows dynamic indexing with constant expressions, loop indices or a combination. + // The only exception is for uniform access in vertex shaders, which can be indexed using any expression. + + switch (psContext->psShader->eTargetLanguage) + { + case LANG_ES_100: + case LANG_120: + if (psOperand != NULL) + { + if (psOperand->m_ForLoopInductorName) + return 1; + + if (psContext->psShader->eShaderType == VERTEX_SHADER && psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER) + return 1; + } + + return 0; + default: + break; + } + return 1; +} + +static int HaveGather(const GLLang eLang) +{ + if (eLang >= LANG_400 || eLang == LANG_ES_310) + { + return 1; + } + return 0; +} + +static int HaveGatherNonConstOffset(const GLLang eLang) +{ + if (eLang >= LANG_420 || eLang == LANG_ES_310) + { + return 1; + } + return 0; +} + +static int HaveQueryLod(const GLLang eLang) +{ + if (eLang >= LANG_400) + { + return 1; + } + return 0; +} + +static int HaveQueryLevels(const GLLang eLang) +{ + if (eLang >= LANG_430) + { + return 1; + } + return 0; +} + +static int HaveFragmentCoordConventions(const GLLang eLang) +{ + if (eLang >= LANG_150) + { + return 1; + } + return 0; +} + +static int HaveGeometryShaderARB(const GLLang eLang) +{ + if (eLang >= LANG_150) + { + return 1; + } + return 0; +} + +static int HaveAtomicCounter(const GLLang eLang) +{ + if (eLang >= LANG_420 || eLang == LANG_ES_310) + { + return 1; + } + return 0; +} + +static int HaveAtomicMem(const GLLang eLang) +{ + if (eLang >= LANG_430 || eLang == LANG_ES_310) + { + return 1; + } + return 0; +} + +static int HaveImageAtomics(const GLLang eLang) +{ + if (eLang >= LANG_420) + { + return 1; + } + return 0; +} + +static int HaveCompute(const GLLang eLang) +{ + if (eLang >= LANG_430 || eLang == LANG_ES_310) + { + return 1; + } + return 0; +} + +static int HaveImageLoadStore(const GLLang eLang) +{ + if (eLang >= LANG_420 || eLang == LANG_ES_310) + { + return 1; + } + return 0; +} + +static int HavePreciseQualifier(const GLLang eLang) +{ + if (eLang >= LANG_400) // TODO: Add for ES when we're adding 3.2 lang + { + return 1; + } + return 0; +} + +#endif diff --git a/third_party/HLSLcc/src/internal_includes/reflect.h b/third_party/HLSLcc/src/internal_includes/reflect.h new file mode 100644 index 0000000..ddc468c --- /dev/null +++ b/third_party/HLSLcc/src/internal_includes/reflect.h @@ -0,0 +1,26 @@ +#ifndef REFLECT_H +#define REFLECT_H + +#include "hlslcc.h" + +struct ShaderPhase_TAG; + +typedef struct +{ + uint32_t* pui32Inputs; + uint32_t* pui32Outputs; + uint32_t* pui32Resources; + uint32_t* pui32Interfaces; + uint32_t* pui32Inputs11; + uint32_t* pui32Outputs11; + uint32_t* pui32OutputsWithStreams; + uint32_t* pui32PatchConstants; + uint32_t* pui32PatchConstants11; +} ReflectionChunks; + +void LoadShaderInfo(const uint32_t 
ui32MajorVersion, + const uint32_t ui32MinorVersion, + const ReflectionChunks* psChunks, + ShaderInfo* psInfo, uint32_t decodeFlags); + +#endif diff --git a/third_party/HLSLcc/src/internal_includes/toGLSL.h b/third_party/HLSLcc/src/internal_includes/toGLSL.h new file mode 100644 index 0000000..b4ae9a8 --- /dev/null +++ b/third_party/HLSLcc/src/internal_includes/toGLSL.h @@ -0,0 +1,244 @@ +#pragma once + +#include "hlslcc.h" +#include "internal_includes/Translator.h" + +class HLSLCrossCompilerContext; + +class ToGLSL : public Translator +{ +protected: + GLLang language; + bool m_NeedUnityInstancingArraySizeDecl; + bool m_NeedUnityPreTransformDecl; + +public: + explicit ToGLSL(HLSLCrossCompilerContext* ctx) : + Translator(ctx), + language(LANG_DEFAULT), + m_NeedUnityInstancingArraySizeDecl(false), + m_NeedUnityPreTransformDecl(false), + m_NumDeclaredWhileTrueLoops(0) + {} + // Sets the target language according to given input. if LANG_DEFAULT, does autodetect and returns the selected language + GLLang SetLanguage(GLLang suggestedLanguage); + + virtual bool Translate(); + virtual void TranslateDeclaration(const Declaration* psDecl); + virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL, int *iIgnoreRedirect = NULL); + virtual void SetIOPrefixes(); + +private: + void TranslateOperand(bstring glsl, const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL, bool forceNoConversion = false); + void TranslateOperand(const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL, bool forceNoConversion = false); + void TranslateInstruction(Instruction* psInst, bool isEmbedded = false); + + void TranslateVariableNameWithMask(bstring glsl, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase, bool forceNoConversion = false); + void TranslateVariableNameWithMask(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase, bool forceNoConversion = false); + + void TranslateOperandIndex(const Operand* psOperand, int index); + void TranslateOperandIndexMAD(const Operand* psOperand, int index, uint32_t multiply, uint32_t add); + + void AddOpAssignToDestWithMask(const Operand* psDest, + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int *pNeedsParenthesis, uint32_t ui32CompMask); + void AddAssignToDest(const Operand* psDest, + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int* pNeedsParenthesis); + void AddAssignPrologue(int numParenthesis, bool isEmbedded = false); + + + void AddBuiltinOutput(const Declaration* psDecl, int arrayElements, const char* builtinName); + void AddBuiltinInput(const Declaration* psDecl, const char* builtinName); + void HandleOutputRedirect(const Declaration *psDecl, const char *Precision); + void HandleInputRedirect(const Declaration *psDecl, const char *Precision); + + void AddUserOutput(const Declaration* psDecl); + void DeclareStructConstants(const uint32_t ui32BindingPoint, const ConstantBuffer* psCBuf, const Operand* psOperand, bstring glsl); + void DeclareConstBufferShaderVariable(const char* varName, const struct ShaderVarType* psType, const struct ConstantBuffer* psCBuf, int unsizedArray, bool addUniformPrefix, bool reportInReflection); + void PreDeclareStructType(const 
std::string &name, const struct ShaderVarType* psType); + void DeclareUBOConstants(const uint32_t ui32BindingPoint, const ConstantBuffer* psCBuf, bstring glsl); + + void ReportStruct(const std::string &name, const struct ShaderVarType* psType); + + typedef enum + { + CMP_EQ, + CMP_LT, + CMP_GE, + CMP_NE, + } ComparisonType; + + void AddComparison(Instruction* psInst, ComparisonType eType, + uint32_t typeFlag); + + void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, uint32_t precise, bool isEmbedded = false); + void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2, uint32_t precise); + void CallBinaryOp(const char* name, Instruction* psInst, + int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType, bool isEmbedded = false); + void CallTernaryOp(const char* op1, const char* op2, Instruction* psInst, + int dest, int src0, int src1, int src2, uint32_t dataType); + void CallHelper3(const char* name, Instruction* psInst, + int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask); + void CallHelper2(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask); + void CallHelper2Int(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask); + void CallHelper2UInt(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask); + void CallHelper1(const char* name, Instruction* psInst, + int dest, int src0, int paramsShouldFollowWriteMask); + void CallHelper1Int( + const char* name, + Instruction* psInst, + const int dest, + const int src0, + int paramsShouldFollowWriteMask); + void TranslateTexelFetch( + Instruction* psInst, + const ResourceBinding* psBinding, + bstring glsl); + void TranslateTexCoord( + const RESOURCE_DIMENSION eResDim, + Operand* psTexCoordOperand); + void GetResInfoData(Instruction* psInst, int index, int destElem); + void TranslateTextureSample(Instruction* psInst, + uint32_t ui32Flags); + void TranslateDynamicComponentSelection(const ShaderVarType* psVarType, + const Operand* psByteAddr, uint32_t offset, uint32_t mask); + void TranslateShaderStorageStore(Instruction* psInst); + void TranslateShaderStorageLoad(Instruction* psInst); + void TranslateAtomicMemOp(Instruction* psInst); + void TranslateConditional( + Instruction* psInst, + bstring glsl); + + void HandleSwitchTransformation(Instruction* psInst, bstring glsl); + + // Add an extra function to the m_FunctionDefinitions list, unless it's already there. + bool DeclareExtraFunction(const std::string &name, bstring body); + void UseExtraFunctionDependency(const std::string &name); + + void DeclareDynamicIndexWrapper(const struct ShaderVarType* psType); + void DeclareDynamicIndexWrapper(const char* psName, SHADER_VARIABLE_CLASS eClass, SHADER_VARIABLE_TYPE eType, uint32_t ui32Rows, uint32_t ui32Columns, uint32_t ui32Elements); + + bool RenderTargetDeclared(uint32_t input); + + std::string GetVulkanDummySamplerName(); + + // A map of extra helper functions we'll need. + FunctionDefinitions m_FunctionDefinitions; + std::vector m_FunctionDefinitionsOrder; + + std::vector m_AdditionalDefinitions; + + std::vector m_DefinedStructs; + + std::set m_DeclaredRenderTarget; + int m_NumDeclaredWhileTrueLoops; + + struct SwitchConversion + { + /* + IF (CONDITION1) BREAK; STATEMENT1; IF (CONDITION2) BREAK; STATEMENT2;... 
transforms to + if (CONDITION1) {} ELSE { STATEMENT1; IF (CONDITION2) {} ELSE {STATEMENT2; ...} } + thus, we need to count the "BREAK" statements we encountered in each IF on the same level inside a SWITCH. + */ + struct ConditionalInfo + { + int breakCount; // Count BREAK on the same level to emit enough closing braces afterwards + bool breakEncountered; // Just encountered a BREAK statment, potentially need to emit "ELSE" + bool endifEncountered; // We need to check for "ENDIF ELSE" sequence, and not emit "else" if we see it + + ConditionalInfo() : + ConditionalInfo(0, false) + {} + + explicit ConditionalInfo(int initialBreakCount) : + ConditionalInfo(initialBreakCount, false) + {} + + ConditionalInfo(int initialBreakCount, bool withEndif) : + ConditionalInfo(initialBreakCount, withEndif, false) + {} + + ConditionalInfo(int initialBreakCount, bool withEndif, bool withBreak) : + breakCount(initialBreakCount), + endifEncountered(withEndif), + breakEncountered(withBreak) + {} + }; + + bstring switchOperand; + // We defer emitting if (condition) for each CASE statement to concatenate possible CASE A: CASE B:... into one if (). + std::vector currentCaseOperands; + std::vector conditionalsInfo; + int isInLoop; // We don't count "BREAK" (end emit them) if we're in a loop. + bool isFirstCase; + + SwitchConversion() : + switchOperand(bfromcstr("")), + isInLoop(0), + isFirstCase(true) + {} + + SwitchConversion(const SwitchConversion& other) : + switchOperand(bstrcpy(other.switchOperand)), + conditionalsInfo(other.conditionalsInfo), + isInLoop(other.isInLoop), + isFirstCase(other.isFirstCase) + { + currentCaseOperands.reserve(other.currentCaseOperands.size()); + for (size_t i = 0; i < other.currentCaseOperands.size(); ++i) + currentCaseOperands.push_back(bstrcpy(other.currentCaseOperands[i])); + } + + SwitchConversion(SwitchConversion&& other) : + switchOperand(other.switchOperand), + currentCaseOperands(std::move(other.currentCaseOperands)), + conditionalsInfo(std::move(other.conditionalsInfo)), + isInLoop(other.isInLoop), + isFirstCase(other.isFirstCase) + { + other.switchOperand = nullptr; + } + + ~SwitchConversion() + { + bdestroy(switchOperand); + for (size_t i = 0; i < currentCaseOperands.size(); ++i) + bdestroy(currentCaseOperands[i]); + } + + SwitchConversion& operator=(const SwitchConversion& other) + { + if (this == &other) + return *this; + + switchOperand = bstrcpy(other.switchOperand); + conditionalsInfo = other.conditionalsInfo; + isInLoop = other.isInLoop; + isFirstCase = other.isFirstCase; + currentCaseOperands.reserve(other.currentCaseOperands.size()); + for (size_t i = 0; i < other.currentCaseOperands.size(); ++i) + currentCaseOperands.push_back(bstrcpy(other.currentCaseOperands[i])); + + return *this; + } + + SwitchConversion& operator=(SwitchConversion&& other) + { + if (this == &other) + return *this; + + switchOperand = other.switchOperand; + conditionalsInfo = std::move(other.conditionalsInfo); + isInLoop = other.isInLoop; + isFirstCase = other.isFirstCase; + currentCaseOperands = std::move(other.currentCaseOperands); + + other.switchOperand = nullptr; + + return *this; + } + }; + std::vector m_SwitchStack; +}; diff --git a/third_party/HLSLcc/src/internal_includes/toGLSLOperand.h b/third_party/HLSLcc/src/internal_includes/toGLSLOperand.h new file mode 100644 index 0000000..deda652 --- /dev/null +++ b/third_party/HLSLcc/src/internal_includes/toGLSLOperand.h @@ -0,0 +1,26 @@ +#ifndef TO_GLSL_OPERAND_H +#define TO_GLSL_OPERAND_H + +#include +#include "bstrlib.h" +#include 
"ShaderInfo.h" + +class HLSLCrossCompilerContext; + +//void TranslateOperand(HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32TOFlag); +// Translate operand but add additional component mask +//void TranslateOperandWithMask(HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t ui32ComponentMask); + +void TranslateOperandSwizzle(HLSLCrossCompilerContext* psContext, const Operand* psOperand, int iRebase); +void TranslateOperandSwizzleWithMask(bstring glsl, HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase); +void TranslateOperandSwizzleWithMask(HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase); + +void ResourceName(bstring targetStr, HLSLCrossCompilerContext* psContext, ResourceGroup group, const uint32_t ui32RegisterNumber, const int bZCompare); +std::string ResourceName(HLSLCrossCompilerContext* psContext, ResourceGroup group, const uint32_t ui32RegisterNumber, const int bZCompare); + +std::string TextureSamplerName(ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare); +void ConcatTextureSamplerName(bstring str, ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare); + +std::string UniformBufferInstanceName(HLSLCrossCompilerContext* psContext, const std::string& name); + +#endif diff --git a/third_party/HLSLcc/src/internal_includes/toMetal.h b/third_party/HLSLcc/src/internal_includes/toMetal.h new file mode 100644 index 0000000..08d8eb1 --- /dev/null +++ b/third_party/HLSLcc/src/internal_includes/toMetal.h @@ -0,0 +1,182 @@ +#pragma once +#include "internal_includes/Translator.h" +#include +#include + +struct SamplerDesc +{ + std::string name; + uint32_t reg, slot; +}; +struct TextureSamplerDesc +{ + std::string name; + int textureBind, samplerBind; + HLSLCC_TEX_DIMENSION dim; + bool isMultisampled; + bool isDepthSampler; + bool uav; +}; + +class ToMetal : public Translator +{ +public: + explicit ToMetal(HLSLCrossCompilerContext *ctx) + : Translator(ctx) + , m_ShadowSamplerDeclared(false) + , m_NeedFBOutputRemapDecl(false) + , m_NeedFBInputRemapDecl(false) + {} + + virtual bool Translate(); + virtual void TranslateDeclaration(const Declaration *psDecl); + virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL, int *iIgnoreRedirect = NULL); + std::string TranslateOperand(const Operand *psOp, uint32_t flags, uint32_t ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL); + + virtual void SetIOPrefixes(); + +private: + void TranslateInstruction(Instruction* psInst); + + void DeclareBuiltinInput(const Declaration *psDecl); + void DeclareBuiltinOutput(const Declaration *psDecl); + void DeclareClipPlanes(const Declaration* decl, unsigned declCount); + void GenerateTexturesReflection(HLSLccReflection* refl); + + // Retrieve the name of the output struct for this shader + std::string GetOutputStructName() const; + std::string GetInputStructName() const; + std::string GetCBName(const std::string& cbName) const; + + void DeclareHullShaderPassthrough(); + void HandleInputRedirect(const Declaration *psDecl, const std::string &typeName); + void HandleOutputRedirect(const Declaration *psDecl, const std::string &typeName); + + void 
DeclareConstantBuffer(const ConstantBuffer *psCBuf, uint32_t ui32BindingPoint); + void DeclareStructType(const std::string &name, const std::vector &contents, bool withinCB = false, uint32_t cumulativeOffset = 0, bool stripUnused = false); + void DeclareStructType(const std::string &name, const std::vector &contents, bool withinCB = false, uint32_t cumulativeOffset = 0); + void DeclareStructVariable(const std::string &parentName, const ShaderVar &var, bool withinCB = false, uint32_t cumulativeOffset = 0, bool isUsed = true); + void DeclareStructVariable(const std::string &parentName, const ShaderVarType &var, bool withinCB = false, uint32_t cumulativeOffset = 0, bool isUsed = true); + void DeclareBufferVariable(const Declaration *psDecl, bool isRaw, bool isUAV); + + void DeclareResource(const Declaration *psDecl); + void TranslateResourceTexture(const Declaration* psDecl, uint32_t samplerCanDoShadowCmp, HLSLCC_TEX_DIMENSION texDim); + + void DeclareOutput(const Declaration *decl); + + void PrintStructDeclarations(StructDefinitions &defs, const char *name = ""); + + std::string ResourceName(ResourceGroup group, const uint32_t ui32RegisterNumber); + + // ToMetalOperand.cpp + std::string TranslateOperandSwizzle(const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase, bool includeDot = true); + std::string TranslateOperandIndex(const Operand* psOperand, int index); + std::string TranslateVariableName(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase); + + // ToMetalInstruction.cpp + + void AddOpAssignToDestWithMask(const Operand* psDest, + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int& numParenthesis, uint32_t ui32CompMask); + void AddAssignToDest(const Operand* psDest, + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int& numParenthesis); + void AddAssignPrologue(int numParenthesis); + + typedef enum + { + CMP_EQ, + CMP_LT, + CMP_GE, + CMP_NE, + } ComparisonType; + + void AddComparison(Instruction* psInst, ComparisonType eType, + uint32_t typeFlag); + + bool CanForceToHalfOperand(const Operand *psOperand); + + void AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, uint32_t precise); + void AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2, uint32_t precise); + void CallBinaryOp(const char* name, Instruction* psInst, + int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType); + void CallTernaryOp(const char* op1, const char* op2, Instruction* psInst, + int dest, int src0, int src1, int src2, uint32_t dataType); + void CallHelper3(const char* name, Instruction* psInst, + int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask, uint32_t ui32Flags); + void CallHelper3(const char* name, Instruction* psInst, + int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask); + void CallHelper2(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask); + void CallHelper2Int(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask); + void CallHelper2UInt(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask); + void CallHelper1(const char* name, Instruction* psInst, + int dest, int src0, int paramsShouldFollowWriteMask); + void CallHelper1Int( + const char* name, + Instruction* psInst, + const int dest, + const int src0, + int 
paramsShouldFollowWriteMask); + void TranslateTexelFetch( + Instruction* psInst, + const ResourceBinding* psBinding, + bstring glsl); + void TranslateTexelFetchOffset( + Instruction* psInst, + const ResourceBinding* psBinding, + bstring glsl); + void TranslateTexCoord( + const RESOURCE_DIMENSION eResDim, + Operand* psTexCoordOperand); + void GetResInfoData(Instruction* psInst, int index, int destElem); + void TranslateTextureSample(Instruction* psInst, + uint32_t ui32Flags); + void TranslateDynamicComponentSelection(const ShaderVarType* psVarType, + const Operand* psByteAddr, uint32_t offset, uint32_t mask); + void TranslateShaderStorageStore(Instruction* psInst); + void TranslateShaderStorageLoad(Instruction* psInst); + void TranslateAtomicMemOp(Instruction* psInst); + void TranslateConditional( + Instruction* psInst, + bstring glsl); + + // The map is keyed by struct name. The special name "" (empty string) is reserved for entry point function parameters + StructDefinitions m_StructDefinitions; + + // A map of extra helper functions we'll need. + FunctionDefinitions m_FunctionDefinitions; + + BindingSlotAllocator m_TextureSlots, m_SamplerSlots; + BindingSlotAllocator m_BufferSlots; + + struct BufferReflection + { + uint32_t bind; + bool isUAV; + bool hasCounter; + }; + std::map m_BufferReflections; + + std::vector m_Samplers; + std::vector m_Textures; + + std::string m_ExtraGlobalDefinitions; + + // Flags for whether we need to add the declaration for the FB IO remaps + bool m_NeedFBInputRemapDecl; + bool m_NeedFBOutputRemapDecl; + + bool m_ShadowSamplerDeclared; + + void EnsureShadowSamplerDeclared(); + + // Add an extra function to the m_FunctionDefinitions list, unless it's already there. + void DeclareExtraFunction(const std::string &name, const std::string &body); + + // Move all lowp -> mediump + void ClampPartialPrecisions(); + + // Reseve UAV slots in advance to match the original HLSL bindings -> correct bindings in SetRandomWriteTarget() + void ReserveUAVBindingSlots(ShaderPhase *phase); +}; diff --git a/third_party/HLSLcc/src/internal_includes/toMetalDeclaration.h b/third_party/HLSLcc/src/internal_includes/toMetalDeclaration.h new file mode 100644 index 0000000..2052009 --- /dev/null +++ b/third_party/HLSLcc/src/internal_includes/toMetalDeclaration.h @@ -0,0 +1,3 @@ +#pragma once + +#include "internal_includes/Declaration.h" diff --git a/third_party/HLSLcc/src/internal_includes/tokens.h b/third_party/HLSLcc/src/internal_includes/tokens.h new file mode 100644 index 0000000..671ccbd --- /dev/null +++ b/third_party/HLSLcc/src/internal_includes/tokens.h @@ -0,0 +1,789 @@ +#ifndef TOKENS_H +#define TOKENS_H + +#include "hlslcc.h" + +enum SHADER_PHASE_TYPE +{ + SHADER_PHASE_INVALID = -1, + MAIN_PHASE = 0, + HS_GLOBAL_DECL_PHASE = 1, + HS_CTRL_POINT_PHASE = 2, + HS_FORK_PHASE = 3, + HS_JOIN_PHASE = 4 +}; + +static SHADER_TYPE DecodeShaderType(uint32_t ui32Token) +{ + return (SHADER_TYPE)((ui32Token & 0xffff0000) >> 16); +} + +static uint32_t DecodeProgramMajorVersion(uint32_t ui32Token) +{ + return (ui32Token & 0x000000f0) >> 4; +} + +static uint32_t DecodeProgramMinorVersion(uint32_t ui32Token) +{ + return (ui32Token & 0x0000000f); +} + +static uint32_t DecodeInstructionLength(uint32_t ui32Token) +{ + return (ui32Token & 0x7f000000) >> 24; +} + +static uint32_t DecodeIsOpcodeExtended(uint32_t ui32Token) +{ + return (ui32Token & 0x80000000) >> 31; +} + +typedef enum EXTENDED_OPCODE_TYPE +{ + EXTENDED_OPCODE_EMPTY = 0, + EXTENDED_OPCODE_SAMPLE_CONTROLS = 1, + 
EXTENDED_OPCODE_RESOURCE_DIM = 2, + EXTENDED_OPCODE_RESOURCE_RETURN_TYPE = 3, +} EXTENDED_OPCODE_TYPE; + +static EXTENDED_OPCODE_TYPE DecodeExtendedOpcodeType(uint32_t ui32Token) +{ + return (EXTENDED_OPCODE_TYPE)(ui32Token & 0x0000003f); +} + +static RESOURCE_RETURN_TYPE DecodeResourceReturnType(uint32_t ui32Coord, uint32_t ui32Token) +{ + return (RESOURCE_RETURN_TYPE)((ui32Token >> (ui32Coord * 4)) & 0xF); +} + +static RESOURCE_RETURN_TYPE DecodeExtendedResourceReturnType(uint32_t ui32Coord, uint32_t ui32Token) +{ + return (RESOURCE_RETURN_TYPE)((ui32Token >> (ui32Coord * 4 + 6)) & 0xF); +} + +enum OPCODE_TYPE +{ + //For DX9 + OPCODE_POW = -6, + OPCODE_DP2ADD = -5, + OPCODE_LRP = -4, + OPCODE_ENDREP = -3, + OPCODE_REP = -2, + OPCODE_SPECIAL_DCL_IMMCONST = -1, + + OPCODE_ADD, + OPCODE_AND, + OPCODE_BREAK, + OPCODE_BREAKC, + OPCODE_CALL, + OPCODE_CALLC, + OPCODE_CASE, + OPCODE_CONTINUE, + OPCODE_CONTINUEC, + OPCODE_CUT, + OPCODE_DEFAULT, + OPCODE_DERIV_RTX, + OPCODE_DERIV_RTY, + OPCODE_DISCARD, + OPCODE_DIV, + OPCODE_DP2, + OPCODE_DP3, + OPCODE_DP4, + OPCODE_ELSE, + OPCODE_EMIT, + OPCODE_EMITTHENCUT, + OPCODE_ENDIF, + OPCODE_ENDLOOP, + OPCODE_ENDSWITCH, + OPCODE_EQ, + OPCODE_EXP, + OPCODE_FRC, + OPCODE_FTOI, + OPCODE_FTOU, + OPCODE_GE, + OPCODE_IADD, + OPCODE_IF, + OPCODE_IEQ, + OPCODE_IGE, + OPCODE_ILT, + OPCODE_IMAD, + OPCODE_IMAX, + OPCODE_IMIN, + OPCODE_IMUL, + OPCODE_INE, + OPCODE_INEG, + OPCODE_ISHL, + OPCODE_ISHR, + OPCODE_ITOF, + OPCODE_LABEL, + OPCODE_LD, + OPCODE_LD_MS, + OPCODE_LOG, + OPCODE_LOOP, + OPCODE_LT, + OPCODE_MAD, + OPCODE_MIN, + OPCODE_MAX, + OPCODE_CUSTOMDATA, + OPCODE_MOV, + OPCODE_MOVC, + OPCODE_MUL, + OPCODE_NE, + OPCODE_NOP, + OPCODE_NOT, + OPCODE_OR, + OPCODE_RESINFO, + OPCODE_RET, + OPCODE_RETC, + OPCODE_ROUND_NE, + OPCODE_ROUND_NI, + OPCODE_ROUND_PI, + OPCODE_ROUND_Z, + OPCODE_RSQ, + OPCODE_SAMPLE, + OPCODE_SAMPLE_C, + OPCODE_SAMPLE_C_LZ, + OPCODE_SAMPLE_L, + OPCODE_SAMPLE_D, + OPCODE_SAMPLE_B, + OPCODE_SQRT, + OPCODE_SWITCH, + OPCODE_SINCOS, + OPCODE_UDIV, + OPCODE_ULT, + OPCODE_UGE, + OPCODE_UMUL, + OPCODE_UMAD, + OPCODE_UMAX, + OPCODE_UMIN, + OPCODE_USHR, + OPCODE_UTOF, + OPCODE_XOR, + OPCODE_DCL_RESOURCE, // DCL* opcodes have + OPCODE_DCL_CONSTANT_BUFFER, // custom operand formats. 
+ OPCODE_DCL_SAMPLER, + OPCODE_DCL_INDEX_RANGE, + OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY, + OPCODE_DCL_GS_INPUT_PRIMITIVE, + OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT, + OPCODE_DCL_INPUT, + OPCODE_DCL_INPUT_SGV, + OPCODE_DCL_INPUT_SIV, + OPCODE_DCL_INPUT_PS, + OPCODE_DCL_INPUT_PS_SGV, + OPCODE_DCL_INPUT_PS_SIV, + OPCODE_DCL_OUTPUT, + OPCODE_DCL_OUTPUT_SGV, + OPCODE_DCL_OUTPUT_SIV, + OPCODE_DCL_TEMPS, + OPCODE_DCL_INDEXABLE_TEMP, + OPCODE_DCL_GLOBAL_FLAGS, + +// ----------------------------------------------- + + OPCODE_RESERVED_10, + +// ---------- DX 10.1 op codes--------------------- + + OPCODE_LOD, + OPCODE_GATHER4, + OPCODE_SAMPLE_POS, + OPCODE_SAMPLE_INFO, + +// ----------------------------------------------- + + // This should be 10.1's version of NUM_OPCODES + OPCODE_RESERVED_10_1, + +// ---------- DX 11 op codes--------------------- + OPCODE_HS_DECLS, // token marks beginning of HS sub-shader + OPCODE_HS_CONTROL_POINT_PHASE, // token marks beginning of HS sub-shader + OPCODE_HS_FORK_PHASE, // token marks beginning of HS sub-shader + OPCODE_HS_JOIN_PHASE, // token marks beginning of HS sub-shader + + OPCODE_EMIT_STREAM, + OPCODE_CUT_STREAM, + OPCODE_EMITTHENCUT_STREAM, + OPCODE_INTERFACE_CALL, + + OPCODE_BUFINFO, + OPCODE_DERIV_RTX_COARSE, + OPCODE_DERIV_RTX_FINE, + OPCODE_DERIV_RTY_COARSE, + OPCODE_DERIV_RTY_FINE, + OPCODE_GATHER4_C, + OPCODE_GATHER4_PO, + OPCODE_GATHER4_PO_C, + OPCODE_RCP, + OPCODE_F32TOF16, + OPCODE_F16TOF32, + OPCODE_UADDC, + OPCODE_USUBB, + OPCODE_COUNTBITS, + OPCODE_FIRSTBIT_HI, + OPCODE_FIRSTBIT_LO, + OPCODE_FIRSTBIT_SHI, + OPCODE_UBFE, + OPCODE_IBFE, + OPCODE_BFI, + OPCODE_BFREV, + OPCODE_SWAPC, + + OPCODE_DCL_STREAM, + OPCODE_DCL_FUNCTION_BODY, + OPCODE_DCL_FUNCTION_TABLE, + OPCODE_DCL_INTERFACE, + + OPCODE_DCL_INPUT_CONTROL_POINT_COUNT, + OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT, + OPCODE_DCL_TESS_DOMAIN, + OPCODE_DCL_TESS_PARTITIONING, + OPCODE_DCL_TESS_OUTPUT_PRIMITIVE, + OPCODE_DCL_HS_MAX_TESSFACTOR, + OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT, + OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT, + + OPCODE_DCL_THREAD_GROUP, + OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED, + OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW, + OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED, + OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW, + OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED, + OPCODE_DCL_RESOURCE_RAW, + OPCODE_DCL_RESOURCE_STRUCTURED, + OPCODE_LD_UAV_TYPED, + OPCODE_STORE_UAV_TYPED, + OPCODE_LD_RAW, + OPCODE_STORE_RAW, + OPCODE_LD_STRUCTURED, + OPCODE_STORE_STRUCTURED, + OPCODE_ATOMIC_AND, + OPCODE_ATOMIC_OR, + OPCODE_ATOMIC_XOR, + OPCODE_ATOMIC_CMP_STORE, + OPCODE_ATOMIC_IADD, + OPCODE_ATOMIC_IMAX, + OPCODE_ATOMIC_IMIN, + OPCODE_ATOMIC_UMAX, + OPCODE_ATOMIC_UMIN, + OPCODE_IMM_ATOMIC_ALLOC, + OPCODE_IMM_ATOMIC_CONSUME, + OPCODE_IMM_ATOMIC_IADD, + OPCODE_IMM_ATOMIC_AND, + OPCODE_IMM_ATOMIC_OR, + OPCODE_IMM_ATOMIC_XOR, + OPCODE_IMM_ATOMIC_EXCH, + OPCODE_IMM_ATOMIC_CMP_EXCH, + OPCODE_IMM_ATOMIC_IMAX, + OPCODE_IMM_ATOMIC_IMIN, + OPCODE_IMM_ATOMIC_UMAX, + OPCODE_IMM_ATOMIC_UMIN, + OPCODE_SYNC, + + OPCODE_DADD, + OPCODE_DMAX, + OPCODE_DMIN, + OPCODE_DMUL, + OPCODE_DEQ, + OPCODE_DGE, + OPCODE_DLT, + OPCODE_DNE, + OPCODE_DMOV, + OPCODE_DMOVC, + OPCODE_DTOF, + OPCODE_FTOD, + + OPCODE_EVAL_SNAPPED, + OPCODE_EVAL_SAMPLE_INDEX, + OPCODE_EVAL_CENTROID, + + OPCODE_DCL_GS_INSTANCE_COUNT, + + OPCODE_ABORT, + OPCODE_DEBUG_BREAK, + +// ----------------------------------------------- + + // This marks the end of D3D11.0 opcodes + OPCODE_RESERVED_11, + + OPCODE_DDIV, + OPCODE_DFMA, + OPCODE_DRCP, + + OPCODE_MSAD, + + 
OPCODE_DTOI, + OPCODE_DTOU, + OPCODE_ITOD, + OPCODE_UTOD, + +// ----------------------------------------------- + + // This marks the end of D3D11.1 opcodes + OPCODE_RESERVED_11_1, + + NUM_OPCODES, + OPCODE_INVALID = NUM_OPCODES, +}; + +static OPCODE_TYPE DecodeOpcodeType(uint32_t ui32Token) +{ + return (OPCODE_TYPE)(ui32Token & 0x00007ff); +} + +typedef enum +{ + INDEX_0D, + INDEX_1D, + INDEX_2D, + INDEX_3D, +} OPERAND_INDEX_DIMENSION; + +static OPERAND_INDEX_DIMENSION DecodeOperandIndexDimension(uint32_t ui32Token) +{ + return (OPERAND_INDEX_DIMENSION)((ui32Token & 0x00300000) >> 20); +} + +typedef enum OPERAND_TYPE +{ + OPERAND_TYPE_SPECIAL_LOOPCOUNTER = -10, + OPERAND_TYPE_SPECIAL_IMMCONSTINT = -9, + OPERAND_TYPE_SPECIAL_TEXCOORD = -8, + OPERAND_TYPE_SPECIAL_POSITION = -7, + OPERAND_TYPE_SPECIAL_FOG = -6, + OPERAND_TYPE_SPECIAL_POINTSIZE = -5, + OPERAND_TYPE_SPECIAL_OUTOFFSETCOLOUR = -4, + OPERAND_TYPE_SPECIAL_OUTBASECOLOUR = -3, + OPERAND_TYPE_SPECIAL_ADDRESS = -2, + OPERAND_TYPE_SPECIAL_IMMCONST = -1, + OPERAND_TYPE_TEMP = 0, // Temporary Register File + OPERAND_TYPE_INPUT = 1, // General Input Register File + OPERAND_TYPE_OUTPUT = 2, // General Output Register File + OPERAND_TYPE_INDEXABLE_TEMP = 3, // Temporary Register File (indexable) + OPERAND_TYPE_IMMEDIATE32 = 4, // 32bit/component immediate value(s) + // If for example, operand token bits + // [01:00]==OPERAND_4_COMPONENT, + // this means that the operand type: + // OPERAND_TYPE_IMMEDIATE32 + // results in 4 additional 32bit + // DWORDS present for the operand. + OPERAND_TYPE_IMMEDIATE64 = 5, // 64bit/comp.imm.val(s)HI:LO + OPERAND_TYPE_SAMPLER = 6, // Reference to sampler state + OPERAND_TYPE_RESOURCE = 7, // Reference to memory resource (e.g. texture) + OPERAND_TYPE_CONSTANT_BUFFER = 8, // Reference to constant buffer + OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER = 9, // Reference to immediate constant buffer + OPERAND_TYPE_LABEL = 10, // Label + OPERAND_TYPE_INPUT_PRIMITIVEID = 11, // Input primitive ID + OPERAND_TYPE_OUTPUT_DEPTH = 12, // Output Depth + OPERAND_TYPE_NULL = 13, // Null register, used to discard results of operations + // Below Are operands new in DX 10.1 + OPERAND_TYPE_RASTERIZER = 14, // DX10.1 Rasterizer register, used to denote the depth/stencil and render target resources + OPERAND_TYPE_OUTPUT_COVERAGE_MASK = 15, // DX10.1 PS output MSAA coverage mask (scalar) + // Below Are operands new in DX 11 + OPERAND_TYPE_STREAM = 16, // Reference to GS stream output resource + OPERAND_TYPE_FUNCTION_BODY = 17, // Reference to a function definition + OPERAND_TYPE_FUNCTION_TABLE = 18, // Reference to a set of functions used by a class + OPERAND_TYPE_INTERFACE = 19, // Reference to an interface + OPERAND_TYPE_FUNCTION_INPUT = 20, // Reference to an input parameter to a function + OPERAND_TYPE_FUNCTION_OUTPUT = 21, // Reference to an output parameter to a function + OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID = 22, // HS Control Point phase input saying which output control point ID this is + OPERAND_TYPE_INPUT_FORK_INSTANCE_ID = 23, // HS Fork Phase input instance ID + OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID = 24, // HS Join Phase input instance ID + OPERAND_TYPE_INPUT_CONTROL_POINT = 25, // HS Fork+Join, DS phase input control points (array of them) + OPERAND_TYPE_OUTPUT_CONTROL_POINT = 26, // HS Fork+Join phase output control points (array of them) + OPERAND_TYPE_INPUT_PATCH_CONSTANT = 27, // DS+HSJoin Input Patch Constants (array of them) + OPERAND_TYPE_INPUT_DOMAIN_POINT = 28, // DS Input Domain point + 
OPERAND_TYPE_THIS_POINTER = 29, // Reference to an interface this pointer + OPERAND_TYPE_UNORDERED_ACCESS_VIEW = 30, // Reference to UAV u# + OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY = 31, // Reference to Thread Group Shared Memory g# + OPERAND_TYPE_INPUT_THREAD_ID = 32, // Compute Shader Thread ID + OPERAND_TYPE_INPUT_THREAD_GROUP_ID = 33, // Compute Shader Thread Group ID + OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP = 34, // Compute Shader Thread ID In Thread Group + OPERAND_TYPE_INPUT_COVERAGE_MASK = 35, // Pixel shader coverage mask input + OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED = 36, // Compute Shader Thread ID In Group Flattened to a 1D value. + OPERAND_TYPE_INPUT_GS_INSTANCE_ID = 37, // Input GS instance ID + OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL = 38, // Output Depth, forced to be greater than or equal than current depth + OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL = 39, // Output Depth, forced to be less than or equal to current depth + OPERAND_TYPE_CYCLE_COUNTER = 40, // Cycle counter +} OPERAND_TYPE; + +static OPERAND_TYPE DecodeOperandType(uint32_t ui32Token) +{ + return (OPERAND_TYPE)((ui32Token & 0x000ff000) >> 12); +} + +static SPECIAL_NAME DecodeOperandSpecialName(uint32_t ui32Token) +{ + return (SPECIAL_NAME)(ui32Token & 0x0000ffff); +} + +typedef enum OPERAND_INDEX_REPRESENTATION +{ + OPERAND_INDEX_IMMEDIATE32 = 0, // Extra DWORD + OPERAND_INDEX_IMMEDIATE64 = 1, // 2 Extra DWORDs + // (HI32:LO32) + OPERAND_INDEX_RELATIVE = 2, // Extra operand + OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE = 3, // Extra DWORD followed by + // extra operand + OPERAND_INDEX_IMMEDIATE64_PLUS_RELATIVE = 4, // 2 Extra DWORDS + // (HI32:LO32) followed + // by extra operand +} OPERAND_INDEX_REPRESENTATION; + +static OPERAND_INDEX_REPRESENTATION DecodeOperandIndexRepresentation(uint32_t ui32Dimension, uint32_t ui32Token) +{ + return (OPERAND_INDEX_REPRESENTATION)((ui32Token & (0x3 << (22 + 3 * ((ui32Dimension) & 3)))) >> (22 + 3 * ((ui32Dimension) & 3))); +} + +typedef enum OPERAND_NUM_COMPONENTS +{ + OPERAND_0_COMPONENT = 0, + OPERAND_1_COMPONENT = 1, + OPERAND_4_COMPONENT = 2, + OPERAND_N_COMPONENT = 3 // unused for now +} OPERAND_NUM_COMPONENTS; + +static OPERAND_NUM_COMPONENTS DecodeOperandNumComponents(uint32_t ui32Token) +{ + return (OPERAND_NUM_COMPONENTS)(ui32Token & 0x00000003); +} + +typedef enum OPERAND_4_COMPONENT_SELECTION_MODE +{ + OPERAND_4_COMPONENT_MASK_MODE = 0, // mask 4 components + OPERAND_4_COMPONENT_SWIZZLE_MODE = 1, // swizzle 4 components + OPERAND_4_COMPONENT_SELECT_1_MODE = 2, // select 1 of 4 components +} OPERAND_4_COMPONENT_SELECTION_MODE; + +static OPERAND_4_COMPONENT_SELECTION_MODE DecodeOperand4CompSelMode(uint32_t ui32Token) +{ + return (OPERAND_4_COMPONENT_SELECTION_MODE)((ui32Token & 0x0000000c) >> 2); +} + +#define OPERAND_4_COMPONENT_MASK_X 0x00000001 +#define OPERAND_4_COMPONENT_MASK_Y 0x00000002 +#define OPERAND_4_COMPONENT_MASK_Z 0x00000004 +#define OPERAND_4_COMPONENT_MASK_W 0x00000008 +#define OPERAND_4_COMPONENT_MASK_R OPERAND_4_COMPONENT_MASK_X +#define OPERAND_4_COMPONENT_MASK_G OPERAND_4_COMPONENT_MASK_Y +#define OPERAND_4_COMPONENT_MASK_B OPERAND_4_COMPONENT_MASK_Z +#define OPERAND_4_COMPONENT_MASK_A OPERAND_4_COMPONENT_MASK_W +#define OPERAND_4_COMPONENT_MASK_ALL 0x0000000f + +static uint32_t DecodeOperand4CompMask(uint32_t ui32Token) +{ + return (uint32_t)((ui32Token & 0x000000f0) >> 4); +} + +static uint32_t DecodeOperand4CompSwizzle(uint32_t ui32Token) +{ + return (uint32_t)((ui32Token & 0x00000ff0) >> 4); +} + +static uint32_t 
DecodeOperand4CompSel1(uint32_t ui32Token) +{ + return (uint32_t)((ui32Token & 0x00000030) >> 4); +} + +#define OPERAND_4_COMPONENT_X 0 +#define OPERAND_4_COMPONENT_Y 1 +#define OPERAND_4_COMPONENT_Z 2 +#define OPERAND_4_COMPONENT_W 3 + +static const uint32_t NO_SWIZZLE = (((OPERAND_4_COMPONENT_X) | (OPERAND_4_COMPONENT_Y << 2) | (OPERAND_4_COMPONENT_Z << 4) | (OPERAND_4_COMPONENT_W << 6)) /*<<4*/); + +static const uint32_t XXXX_SWIZZLE = (((OPERAND_4_COMPONENT_X) | (OPERAND_4_COMPONENT_X << 2) | (OPERAND_4_COMPONENT_X << 4) | (OPERAND_4_COMPONENT_X << 6))); +static const uint32_t YYYY_SWIZZLE = (((OPERAND_4_COMPONENT_Y) | (OPERAND_4_COMPONENT_Y << 2) | (OPERAND_4_COMPONENT_Y << 4) | (OPERAND_4_COMPONENT_Y << 6))); +static const uint32_t ZZZZ_SWIZZLE = (((OPERAND_4_COMPONENT_Z) | (OPERAND_4_COMPONENT_Z << 2) | (OPERAND_4_COMPONENT_Z << 4) | (OPERAND_4_COMPONENT_Z << 6))); +static const uint32_t WWWW_SWIZZLE = (((OPERAND_4_COMPONENT_W) | (OPERAND_4_COMPONENT_W << 2) | (OPERAND_4_COMPONENT_W << 4) | (OPERAND_4_COMPONENT_W << 6))); + +static uint32_t DecodeOperand4CompSwizzleSource(uint32_t ui32Token, uint32_t comp) +{ + return (uint32_t)(((ui32Token) >> (4 + 2 * ((comp) & 3))) & 3); +} + +typedef enum RESOURCE_DIMENSION +{ + RESOURCE_DIMENSION_UNKNOWN = 0, + RESOURCE_DIMENSION_BUFFER = 1, + RESOURCE_DIMENSION_TEXTURE1D = 2, + RESOURCE_DIMENSION_TEXTURE2D = 3, + RESOURCE_DIMENSION_TEXTURE2DMS = 4, + RESOURCE_DIMENSION_TEXTURE3D = 5, + RESOURCE_DIMENSION_TEXTURECUBE = 6, + RESOURCE_DIMENSION_TEXTURE1DARRAY = 7, + RESOURCE_DIMENSION_TEXTURE2DARRAY = 8, + RESOURCE_DIMENSION_TEXTURE2DMSARRAY = 9, + RESOURCE_DIMENSION_TEXTURECUBEARRAY = 10, + RESOURCE_DIMENSION_RAW_BUFFER = 11, + RESOURCE_DIMENSION_STRUCTURED_BUFFER = 12, +} RESOURCE_DIMENSION; + +static RESOURCE_DIMENSION DecodeResourceDimension(uint32_t ui32Token) +{ + return (RESOURCE_DIMENSION)((ui32Token & 0x0000f800) >> 11); +} + +static RESOURCE_DIMENSION DecodeExtendedResourceDimension(uint32_t ui32Token) +{ + return (RESOURCE_DIMENSION)((ui32Token & 0x000007C0) >> 6); +} + +typedef enum INSTRUCTION_TEST_BOOLEAN +{ + INSTRUCTION_TEST_ZERO = 0, + INSTRUCTION_TEST_NONZERO = 1 +} INSTRUCTION_TEST_BOOLEAN; + +static INSTRUCTION_TEST_BOOLEAN DecodeInstrTestBool(uint32_t ui32Token) +{ + return (INSTRUCTION_TEST_BOOLEAN)((ui32Token & 0x00040000) >> 18); +} + +static uint32_t DecodeIsOperandExtended(uint32_t ui32Token) +{ + return (ui32Token & 0x80000000) >> 31; +} + +typedef enum EXTENDED_OPERAND_TYPE +{ + EXTENDED_OPERAND_EMPTY = 0, + EXTENDED_OPERAND_MODIFIER = 1, +} EXTENDED_OPERAND_TYPE; + +static EXTENDED_OPERAND_TYPE DecodeExtendedOperandType(uint32_t ui32Token) +{ + return (EXTENDED_OPERAND_TYPE)(ui32Token & 0x0000003f); +} + +typedef enum OPERAND_MODIFIER +{ + OPERAND_MODIFIER_NONE = 0, + OPERAND_MODIFIER_NEG = 1, + OPERAND_MODIFIER_ABS = 2, + OPERAND_MODIFIER_ABSNEG = 3, +} OPERAND_MODIFIER; + +static OPERAND_MODIFIER DecodeExtendedOperandModifier(uint32_t ui32Token) +{ + return (OPERAND_MODIFIER)((ui32Token & 0x00003fc0) >> 6); +} + +static const uint32_t GLOBAL_FLAG_REFACTORING_ALLOWED = (1 << 11); +static const uint32_t GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS = (1 << 12); +static const uint32_t GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL = (1 << 13); +static const uint32_t GLOBAL_FLAG_ENABLE_RAW_AND_STRUCTURED_BUFFERS = (1 << 14); +static const uint32_t GLOBAL_FLAG_SKIP_OPTIMIZATION = (1 << 15); +static const uint32_t GLOBAL_FLAG_ENABLE_MINIMUM_PRECISION = (1 << 16); +static const uint32_t GLOBAL_FLAG_ENABLE_DOUBLE_EXTENSIONS = (1 << 
17); +static const uint32_t GLOBAL_FLAG_ENABLE_SHADER_EXTENSIONS = (1 << 18); + +static uint32_t DecodeGlobalFlags(uint32_t ui32Token) +{ + return (uint32_t)(ui32Token & 0x00fff800); +} + +static INTERPOLATION_MODE DecodeInterpolationMode(uint32_t ui32Token) +{ + return (INTERPOLATION_MODE)((ui32Token & 0x00007800) >> 11); +} + +typedef enum PRIMITIVE_TOPOLOGY +{ + PRIMITIVE_TOPOLOGY_UNDEFINED = 0, + PRIMITIVE_TOPOLOGY_POINTLIST = 1, + PRIMITIVE_TOPOLOGY_LINELIST = 2, + PRIMITIVE_TOPOLOGY_LINESTRIP = 3, + PRIMITIVE_TOPOLOGY_TRIANGLELIST = 4, + PRIMITIVE_TOPOLOGY_TRIANGLESTRIP = 5, + // 6 is reserved for legacy triangle fans + // Adjacency values should be equal to (0x8 & non-adjacency): + PRIMITIVE_TOPOLOGY_LINELIST_ADJ = 10, + PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ = 11, + PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ = 12, + PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ = 13, +} PRIMITIVE_TOPOLOGY; + +static PRIMITIVE_TOPOLOGY DecodeGSOutputPrimitiveTopology(uint32_t ui32Token) +{ + return (PRIMITIVE_TOPOLOGY)((ui32Token & 0x0001f800) >> 11); +} + +typedef enum PRIMITIVE +{ + PRIMITIVE_UNDEFINED = 0, + PRIMITIVE_POINT = 1, + PRIMITIVE_LINE = 2, + PRIMITIVE_TRIANGLE = 3, + // Adjacency values should be equal to (0x4 & non-adjacency): + PRIMITIVE_LINE_ADJ = 6, + PRIMITIVE_TRIANGLE_ADJ = 7, + PRIMITIVE_1_CONTROL_POINT_PATCH = 8, + PRIMITIVE_2_CONTROL_POINT_PATCH = 9, + PRIMITIVE_3_CONTROL_POINT_PATCH = 10, + PRIMITIVE_4_CONTROL_POINT_PATCH = 11, + PRIMITIVE_5_CONTROL_POINT_PATCH = 12, + PRIMITIVE_6_CONTROL_POINT_PATCH = 13, + PRIMITIVE_7_CONTROL_POINT_PATCH = 14, + PRIMITIVE_8_CONTROL_POINT_PATCH = 15, + PRIMITIVE_9_CONTROL_POINT_PATCH = 16, + PRIMITIVE_10_CONTROL_POINT_PATCH = 17, + PRIMITIVE_11_CONTROL_POINT_PATCH = 18, + PRIMITIVE_12_CONTROL_POINT_PATCH = 19, + PRIMITIVE_13_CONTROL_POINT_PATCH = 20, + PRIMITIVE_14_CONTROL_POINT_PATCH = 21, + PRIMITIVE_15_CONTROL_POINT_PATCH = 22, + PRIMITIVE_16_CONTROL_POINT_PATCH = 23, + PRIMITIVE_17_CONTROL_POINT_PATCH = 24, + PRIMITIVE_18_CONTROL_POINT_PATCH = 25, + PRIMITIVE_19_CONTROL_POINT_PATCH = 26, + PRIMITIVE_20_CONTROL_POINT_PATCH = 27, + PRIMITIVE_21_CONTROL_POINT_PATCH = 28, + PRIMITIVE_22_CONTROL_POINT_PATCH = 29, + PRIMITIVE_23_CONTROL_POINT_PATCH = 30, + PRIMITIVE_24_CONTROL_POINT_PATCH = 31, + PRIMITIVE_25_CONTROL_POINT_PATCH = 32, + PRIMITIVE_26_CONTROL_POINT_PATCH = 33, + PRIMITIVE_27_CONTROL_POINT_PATCH = 34, + PRIMITIVE_28_CONTROL_POINT_PATCH = 35, + PRIMITIVE_29_CONTROL_POINT_PATCH = 36, + PRIMITIVE_30_CONTROL_POINT_PATCH = 37, + PRIMITIVE_31_CONTROL_POINT_PATCH = 38, + PRIMITIVE_32_CONTROL_POINT_PATCH = 39, +} PRIMITIVE; + +static PRIMITIVE DecodeGSInputPrimitive(uint32_t ui32Token) +{ + return (PRIMITIVE)((ui32Token & 0x0001f800) >> 11); +} + +static TESSELLATOR_PARTITIONING DecodeTessPartitioning(uint32_t ui32Token) +{ + return (TESSELLATOR_PARTITIONING)((ui32Token & 0x00003800) >> 11); +} + +static TESSELLATOR_DOMAIN DecodeTessDomain(uint32_t ui32Token) +{ + return (TESSELLATOR_DOMAIN)((ui32Token & 0x00001800) >> 11); +} + +static TESSELLATOR_OUTPUT_PRIMITIVE DecodeTessOutPrim(uint32_t ui32Token) +{ + return (TESSELLATOR_OUTPUT_PRIMITIVE)((ui32Token & 0x00003800) >> 11); +} + +static const uint32_t SYNC_THREADS_IN_GROUP = 0x00000800; +static const uint32_t SYNC_THREAD_GROUP_SHARED_MEMORY = 0x00001000; +static const uint32_t SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP = 0x00002000; +static const uint32_t SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL = 0x00004000; + +static uint32_t DecodeSyncFlags(uint32_t ui32Token) +{ + return ui32Token & 0x00007800; +} + +// 
The number of types that implement this interface +static uint32_t DecodeInterfaceTableLength(uint32_t ui32Token) +{ + return (uint32_t)((ui32Token & 0x0000ffff) >> 0); +} + +// The number of interfaces that are defined in this array. +static uint32_t DecodeInterfaceArrayLength(uint32_t ui32Token) +{ + return (uint32_t)((ui32Token & 0xffff0000) >> 16); +} + +typedef enum CUSTOMDATA_CLASS +{ + CUSTOMDATA_COMMENT = 0, + CUSTOMDATA_DEBUGINFO, + CUSTOMDATA_OPAQUE, + CUSTOMDATA_DCL_IMMEDIATE_CONSTANT_BUFFER, + CUSTOMDATA_SHADER_MESSAGE, +} CUSTOMDATA_CLASS; + +static CUSTOMDATA_CLASS DecodeCustomDataClass(uint32_t ui32Token) +{ + return (CUSTOMDATA_CLASS)((ui32Token & 0xfffff800) >> 11); +} + +static uint32_t DecodeInstructionSaturate(uint32_t ui32Token) +{ + return (ui32Token & 0x00002000) ? 1 : 0; +} + +static uint32_t DecodeInstructionPreciseMask(uint32_t ui32Token) // "precise" keyword +{ + return (uint32_t)((ui32Token & 0x00780000) >> 19); +} + +typedef enum OPERAND_MIN_PRECISION +{ + OPERAND_MIN_PRECISION_DEFAULT = 0, // Default precision + // for the shader model + OPERAND_MIN_PRECISION_FLOAT_16 = 1, // Min 16 bit/component float + OPERAND_MIN_PRECISION_FLOAT_2_8 = 2, // Min 10(2.8)bit/comp. float + OPERAND_MIN_PRECISION_SINT_16 = 4, // Min 16 bit/comp. signed integer + OPERAND_MIN_PRECISION_UINT_16 = 5, // Min 16 bit/comp. unsigned integer +} OPERAND_MIN_PRECISION; + +static uint32_t DecodeOperandMinPrecision(uint32_t ui32Token) +{ + return (ui32Token & 0x0001C000) >> 14; +} + +static uint32_t DecodeOutputControlPointCount(uint32_t ui32Token) +{ + return ((ui32Token & 0x0001f800) >> 11); +} + +typedef enum IMMEDIATE_ADDRESS_OFFSET_COORD +{ + IMMEDIATE_ADDRESS_OFFSET_U = 0, + IMMEDIATE_ADDRESS_OFFSET_V = 1, + IMMEDIATE_ADDRESS_OFFSET_W = 2, +} IMMEDIATE_ADDRESS_OFFSET_COORD; + + +#define IMMEDIATE_ADDRESS_OFFSET_SHIFT(Coord) (9+4*((Coord)&3)) +#define IMMEDIATE_ADDRESS_OFFSET_MASK(Coord) (0x0000000f<> (IMMEDIATE_ADDRESS_OFFSET_SHIFT(eCoord)))); +} + +// UAV access scope flags +static const uint32_t GLOBALLY_COHERENT_ACCESS = 0x00010000; +static uint32_t DecodeAccessCoherencyFlags(uint32_t ui32Token) +{ + return ui32Token & 0x00010000; +} + +typedef enum RESINFO_RETURN_TYPE +{ + RESINFO_INSTRUCTION_RETURN_FLOAT = 0, + RESINFO_INSTRUCTION_RETURN_RCPFLOAT = 1, + RESINFO_INSTRUCTION_RETURN_UINT = 2 +} RESINFO_RETURN_TYPE; + +static RESINFO_RETURN_TYPE DecodeResInfoReturnType(uint32_t ui32Token) +{ + return (RESINFO_RETURN_TYPE)((ui32Token & 0x00001800) >> 11); +} + +typedef enum SB_SAMPLER_MODE +{ + D3D10_SB_SAMPLER_MODE_DEFAULT = 0, + D3D10_SB_SAMPLER_MODE_COMPARISON = 1, + D3D10_SB_SAMPLER_MODE_MONO = 2, +} SB_SAMPLER_MODE; + +static SB_SAMPLER_MODE DecodeSamplerMode(uint32_t ui32Token) +{ + return (SB_SAMPLER_MODE)((ui32Token & 0x00001800) >> 11); +} + +#endif diff --git a/third_party/HLSLcc/src/reflect.cpp b/third_party/HLSLcc/src/reflect.cpp new file mode 100644 index 0000000..303dbfd --- /dev/null +++ b/third_party/HLSLcc/src/reflect.cpp @@ -0,0 +1,620 @@ +#include "internal_includes/reflect.h" +#include "internal_includes/debug.h" +#include "internal_includes/decode.h" +#include "bstrlib.h" +#include +#include +#include + +static void FormatVariableName(std::string & Name) +{ + /* MSDN http://msdn.microsoft.com/en-us/library/windows/desktop/bb944006(v=vs.85).aspx + The uniform function parameters appear in the + constant table prepended with a dollar sign ($), + unlike the global variables. 
The dollar sign is + required to avoid name collisions between local + uniform inputs and global variables of the same name.*/ + + /* Leave $ThisPointer, $Element and $Globals as-is. + Otherwise remove $ character ($ is not a valid character for GLSL variable names). */ + if (Name[0] == '$') + { + if (strcmp(Name.c_str(), "$Element") != 0 && + strcmp(Name.c_str(), "$Globals") != 0 && + strcmp(Name.c_str(), "$ThisPointer") != 0) + { + Name[0] = '_'; + } + } +} + +static std::string ReadStringFromTokenStream(const uint32_t* tokens) +{ + char* charTokens = (char*)tokens; + return std::string(charTokens); +} + +static int MaskToRebaseOffset(const uint32_t mask) +{ + int res = 0; + uint32_t m = mask; + while ((m & 1) == 0) + { + res++; + m = m >> 1; + } + return res; +} + +static void ReadInputSignatures(const uint32_t* pui32Tokens, + ShaderInfo* psShaderInfo, + const int extended) +{ + uint32_t i; + + const uint32_t* pui32FirstSignatureToken = pui32Tokens; + const uint32_t ui32ElementCount = *pui32Tokens++; + /* const uint32_t ui32Key = * */ pui32Tokens++; + + psShaderInfo->psInputSignatures.clear(); + psShaderInfo->psInputSignatures.resize(ui32ElementCount); + + for (i = 0; i < ui32ElementCount; ++i) + { + uint32_t ui32ComponentMasks; + ShaderInfo::InOutSignature* psCurrentSignature = &psShaderInfo->psInputSignatures[i]; + uint32_t ui32SemanticNameOffset; + + psCurrentSignature->ui32Stream = 0; + psCurrentSignature->eMinPrec = MIN_PRECISION_DEFAULT; + + if (extended) + psCurrentSignature->ui32Stream = *pui32Tokens++; + + ui32SemanticNameOffset = *pui32Tokens++; + psCurrentSignature->ui32SemanticIndex = *pui32Tokens++; + psCurrentSignature->eSystemValueType = (SPECIAL_NAME)*pui32Tokens++; + psCurrentSignature->eComponentType = (INOUT_COMPONENT_TYPE)*pui32Tokens++; + psCurrentSignature->ui32Register = *pui32Tokens++; + + ui32ComponentMasks = *pui32Tokens++; + psCurrentSignature->ui32Mask = ui32ComponentMasks & 0x7F; + //Shows which components are read + psCurrentSignature->ui32ReadWriteMask = (ui32ComponentMasks & 0x7F00) >> 8; + psCurrentSignature->iRebase = MaskToRebaseOffset(psCurrentSignature->ui32Mask); + + if (extended) + psCurrentSignature->eMinPrec = (MIN_PRECISION)*pui32Tokens++; + + psCurrentSignature->semanticName = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstSignatureToken + ui32SemanticNameOffset)); + } +} + +static void ReadOutputSignatures(const uint32_t* pui32Tokens, + ShaderInfo* psShaderInfo, + const int minPrec, + const int streams) +{ + uint32_t i; + + const uint32_t* pui32FirstSignatureToken = pui32Tokens; + const uint32_t ui32ElementCount = *pui32Tokens++; + /*const uint32_t ui32Key = * */ pui32Tokens++; + + psShaderInfo->psOutputSignatures.clear(); + psShaderInfo->psOutputSignatures.resize(ui32ElementCount); + + for (i = 0; i < ui32ElementCount; ++i) + { + uint32_t ui32ComponentMasks; + ShaderInfo::InOutSignature* psCurrentSignature = &psShaderInfo->psOutputSignatures[i]; + uint32_t ui32SemanticNameOffset; + + psCurrentSignature->ui32Stream = 0; + psCurrentSignature->eMinPrec = MIN_PRECISION_DEFAULT; + + if (streams) + psCurrentSignature->ui32Stream = *pui32Tokens++; + + ui32SemanticNameOffset = *pui32Tokens++; + psCurrentSignature->ui32SemanticIndex = *pui32Tokens++; + psCurrentSignature->eSystemValueType = (SPECIAL_NAME)*pui32Tokens++; + psCurrentSignature->eComponentType = (INOUT_COMPONENT_TYPE)*pui32Tokens++; + psCurrentSignature->ui32Register = *pui32Tokens++; + + // Massage some special inputs/outputs to match the types of GLSL counterparts + 
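+        // e.g. the render target array index maps to gl_Layer in GLSL, which is declared as an int, so a signed integer component type is forced for it below.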
if (psCurrentSignature->eSystemValueType == NAME_RENDER_TARGET_ARRAY_INDEX) + { + psCurrentSignature->eComponentType = INOUT_COMPONENT_SINT32; + } + + ui32ComponentMasks = *pui32Tokens++; + psCurrentSignature->ui32Mask = ui32ComponentMasks & 0x7F; + //Shows which components are NEVER written. + psCurrentSignature->ui32ReadWriteMask = (ui32ComponentMasks & 0x7F00) >> 8; + psCurrentSignature->iRebase = MaskToRebaseOffset(psCurrentSignature->ui32Mask); + + if (minPrec) + psCurrentSignature->eMinPrec = (MIN_PRECISION)*pui32Tokens++; + + psCurrentSignature->semanticName = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstSignatureToken + ui32SemanticNameOffset)); + } +} + +static void ReadPatchConstantSignatures(const uint32_t* pui32Tokens, + ShaderInfo* psShaderInfo, + const int minPrec, + const int streams) +{ + uint32_t i; + + const uint32_t* pui32FirstSignatureToken = pui32Tokens; + const uint32_t ui32ElementCount = *pui32Tokens++; + /*const uint32_t ui32Key = * */ pui32Tokens++; + + psShaderInfo->psPatchConstantSignatures.clear(); + psShaderInfo->psPatchConstantSignatures.resize(ui32ElementCount); + + for (i = 0; i < ui32ElementCount; ++i) + { + uint32_t ui32ComponentMasks; + ShaderInfo::InOutSignature* psCurrentSignature = &psShaderInfo->psPatchConstantSignatures[i]; + uint32_t ui32SemanticNameOffset; + + psCurrentSignature->ui32Stream = 0; + psCurrentSignature->eMinPrec = MIN_PRECISION_DEFAULT; + + if (streams) + psCurrentSignature->ui32Stream = *pui32Tokens++; + + ui32SemanticNameOffset = *pui32Tokens++; + psCurrentSignature->ui32SemanticIndex = *pui32Tokens++; + psCurrentSignature->eSystemValueType = (SPECIAL_NAME)*pui32Tokens++; + psCurrentSignature->eComponentType = (INOUT_COMPONENT_TYPE)*pui32Tokens++; + psCurrentSignature->ui32Register = *pui32Tokens++; + + // Massage some special inputs/outputs to match the types of GLSL counterparts + if (psCurrentSignature->eSystemValueType == NAME_RENDER_TARGET_ARRAY_INDEX) + { + psCurrentSignature->eComponentType = INOUT_COMPONENT_SINT32; + } + + ui32ComponentMasks = *pui32Tokens++; + psCurrentSignature->ui32Mask = ui32ComponentMasks & 0x7F; + //Shows which components are NEVER written. 
+ psCurrentSignature->ui32ReadWriteMask = (ui32ComponentMasks & 0x7F00) >> 8; + psCurrentSignature->iRebase = MaskToRebaseOffset(psCurrentSignature->ui32Mask); + + if (minPrec) + psCurrentSignature->eMinPrec = (MIN_PRECISION)*pui32Tokens++; + + psCurrentSignature->semanticName = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstSignatureToken + ui32SemanticNameOffset)); + } +} + +static const uint32_t* ReadResourceBinding(ShaderInfo* psShaderInfo, const uint32_t* pui32FirstResourceToken, const uint32_t* pui32Tokens, ResourceBinding* psBinding, uint32_t decodeFlags) +{ + uint32_t ui32NameOffset = *pui32Tokens++; + + psBinding->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstResourceToken + ui32NameOffset)); + FormatVariableName(psBinding->name); + + psBinding->eType = (ResourceType) * pui32Tokens++; + psBinding->ui32ReturnType = (RESOURCE_RETURN_TYPE)*pui32Tokens++; + psBinding->eDimension = (REFLECT_RESOURCE_DIMENSION)*pui32Tokens++; + psBinding->ui32NumSamples = *pui32Tokens++; // fxc generates 2^32 - 1 for non MS images + psBinding->ui32BindPoint = *pui32Tokens++; + psBinding->ui32BindCount = *pui32Tokens++; + psBinding->ui32Flags = *pui32Tokens++; + if (((psShaderInfo->ui32MajorVersion >= 5) && (psShaderInfo->ui32MinorVersion >= 1)) || + (psShaderInfo->ui32MajorVersion > 5)) + { + psBinding->ui32Space = *pui32Tokens++; + psBinding->ui32RangeID = *pui32Tokens++; + } + + psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_UNKNOWN; + + if (decodeFlags & HLSLCC_FLAG_SAMPLER_PRECISION_ENCODED_IN_NAME) + { + if (psBinding->name.rfind("_highp") == psBinding->name.length() - 6) + { + psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_HIGHP; + psBinding->name.resize(psBinding->name.length() - 6); + } + else if (psBinding->name.rfind("_mediump") == psBinding->name.length() - 8) + { + psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_MEDIUMP; + psBinding->name.resize(psBinding->name.length() - 8); + } + else if (psBinding->name.rfind("_lowp") == psBinding->name.length() - 5) + { + psBinding->ePrecision = REFLECT_RESOURCE_PRECISION_LOWP; + psBinding->name.resize(psBinding->name.length() - 5); + } + } + + return pui32Tokens; +} + +//Read D3D11_SHADER_TYPE_DESC +static void ReadShaderVariableType(const uint32_t ui32MajorVersion, + const uint32_t* pui32FirstConstBufToken, + const uint32_t* pui32tokens, ShaderVarType* varType) +{ + const uint16_t* pui16Tokens = (const uint16_t*)pui32tokens; + uint16_t ui32MemberCount; + uint32_t ui32MemberOffset; + const uint32_t* pui32MemberTokens; + uint32_t i; + + varType->Class = (SHADER_VARIABLE_CLASS)pui16Tokens[0]; + varType->Type = (SHADER_VARIABLE_TYPE)pui16Tokens[1]; + varType->Rows = pui16Tokens[2]; + varType->Columns = pui16Tokens[3]; + varType->Elements = pui16Tokens[4]; + + varType->MemberCount = ui32MemberCount = pui16Tokens[5]; + varType->Members.clear(); + + if (varType->ParentCount) + { + // Add empty brackets for array parents. Indices are filled in later in the printing codes. + if (varType->Parent->Elements > 1) + varType->fullName = varType->Parent->fullName + "[]." + varType->name; + else + varType->fullName = varType->Parent->fullName + "." 
+ varType->name; + } + + if (ui32MemberCount) + { + varType->Members.resize(ui32MemberCount); + + ui32MemberOffset = pui32tokens[3]; + + pui32MemberTokens = (const uint32_t*)((const char*)pui32FirstConstBufToken + ui32MemberOffset); + + for (i = 0; i < ui32MemberCount; ++i) + { + uint32_t ui32NameOffset = *pui32MemberTokens++; + uint32_t ui32MemberTypeOffset = *pui32MemberTokens++; + + varType->Members[i].Parent = varType; + varType->Members[i].ParentCount = varType->ParentCount + 1; + + varType->Members[i].Offset = *pui32MemberTokens++; + + varType->Members[i].name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstConstBufToken + ui32NameOffset)); + + ReadShaderVariableType(ui32MajorVersion, pui32FirstConstBufToken, + (const uint32_t*)((const char*)pui32FirstConstBufToken + ui32MemberTypeOffset), &varType->Members[i]); + } + } +} + +static const uint32_t* ReadConstantBuffer(ShaderInfo* psShaderInfo, + const uint32_t* pui32FirstConstBufToken, const uint32_t* pui32Tokens, ConstantBuffer* psBuffer) +{ + uint32_t i; + uint32_t ui32NameOffset = *pui32Tokens++; + uint32_t ui32VarCount = *pui32Tokens++; + uint32_t ui32VarOffset = *pui32Tokens++; + const uint32_t* pui32VarToken = (const uint32_t*)((const char*)pui32FirstConstBufToken + ui32VarOffset); + + psBuffer->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstConstBufToken + ui32NameOffset)); + FormatVariableName(psBuffer->name); + + psBuffer->asVars.clear(); + psBuffer->asVars.resize(ui32VarCount); + + for (i = 0; i < ui32VarCount; ++i) + { + //D3D11_SHADER_VARIABLE_DESC + ShaderVar * const psVar = &psBuffer->asVars[i]; + + uint32_t ui32TypeOffset; + uint32_t ui32DefaultValueOffset; + + ui32NameOffset = *pui32VarToken++; + + psVar->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstConstBufToken + ui32NameOffset)); + FormatVariableName(psVar->name); + + psVar->ui32StartOffset = *pui32VarToken++; + psVar->ui32Size = *pui32VarToken++; + + //skip ui32Flags + pui32VarToken++; + + ui32TypeOffset = *pui32VarToken++; + + psVar->sType.name = psVar->name; + psVar->sType.fullName = psVar->name; + psVar->sType.Parent = 0; + psVar->sType.ParentCount = 0; + psVar->sType.Offset = 0; + psVar->sType.m_IsUsed = false; + + ReadShaderVariableType(psShaderInfo->ui32MajorVersion, pui32FirstConstBufToken, + (const uint32_t*)((const char*)pui32FirstConstBufToken + ui32TypeOffset), &psVar->sType); + + ui32DefaultValueOffset = *pui32VarToken++; + + + if (psShaderInfo->ui32MajorVersion >= 5) + { + /*uint32_t StartTexture = * */ pui32VarToken++; + /*uint32_t TextureSize = * */ pui32VarToken++; + /*uint32_t StartSampler = * */ pui32VarToken++; + /*uint32_t SamplerSize = * */ pui32VarToken++; + } + + psVar->haveDefaultValue = 0; + + if (ui32DefaultValueOffset) + { + uint32_t i = 0; + const uint32_t ui32NumDefaultValues = psVar->ui32Size / 4; + const uint32_t* pui32DefaultValToken = (const uint32_t*)((const char*)pui32FirstConstBufToken + ui32DefaultValueOffset); + + //Always a sequence of 4-bytes at the moment. + //bool const becomes 0 or 0xFFFFFFFF int, int & float are 4-bytes. 
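+                // e.g. a float4 default value occupies 4 consecutive DWORDs (16 bytes), so the variable size is expected to be a multiple of 4 bytes.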
+ ASSERT(psVar->ui32Size % 4 == 0); + + psVar->haveDefaultValue = 1; + + psVar->pui32DefaultValues.clear(); + psVar->pui32DefaultValues.resize(psVar->ui32Size / 4); + + for (i = 0; i < ui32NumDefaultValues; ++i) + { + psVar->pui32DefaultValues[i] = pui32DefaultValToken[i]; + } + } + } + + + { + psBuffer->ui32TotalSizeInBytes = *pui32Tokens++; + + //skip ui32Flags + pui32Tokens++; + //skip ui32BufferType + pui32Tokens++; + } + + return pui32Tokens; +} + +static void ReadResources(const uint32_t* pui32Tokens,//in + ShaderInfo* psShaderInfo, //out + uint32_t decodeFlags) +{ + ResourceBinding* psResBindings; + ConstantBuffer* psConstantBuffers; + const uint32_t* pui32ConstantBuffers; + const uint32_t* pui32ResourceBindings; + const uint32_t* pui32FirstToken = pui32Tokens; + uint32_t i; + + const uint32_t ui32NumConstantBuffers = *pui32Tokens++; + const uint32_t ui32ConstantBufferOffset = *pui32Tokens++; + + uint32_t ui32NumResourceBindings = *pui32Tokens++; + uint32_t ui32ResourceBindingOffset = *pui32Tokens++; + /*uint32_t ui32ShaderModel = * */ pui32Tokens++; + /*uint32_t ui32CompileFlags = * */ pui32Tokens++;//D3DCompile flags? http://msdn.microsoft.com/en-us/library/gg615083(v=vs.85).aspx + + //Resources + pui32ResourceBindings = (const uint32_t*)((const char*)pui32FirstToken + ui32ResourceBindingOffset); + + psShaderInfo->psResourceBindings.clear(); + psShaderInfo->psResourceBindings.resize(ui32NumResourceBindings); + psResBindings = ui32NumResourceBindings == 0 ? NULL : &psShaderInfo->psResourceBindings[0]; + + for (i = 0; i < ui32NumResourceBindings; ++i) + { + pui32ResourceBindings = ReadResourceBinding(psShaderInfo, pui32FirstToken, pui32ResourceBindings, psResBindings + i, decodeFlags); + ASSERT(psResBindings[i].ui32BindPoint < MAX_RESOURCE_BINDINGS); + } + + //Constant buffers + pui32ConstantBuffers = (const uint32_t*)((const char*)pui32FirstToken + ui32ConstantBufferOffset); + + psShaderInfo->psConstantBuffers.clear(); + psShaderInfo->psConstantBuffers.resize(ui32NumConstantBuffers); + psConstantBuffers = ui32NumConstantBuffers == 0 ? 
NULL : &psShaderInfo->psConstantBuffers[0]; + + for (i = 0; i < ui32NumConstantBuffers; ++i) + { + pui32ConstantBuffers = ReadConstantBuffer(psShaderInfo, pui32FirstToken, pui32ConstantBuffers, psConstantBuffers + i); + } + + //Map resource bindings to constant buffers + if (psShaderInfo->psConstantBuffers.size()) + { + /* HLSL allows the following: + cbuffer A + {...} + cbuffer A + {...} + And both will be present in the assembly if used + + So we need to track which ones we matched already and throw an error if two buffers have the same name + */ + std::vector alreadyBound(ui32NumConstantBuffers, 0); + for (i = 0; i < ui32NumResourceBindings; ++i) + { + ResourceGroup eRGroup; + uint32_t cbufIndex = 0; + + eRGroup = ShaderInfo::ResourceTypeToResourceGroup(psResBindings[i].eType); + + //Find the constant buffer whose name matches the resource at the given resource binding point + for (cbufIndex = 0; cbufIndex < psShaderInfo->psConstantBuffers.size(); cbufIndex++) + { + if (psConstantBuffers[cbufIndex].name == psResBindings[i].name && alreadyBound[cbufIndex] == 0) + { + psShaderInfo->aui32ResourceMap[eRGroup][psResBindings[i].ui32BindPoint] = cbufIndex; + alreadyBound[cbufIndex] = 1; + break; + } + } + } + } +} + +static const uint16_t* ReadClassType(const uint32_t* pui32FirstInterfaceToken, const uint16_t* pui16Tokens, ClassType* psClassType) +{ + const uint32_t* pui32Tokens = (const uint32_t*)pui16Tokens; + uint32_t ui32NameOffset = *pui32Tokens; + pui16Tokens += 2; + + psClassType->ui16ID = *pui16Tokens++; + psClassType->ui16ConstBufStride = *pui16Tokens++; + psClassType->ui16Texture = *pui16Tokens++; + psClassType->ui16Sampler = *pui16Tokens++; + + psClassType->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstInterfaceToken + ui32NameOffset)); + + return pui16Tokens; +} + +static const uint16_t* ReadClassInstance(const uint32_t* pui32FirstInterfaceToken, const uint16_t* pui16Tokens, ClassInstance* psClassInstance) +{ + uint32_t ui32NameOffset = *pui16Tokens++ << 16; + ui32NameOffset |= *pui16Tokens++; + + psClassInstance->ui16ID = *pui16Tokens++; + psClassInstance->ui16ConstBuf = *pui16Tokens++; + psClassInstance->ui16ConstBufOffset = *pui16Tokens++; + psClassInstance->ui16Texture = *pui16Tokens++; + psClassInstance->ui16Sampler = *pui16Tokens++; + + psClassInstance->name = ReadStringFromTokenStream((const uint32_t*)((const char*)pui32FirstInterfaceToken + ui32NameOffset)); + + return pui16Tokens; +} + +static void ReadInterfaces(const uint32_t* pui32Tokens, + ShaderInfo* psShaderInfo) +{ + uint32_t i; + uint32_t ui32StartSlot; + const uint32_t* pui32FirstInterfaceToken = pui32Tokens; + const uint32_t ui32ClassInstanceCount = *pui32Tokens++; + const uint32_t ui32ClassTypeCount = *pui32Tokens++; + const uint32_t ui32InterfaceSlotRecordCount = *pui32Tokens++; + /*const uint32_t ui32InterfaceSlotCount = * */ pui32Tokens++; + const uint32_t ui32ClassInstanceOffset = *pui32Tokens++; + const uint32_t ui32ClassTypeOffset = *pui32Tokens++; + const uint32_t ui32InterfaceSlotOffset = *pui32Tokens++; + + const uint16_t* pui16ClassTypes = (const uint16_t*)((const char*)pui32FirstInterfaceToken + ui32ClassTypeOffset); + const uint16_t* pui16ClassInstances = (const uint16_t*)((const char*)pui32FirstInterfaceToken + ui32ClassInstanceOffset); + const uint32_t* pui32InterfaceSlots = (const uint32_t*)((const char*)pui32FirstInterfaceToken + ui32InterfaceSlotOffset); + + const uint32_t* pui32InterfaceSlotTokens = pui32InterfaceSlots; + + ClassType* psClassTypes; + ClassInstance* 
psClassInstances; + + psShaderInfo->psClassTypes.clear(); + psShaderInfo->psClassTypes.resize(ui32ClassTypeCount); + psClassTypes = &psShaderInfo->psClassTypes[0]; + + for (i = 0; i < ui32ClassTypeCount; ++i) + { + pui16ClassTypes = ReadClassType(pui32FirstInterfaceToken, pui16ClassTypes, psClassTypes + i); + psClassTypes[i].ui16ID = (uint16_t)i; + } + + psShaderInfo->psClassInstances.clear(); + psShaderInfo->psClassInstances.resize(ui32ClassInstanceCount); + psClassInstances = &psShaderInfo->psClassInstances[0]; + + for (i = 0; i < ui32ClassInstanceCount; ++i) + { + pui16ClassInstances = ReadClassInstance(pui32FirstInterfaceToken, pui16ClassInstances, psClassInstances + i); + } + + //Slots map function table to $ThisPointer cbuffer variable index + ui32StartSlot = 0; + for (i = 0; i < ui32InterfaceSlotRecordCount; ++i) + { + uint32_t k; + + const uint32_t ui32SlotSpan = *pui32InterfaceSlotTokens++; + const uint32_t ui32Count = *pui32InterfaceSlotTokens++; + const uint32_t ui32TypeIDOffset = *pui32InterfaceSlotTokens++; + const uint32_t ui32TableIDOffset = *pui32InterfaceSlotTokens++; + + const uint16_t* pui16TypeID = (const uint16_t*)((const char*)pui32FirstInterfaceToken + ui32TypeIDOffset); + const uint32_t* pui32TableID = (const uint32_t*)((const char*)pui32FirstInterfaceToken + ui32TableIDOffset); + + for (k = 0; k < ui32Count; ++k) + { + psShaderInfo->aui32TableIDToTypeID[*pui32TableID++] = *pui16TypeID++; + } + + ui32StartSlot += ui32SlotSpan; + } +} + +void LoadShaderInfo(const uint32_t ui32MajorVersion, + const uint32_t ui32MinorVersion, + const ReflectionChunks* psChunks, + ShaderInfo* psInfo, + uint32_t decodeFlags) +{ + const uint32_t* pui32Inputs = psChunks->pui32Inputs; + const uint32_t* pui32Inputs11 = psChunks->pui32Inputs11; + const uint32_t* pui32Resources = psChunks->pui32Resources; + const uint32_t* pui32Interfaces = psChunks->pui32Interfaces; + const uint32_t* pui32Outputs = psChunks->pui32Outputs; + const uint32_t* pui32Outputs11 = psChunks->pui32Outputs11; + const uint32_t* pui32OutputsWithStreams = psChunks->pui32OutputsWithStreams; + const uint32_t* pui32PatchConstants = psChunks->pui32PatchConstants; + const uint32_t* pui32PatchConstants11 = psChunks->pui32PatchConstants11; + + psInfo->eTessOutPrim = TESSELLATOR_OUTPUT_UNDEFINED; + psInfo->eTessPartitioning = TESSELLATOR_PARTITIONING_UNDEFINED; + psInfo->ui32TessInputControlPointCount = 0; + psInfo->ui32TessOutputControlPointCount = 0; + psInfo->eTessDomain = TESSELLATOR_DOMAIN_UNDEFINED; + psInfo->bEarlyFragmentTests = false; + + psInfo->ui32MajorVersion = ui32MajorVersion; + psInfo->ui32MinorVersion = ui32MinorVersion; + + + if (pui32Inputs) + ReadInputSignatures(pui32Inputs, psInfo, 0); + if (pui32Inputs11) + ReadInputSignatures(pui32Inputs11, psInfo, 1); + if (pui32Resources) + ReadResources(pui32Resources, psInfo, decodeFlags); + if (pui32Interfaces) + ReadInterfaces(pui32Interfaces, psInfo); + if (pui32Outputs) + ReadOutputSignatures(pui32Outputs, psInfo, 0, 0); + if (pui32Outputs11) + ReadOutputSignatures(pui32Outputs11, psInfo, 1, 1); + if (pui32OutputsWithStreams) + ReadOutputSignatures(pui32OutputsWithStreams, psInfo, 0, 1); + if (pui32PatchConstants) + ReadPatchConstantSignatures(pui32PatchConstants, psInfo, 0, 0); + if (pui32PatchConstants11) + ReadPatchConstantSignatures(pui32PatchConstants11, psInfo, 1, 1); + + { + uint32_t i; + for (i = 0; i < psInfo->psConstantBuffers.size(); ++i) + { + if (psInfo->psConstantBuffers[i].name == "$ThisPointer") + { + psInfo->psThisPointerConstBuffer = 
&psInfo->psConstantBuffers[i]; + } + } + } +} diff --git a/third_party/HLSLcc/src/toGLSL.cpp b/third_party/HLSLcc/src/toGLSL.cpp new file mode 100644 index 0000000..c6c4e14 --- /dev/null +++ b/third_party/HLSLcc/src/toGLSL.cpp @@ -0,0 +1,1190 @@ +#include + +#include "internal_includes/tokens.h" +#include "internal_includes/decode.h" +#include "stdlib.h" +#include "stdio.h" +#include "bstrlib.h" +#include "internal_includes/toGLSL.h" +#include "internal_includes/toGLSLOperand.h" +#include "internal_includes/Declaration.h" +#include "internal_includes/languages.h" +#include "internal_includes/debug.h" +#include "internal_includes/HLSLccToolkit.h" +#include "internal_includes/UseDefineChains.h" +#include "internal_includes/DataTypeAnalysis.h" +#include "internal_includes/Shader.h" +#include "internal_includes/HLSLCrossCompilerContext.h" +#include "internal_includes/Instruction.h" +#include "internal_includes/LoopTransform.h" +#include "UnityInstancingFlexibleArraySize.h" +#include +#include + +// In GLSL, the input and output names cannot clash. +// Also, the output name of previous stage must match the input name of the next stage. +// So, do gymnastics depending on which shader we're running on and which other shaders exist in this program. +// +void ToGLSL::SetIOPrefixes() +{ + switch (psContext->psShader->eShaderType) + { + case VERTEX_SHADER: + psContext->inputPrefix = "in_"; + psContext->outputPrefix = "vs_"; + break; + + case HULL_SHADER: + // Input always coming from vertex shader + psContext->inputPrefix = "vs_"; + psContext->outputPrefix = "hs_"; + break; + + case DOMAIN_SHADER: + // There's no domain shader without hull shader + psContext->inputPrefix = "hs_"; + psContext->outputPrefix = "ds_"; + break; + + case GEOMETRY_SHADER: + // The input depends on whether there's a tessellation shader before us + if (psContext->psDependencies && (psContext->psDependencies->ui32ProgramStages & PS_FLAG_DOMAIN_SHADER)) + psContext->inputPrefix = "ds_"; + else + psContext->inputPrefix = "vs_"; + + psContext->outputPrefix = "gs_"; + break; + + case PIXEL_SHADER: + // The inputs can come from geom shader, domain shader or directly from vertex shader + if (psContext->psDependencies) + { + if (psContext->psDependencies->ui32ProgramStages & PS_FLAG_GEOMETRY_SHADER) + { + psContext->inputPrefix = "gs_"; + } + else if (psContext->psDependencies->ui32ProgramStages & PS_FLAG_DOMAIN_SHADER) + { + psContext->inputPrefix = "ds_"; + } + else + { + psContext->inputPrefix = "vs_"; + } + } + else + { + psContext->inputPrefix = "vs_"; + } + psContext->outputPrefix = ""; + break; + + + case COMPUTE_SHADER: + default: + // No prefixes + psContext->inputPrefix = ""; + psContext->outputPrefix = ""; + break; + } +} + +static void AddVersionDependentCode(HLSLCrossCompilerContext* psContext) +{ + bstring glsl = *psContext->currentGLSLString; + bstring extensions = psContext->extensions; + bool isES = (psContext->psShader->eTargetLanguage >= LANG_ES_100 && psContext->psShader->eTargetLanguage <= LANG_ES_310); + bool GL_ARB_shader_storage_buffer_object = false; + bool GL_ARB_shader_image_load_store = false; + + if (psContext->psShader->ui32MajorVersion > 3 && psContext->psShader->eTargetLanguage != LANG_ES_100 && psContext->psShader->eTargetLanguage != LANG_ES_300 && psContext->psShader->eTargetLanguage != LANG_ES_310 && !(psContext->psShader->eTargetLanguage >= LANG_330)) + { + psContext->EnableExtension("GL_ARB_shader_bit_encoding"); + } + + if (!HaveCompute(psContext->psShader->eTargetLanguage)) + { + if 
(psContext->psShader->eShaderType == COMPUTE_SHADER) + { + psContext->EnableExtension("GL_ARB_compute_shader"); + } + + if (psContext->psShader->aiOpcodeUsed[OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED] || + psContext->psShader->aiOpcodeUsed[OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW] || + psContext->psShader->aiOpcodeUsed[OPCODE_DCL_RESOURCE_STRUCTURED] || + psContext->psShader->aiOpcodeUsed[OPCODE_DCL_RESOURCE_RAW]) + { + GL_ARB_shader_storage_buffer_object = true; + } + } + + if (!HaveAtomicMem(psContext->psShader->eTargetLanguage) || + !HaveAtomicCounter(psContext->psShader->eTargetLanguage)) + { + if (psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_ALLOC] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_CONSUME] || + psContext->psShader->aiOpcodeUsed[OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED]) + { + psContext->EnableExtension("GL_ARB_shader_atomic_counters"); + } + } + + if (psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_CMP_STORE] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_AND] || + psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_AND] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_IADD] || + psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_IADD] || + psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_OR] || + psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_XOR] || + psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_IMIN] || + psContext->psShader->aiOpcodeUsed[OPCODE_ATOMIC_UMIN] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_IMAX] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_IMIN] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_UMAX] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_UMIN] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_OR] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_XOR] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_EXCH] || + psContext->psShader->aiOpcodeUsed[OPCODE_IMM_ATOMIC_CMP_EXCH]) + { + if (!HaveAtomicMem(psContext->psShader->eTargetLanguage)) + GL_ARB_shader_storage_buffer_object = true; + + if (!HaveImageAtomics(psContext->psShader->eTargetLanguage)) + { + if (isES) + psContext->EnableExtension("GL_OES_shader_image_atomic"); + else + GL_ARB_shader_image_load_store = true; + } + } + + if (!HaveGather(psContext->psShader->eTargetLanguage)) + { + if (psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4] || + psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_PO_C] || + psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_PO] || + psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_C]) + { + psContext->EnableExtension("GL_ARB_texture_gather"); + } + } + + if (IsESLanguage(psContext->psShader->eTargetLanguage)) + { + if (psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTX_COARSE] || + psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTX_FINE] || + psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTX] || + psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTY_COARSE] || + psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTY_FINE] || + psContext->psShader->aiOpcodeUsed[OPCODE_DERIV_RTY]) + { + if (psContext->psShader->eTargetLanguage < LANG_ES_300) + { + psContext->EnableExtension("GL_OES_standard_derivatives"); + } + } + + if (psContext->psShader->eShaderType == PIXEL_SHADER && + (psContext->psShader->aiOpcodeUsed[OPCODE_SAMPLE_L] || + psContext->psShader->aiOpcodeUsed[OPCODE_SAMPLE_C_LZ] || + psContext->psShader->aiOpcodeUsed[OPCODE_SAMPLE_D])) + { + psContext->EnableExtension("GL_EXT_shader_texture_lod"); + + static const int tex_sampler_type_count = 4; + static const char* 
tex_sampler_dim_name[tex_sampler_type_count] = { + "1D", "2D", "3D", "Cube", + }; + + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + { + bcatcstr(extensions, "#if !defined(GL_EXT_shader_texture_lod)\n"); + + for (int dim = 0; dim < tex_sampler_type_count; dim++) + { + bformata(extensions, "#define texture%sLodEXT texture%s\n", tex_sampler_dim_name[dim], tex_sampler_dim_name[dim]); + + if (dim == 1) // 2D + bformata(extensions, "#define texture%sProjLodEXT texture%sProj\n", tex_sampler_dim_name[dim], tex_sampler_dim_name[dim]); + } + bcatcstr(extensions, "#endif\n"); + } + } + } + + if (!HaveGatherNonConstOffset(psContext->psShader->eTargetLanguage)) + { + if (psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_PO_C] || + psContext->psShader->aiOpcodeUsed[OPCODE_GATHER4_PO]) + { + psContext->EnableExtension("GL_ARB_gpu_shader5"); + } + } + + if (!HaveQueryLod(psContext->psShader->eTargetLanguage)) + { + if (psContext->psShader->aiOpcodeUsed[OPCODE_LOD]) + { + psContext->EnableExtension("GL_ARB_texture_query_lod"); + } + } + + if (!HaveQueryLevels(psContext->psShader->eTargetLanguage)) + { + if (psContext->psShader->aiOpcodeUsed[OPCODE_RESINFO]) + { + psContext->EnableExtension("GL_ARB_texture_query_levels"); + psContext->EnableExtension("GL_ARB_shader_image_size"); + } + } + + if (psContext->psShader->aiOpcodeUsed[OPCODE_SAMPLE_INFO]) + { + psContext->EnableExtension("GL_ARB_shader_texture_image_samples"); + } + + if (!HaveImageLoadStore(psContext->psShader->eTargetLanguage)) + { + if (psContext->psShader->aiOpcodeUsed[OPCODE_STORE_UAV_TYPED] || + psContext->psShader->aiOpcodeUsed[OPCODE_STORE_RAW] || + psContext->psShader->aiOpcodeUsed[OPCODE_STORE_STRUCTURED]) + { + GL_ARB_shader_image_load_store = true; + psContext->EnableExtension("GL_ARB_shader_bit_encoding"); + } + else if (psContext->psShader->aiOpcodeUsed[OPCODE_LD_UAV_TYPED] || + psContext->psShader->aiOpcodeUsed[OPCODE_LD_RAW] || + psContext->psShader->aiOpcodeUsed[OPCODE_LD_STRUCTURED]) + { + GL_ARB_shader_image_load_store = true; + } + } + + if (!HaveGeometryShaderARB(psContext->psShader->eTargetLanguage)) + { + if (psContext->psShader->eShaderType == GEOMETRY_SHADER) + { + psContext->EnableExtension("GL_ARB_geometry_shader"); + } + } + + if (psContext->psShader->eTargetLanguage == LANG_ES_300 || psContext->psShader->eTargetLanguage == LANG_ES_310) + { + if (psContext->psShader->eShaderType == GEOMETRY_SHADER) + { + psContext->EnableExtension("GL_OES_geometry_shader"); + psContext->EnableExtension("GL_EXT_geometry_shader"); + } + } + + if (psContext->psShader->eTargetLanguage == LANG_ES_300 || psContext->psShader->eTargetLanguage == LANG_ES_310) + { + if (psContext->psShader->eShaderType == HULL_SHADER || psContext->psShader->eShaderType == DOMAIN_SHADER) + { + psContext->EnableExtension("GL_OES_tessellation_shader"); + psContext->EnableExtension("GL_EXT_tessellation_shader"); + } + } + + if (GL_ARB_shader_storage_buffer_object) + psContext->EnableExtension("GL_ARB_shader_storage_buffer_object"); + + if (GL_ARB_shader_image_load_store) + psContext->EnableExtension("GL_ARB_shader_image_load_store"); + + if (psContext->psShader->eShaderType == PIXEL_SHADER && psContext->psShader->eTargetLanguage >= LANG_120 && !HaveFragmentCoordConventions(psContext->psShader->eTargetLanguage)) + { + psContext->RequireExtension("GL_ARB_fragment_coord_conventions"); + } + + if (psContext->psShader->extensions->EXT_shader_framebuffer_fetch && psContext->psShader->eShaderType == PIXEL_SHADER && psContext->flags & 
HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH) + { + psContext->EnableExtension("GL_EXT_shader_framebuffer_fetch"); + } + + //Handle fragment shader default precision + if (psContext->psShader->eShaderType == PIXEL_SHADER && + (psContext->psShader->eTargetLanguage == LANG_ES_100 || psContext->psShader->eTargetLanguage == LANG_ES_300 || psContext->psShader->eTargetLanguage == LANG_ES_310 || (psContext->flags & HLSLCC_FLAG_NVN_TARGET))) + { + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + { + // gles 2.0 shaders can have mediump as default if the GPU doesn't have highp support + bcatcstr(glsl, + "#ifdef GL_FRAGMENT_PRECISION_HIGH\n" + " precision highp float;\n" + "#else\n" + " precision mediump float;\n" + "#endif\n"); + } + else + { + bcatcstr(glsl, "precision highp float;\n"); + } + + // Define default int precision to highp to avoid issues on platforms that actually implement mediump + bcatcstr(glsl, "precision highp int;\n"); + } + + if (psContext->psShader->eShaderType == PIXEL_SHADER && psContext->psShader->eTargetLanguage >= LANG_150) + { + if (psContext->flags & HLSLCC_FLAG_ORIGIN_UPPER_LEFT) + bcatcstr(glsl, "layout(origin_upper_left) in vec4 gl_FragCoord;\n"); + + if (psContext->flags & HLSLCC_FLAG_PIXEL_CENTER_INTEGER) + bcatcstr(glsl, "layout(pixel_center_integer) in vec4 gl_FragCoord;\n"); + } + + + /* + OpenGL 4.1 API spec: + To use any built-in input or output in the gl_PerVertex block in separable + program objects, shader code must redeclare that block prior to use. + */ + /* DISABLED FOR NOW */ +/* if(psContext->psShader->eShaderType == VERTEX_SHADER && psContext->psShader->eTargetLanguage >= LANG_410) + { + bcatcstr(glsl, "out gl_PerVertex {\n"); + bcatcstr(glsl, "vec4 gl_Position;\n"); + bcatcstr(glsl, "float gl_PointSize;\n"); + bcatcstr(glsl, "float gl_ClipDistance[];"); + bcatcstr(glsl, "};\n"); + }*/ +} + +GLLang ChooseLanguage(Shader* psShader) +{ + // Depends on the HLSL shader model extracted from bytecode. 
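+ // SM 5.x bytecode maps to GLSL 4.30, SM 4.x to GLSL 3.30, and anything older falls back to GLSL 1.20.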
+ switch (psShader->ui32MajorVersion) + { + case 5: + { + return LANG_430; + } + case 4: + { + return LANG_330; + } + default: + { + return LANG_120; + } + } +} + +const char* GetVersionString(GLLang language) +{ + switch (language) + { + case LANG_ES_100: + { + return "#version 100\n"; + break; + } + case LANG_ES_300: + { + return "#version 300 es\n"; + break; + } + case LANG_ES_310: + { + return "#version 310 es\n"; + break; + } + case LANG_120: + { + return "#version 120\n"; + break; + } + case LANG_130: + { + return "#version 130\n"; + break; + } + case LANG_140: + { + return "#version 140\n"; + break; + } + case LANG_150: + { + return "#version 150\n"; + break; + } + case LANG_330: + { + return "#version 330\n"; + break; + } + case LANG_400: + { + return "#version 400\n"; + break; + } + case LANG_410: + { + return "#version 410\n"; + break; + } + case LANG_420: + { + return "#version 420\n"; + break; + } + case LANG_430: + { + return "#version 430\n"; + break; + } + case LANG_440: + { + return "#version 440\n"; + break; + } + default: + { + return ""; + break; + } + } +} + +static const char * GetPhaseFuncName(SHADER_PHASE_TYPE eType) +{ + switch (eType) + { + default: + case MAIN_PHASE: return ""; + case HS_GLOBAL_DECL_PHASE: return "hs_global_decls"; + case HS_FORK_PHASE: return "fork_phase"; + case HS_CTRL_POINT_PHASE: return "control_point_phase"; + case HS_JOIN_PHASE: return "join_phase"; + } +} + +static void DoHullShaderPassthrough(HLSLCrossCompilerContext *psContext) +{ + uint32_t i; + bstring glsl = psContext->glsl; + + for (i = 0; i < psContext->psShader->sInfo.psInputSignatures.size(); i++) + { + ShaderInfo::InOutSignature *psSig = &psContext->psShader->sInfo.psInputSignatures[i]; + const char *Type; + uint32_t ui32NumComponents = HLSLcc::GetNumberBitsSet(psSig->ui32Mask); + switch (psSig->eComponentType) + { + default: + case INOUT_COMPONENT_FLOAT32: + Type = ui32NumComponents > 1 ? "vec" : "float"; + break; + case INOUT_COMPONENT_SINT32: + Type = ui32NumComponents > 1 ? "ivec" : "int"; + break; + case INOUT_COMPONENT_UINT32: + Type = ui32NumComponents > 1 ? 
"uvec" : "uint"; + break; + } + if ((psSig->eSystemValueType == NAME_POSITION || psSig->semanticName == "POS") && psSig->ui32SemanticIndex == 0) + continue; + + std::string inputName; + + { + std::ostringstream oss; + oss << psContext->inputPrefix << psSig->semanticName << psSig->ui32SemanticIndex; + inputName = oss.str(); + } + + if (psContext->psDependencies->IsHullShaderInputAlreadyDeclared(inputName)) + continue; + + psContext->psDependencies->RecordHullShaderInput(inputName); + + std::string outputName; + { + std::ostringstream oss; + oss << psContext->outputPrefix << psSig->semanticName << psSig->ui32SemanticIndex; + outputName = oss.str(); + } + + const char * prec = ""; + if (HavePrecisionQualifiers(psContext)) + { + if (psSig->eMinPrec != MIN_PRECISION_DEFAULT) + prec = "mediump "; + else + prec = "highp "; + } + + bool keepLocation = ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0); + int inLoc = psContext->psDependencies->GetVaryingLocation(inputName, HULL_SHADER, true, keepLocation, psContext->psShader->maxSemanticIndex); + int outLoc = psContext->psDependencies->GetVaryingLocation(outputName, HULL_SHADER, false, keepLocation, psContext->psShader->maxSemanticIndex); + + psContext->AddIndentation(); + if (ui32NumComponents > 1) + bformata(glsl, "layout(location = %d) in %s%s%d %s%s%d[];\n", inLoc, prec, Type, ui32NumComponents, psContext->inputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); + else + bformata(glsl, "layout(location = %d) in %s%s %s%s%d[];\n", inLoc, prec, Type, psContext->inputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); + + psContext->AddIndentation(); + if (ui32NumComponents > 1) + bformata(glsl, "layout(location = %d) out %s%s%d %s%s%d[];\n", outLoc, prec, Type, ui32NumComponents, psContext->outputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); + else + bformata(glsl, "layout(location = %d) out %s%s %s%s%d[];\n", outLoc, prec, Type, psContext->outputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); + } + + psContext->AddIndentation(); + bcatcstr(glsl, "void passthrough_ctrl_points()\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "{\n"); + psContext->indent++; + + for (i = 0; i < psContext->psShader->sInfo.psInputSignatures.size(); i++) + { + const ShaderInfo::InOutSignature *psSig = &psContext->psShader->sInfo.psInputSignatures[i]; + + psContext->AddIndentation(); + + if ((psSig->eSystemValueType == NAME_POSITION || psSig->semanticName == "POS") && psSig->ui32SemanticIndex == 0) + bformata(glsl, "gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"); + else + bformata(glsl, "%s%s%d[gl_InvocationID] = %s%s%d[gl_InvocationID];\n", psContext->outputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex, psContext->inputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex); + } + + psContext->indent--; + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); +} + +GLLang ToGLSL::SetLanguage(GLLang suggestedLanguage) +{ + language = suggestedLanguage; + if (language == LANG_DEFAULT) + { + language = ChooseLanguage(psContext->psShader); + } + return language; +} + +// Go through all declarations and remove reserve UAV occupied binding points +void ResolveStructuredBufferBindingSlots(ShaderPhase *psPhase, HLSLCrossCompilerContext *psContext, GLSLCrossDependencyData *glslDependencyData) +{ + for (uint32_t p = 0; p < psPhase->psDecl.size(); ++p) + { + if (psPhase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW || + 
psPhase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED) + { + uint32_t uav = psPhase->psDecl[p].asOperands[0].ui32RegisterNumber; // uav binding point + + bstring BufNamebstr = bfromcstr(""); + ResourceName(BufNamebstr, psContext, RGROUP_UAV, psPhase->psDecl[p].asOperands[0].ui32RegisterNumber, 0); + + char *btmp = bstr2cstr(BufNamebstr, '\0'); + std::string BufName = btmp; + bcstrfree(btmp); + bdestroy(BufNamebstr); + + glslDependencyData->ReserveNamedBindPoint(BufName, uav, GLSLCrossDependencyData::BufferType_ReadWrite); + } + } +} + +bool ToGLSL::Translate() +{ + bstring glsl; + uint32_t i; + Shader* psShader = psContext->psShader; + uint32_t ui32Phase; + + psContext->psTranslator = this; + + if (language == LANG_DEFAULT) + SetLanguage(LANG_DEFAULT); + + SetIOPrefixes(); + psShader->ExpandSWAPCs(); + psShader->ForcePositionToHighp(); + psShader->AnalyzeIOOverlap(); + if ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0) + psShader->SetMaxSemanticIndex(); + psShader->FindUnusedGlobals(psContext->flags); + + psContext->indent = 0; + + glsl = bfromcstralloc(1024 * 10, "\n"); + bstring extensions = bfromcstralloc(1024 * 10, GetVersionString(language)); + psContext->extensions = extensions; + + psContext->glsl = glsl; + for (i = 0; i < psShader->asPhases.size(); ++i) + { + psShader->asPhases[i].postShaderCode = bfromcstralloc(1024 * 5, ""); + psShader->asPhases[i].earlyMain = bfromcstralloc(1024 * 5, ""); + } + psContext->currentGLSLString = &glsl; + psShader->eTargetLanguage = language; + psContext->currentPhase = MAIN_PHASE; + + if (psShader->extensions) + { + if (psContext->flags & HLSLCC_FLAG_NVN_TARGET) + { + psContext->EnableExtension("GL_ARB_separate_shader_objects"); + psContext->EnableExtension("GL_NV_desktop_lowp_mediump"); // This flag allow FP16 operations (mediump in GLSL) + } + if (psShader->extensions->ARB_explicit_attrib_location) + psContext->RequireExtension("GL_ARB_explicit_attrib_location"); + if (psShader->extensions->ARB_explicit_uniform_location) + psContext->RequireExtension("GL_ARB_explicit_uniform_location"); + if (psShader->extensions->ARB_shading_language_420pack) + psContext->RequireExtension("GL_ARB_shading_language_420pack"); + } + + psContext->ClearDependencyData(); + + AddVersionDependentCode(psContext); + + if (psShader->eShaderType == VERTEX_SHADER && + HaveLimitedInOutLocationQualifier(language, psShader->extensions) && + psContext->flags & HLSLCC_FLAG_NVN_TARGET) + { + bcatcstr(glsl, "out gl_PerVertex { vec4 gl_Position; };\n"); + } + + if (!psContext->psDependencies->m_ExtBlendModes.empty() && psShader->eShaderType == PIXEL_SHADER) + { + psContext->EnableExtension("GL_KHR_blend_equation_advanced"); + bcatcstr(glsl, "#if GL_KHR_blend_equation_advanced\n"); + for (i = 0; i < psContext->psDependencies->m_ExtBlendModes.size(); i++) + { + bformata(glsl, "layout(%s) out;\n", psContext->psDependencies->m_ExtBlendModes[i].c_str()); + } + bcatcstr(glsl, "#endif\n"); + } + + if (psContext->psShader->eTargetLanguage != LANG_ES_100) + { + bool hasConstantBuffers = psContext->psShader->sInfo.psConstantBuffers.size() > 0; + if (hasConstantBuffers) + { + // This value will be replaced at runtime with 0 if we need to disable UBO. 
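+ // When that define is flipped to 0, UNITY_UNIFORM expands to "uniform" so constant buffer members degrade to plain uniforms instead of UBO fields.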
+ bcatcstr(glsl, "#define HLSLCC_ENABLE_UNIFORM_BUFFERS 1\n"); + bcatcstr(glsl, "#if HLSLCC_ENABLE_UNIFORM_BUFFERS\n#define UNITY_UNIFORM\n#else\n#define UNITY_UNIFORM uniform\n#endif\n"); + } + bool hasTextures = false; + for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i) + { + if (psShader->asPhases[0].psDecl[i].eOpcode == OPCODE_DCL_RESOURCE) + { + hasTextures = true; + break; + } + } + if (hasTextures || hasConstantBuffers) + { + // This value will be replaced at runtime with 0 if we need to disable explicit uniform locations. + bcatcstr(glsl, "#define UNITY_SUPPORTS_UNIFORM_LOCATION 1\n"); + bcatcstr(glsl, "#if UNITY_SUPPORTS_UNIFORM_LOCATION\n#define UNITY_LOCATION(x) layout(location = x)\n#define UNITY_BINDING(x) layout(binding = x, std140)\n#else\n#define UNITY_LOCATION(x)\n#define UNITY_BINDING(x) layout(std140)\n#endif\n"); + } + } + + for (ui32Phase = 0; ui32Phase < psShader->asPhases.size(); ui32Phase++) + { + ShaderPhase &phase = psShader->asPhases[ui32Phase]; + phase.UnvectorizeImmMoves(); + psContext->DoDataTypeAnalysis(&phase); + phase.ResolveUAVProperties(psShader->sInfo); + ResolveStructuredBufferBindingSlots(&phase, psContext, psContext->psDependencies); + if (!psContext->IsVulkan() && !psContext->IsSwitch()) + { + phase.PruneConstArrays(); + psContext->ReserveFramebufferFetchInputs(); + } + } + + psShader->PruneTempRegisters(); + + for (ui32Phase = 0; ui32Phase < psShader->asPhases.size(); ui32Phase++) + { + // Loop transform can only be done after the temps have been pruned + ShaderPhase &phase = psShader->asPhases[ui32Phase]; + HLSLcc::DoLoopTransform(psContext, phase); + } + + //Special case. Can have multiple phases. + if (psShader->eShaderType == HULL_SHADER) + { + const SHADER_PHASE_TYPE ePhaseFuncCallOrder[3] = { HS_CTRL_POINT_PHASE, HS_FORK_PHASE, HS_JOIN_PHASE }; + uint32_t ui32PhaseCallIndex; + int perPatchSectionAdded = 0; + int hasControlPointPhase = 0; + + psShader->ConsolidateHullTempVars(); + + // Find out if we have a passthrough hull shader + for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++) + { + if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE) + hasControlPointPhase = 1; + } + + // Phase 1 is always the global decls phase, no instructions + for (i = 0; i < psShader->asPhases[1].psDecl.size(); ++i) + { + TranslateDeclaration(&psShader->asPhases[1].psDecl[i]); + } + + if (hasControlPointPhase == 0) + { + DoHullShaderPassthrough(psContext); + } + + for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++) + { + ShaderPhase *psPhase = &psShader->asPhases[ui32Phase]; + psContext->currentPhase = ui32Phase; + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + bformata(glsl, "//%s declarations\n", GetPhaseFuncName(psPhase->ePhase)); + } + + for (i = 0; i < psPhase->psDecl.size(); ++i) + { + TranslateDeclaration(&psPhase->psDecl[i]); + } + + bformata(glsl, "void %s%d(int phaseInstanceID)\n{\n", GetPhaseFuncName(psPhase->ePhase), ui32Phase); + psContext->indent++; + + if (psPhase->psInst.size() > 0) + { + //The minus one here is remove the return statement at end of phases. + //We don't want to translate that, we'll just end the function body. 
+ ASSERT(psPhase->psInst[psPhase->psInst.size() - 1].eOpcode == OPCODE_RET); + for (i = 0; i < psPhase->psInst.size() - 1; ++i) + { + TranslateInstruction(&psPhase->psInst[i]); + } + } + + + psContext->indent--; + bcatcstr(glsl, "}\n"); + } + + bcatcstr(glsl, "void main()\n{\n"); + + psContext->indent++; + + // There are cases when there are no control point phases and we have to do passthrough + if (hasControlPointPhase == 0) + { + // Passthrough control point phase, run the rest only once per patch + psContext->AddIndentation(); + bcatcstr(glsl, "passthrough_ctrl_points();\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "barrier();\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "if (gl_InvocationID == 0)\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "{\n"); + psContext->indent++; + perPatchSectionAdded = 1; + } + + for (ui32PhaseCallIndex = 0; ui32PhaseCallIndex < 3; ui32PhaseCallIndex++) + { + for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++) + { + uint32_t i; + ShaderPhase *psPhase = &psShader->asPhases[ui32Phase]; + if (psPhase->ePhase != ePhaseFuncCallOrder[ui32PhaseCallIndex]) + continue; + + if (psPhase->earlyMain->slen > 1) + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Start Early Main ---\n"); + } + + bconcat(glsl, psPhase->earlyMain); + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//--- End Early Main ---\n"); + } + } + + for (i = 0; i < psPhase->ui32InstanceCount; i++) + { + psContext->AddIndentation(); + bformata(glsl, "%s%d(%d);\n", GetPhaseFuncName(psShader->asPhases[ui32Phase].ePhase), ui32Phase, i); + } + + if (psPhase->hasPostShaderCode) + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Post shader code ---\n"); + } + + bconcat(glsl, psPhase->postShaderCode); + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//--- End post shader code ---\n"); + } + } + + + if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE) + { + // We're done printing control point phase, run the rest only once per patch + psContext->AddIndentation(); + bcatcstr(glsl, "barrier();\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "if (gl_InvocationID == 0)\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "{\n"); + psContext->indent++; + perPatchSectionAdded = 1; + } + } + } + + if (perPatchSectionAdded != 0) + { + psContext->indent--; + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + } + + psContext->indent--; + + bcatcstr(glsl, "}\n"); + + // Print out extra functions we generated, in reverse order for potential dependencies + std::for_each(m_FunctionDefinitions.rbegin(), m_FunctionDefinitions.rend(), [&extensions](const FunctionDefinitions::value_type &p) + { + bcatcstr(extensions, p.second.c_str()); + bcatcstr(extensions, "\n"); + }); + + // Concat extensions and glsl for the final shader code. 
+ if (m_NeedUnityInstancingArraySizeDecl) + { + if (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) + { + bformata(extensions, "layout(constant_id = %d) const int %s = 2;\n", kArraySizeConstantID, UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO); + } + else + { + bcatcstr(extensions, "#ifndef " UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO "\n\t#define " UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO " 2\n#endif\n"); + } + } + if (m_NeedUnityPreTransformDecl) + { + if (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) + { + bformata(extensions, "layout(constant_id = %d) const int %s = 0;\n", kPreTransformConstantID, UNITY_PRETRANSFORM_CONSTANT_NAME); + } + } + + bconcat(extensions, glsl); + bdestroy(glsl); + psContext->glsl = extensions; + glsl = NULL; + + if (psContext->psDependencies) + { + //Save partitioning and primitive type for use by domain shader. + psContext->psDependencies->eTessOutPrim = psShader->sInfo.eTessOutPrim; + + psContext->psDependencies->eTessPartitioning = psShader->sInfo.eTessPartitioning; + } + + return true; + } + + if (psShader->eShaderType == DOMAIN_SHADER && psContext->psDependencies) + { + //Load partitioning and primitive type from hull shader. + switch (psContext->psDependencies->eTessOutPrim) + { + case TESSELLATOR_OUTPUT_TRIANGLE_CCW: + { + bcatcstr(glsl, "layout(ccw) in;\n"); + break; + } + case TESSELLATOR_OUTPUT_TRIANGLE_CW: + { + bcatcstr(glsl, "layout(cw) in;\n"); + break; + } + case TESSELLATOR_OUTPUT_POINT: + { + bcatcstr(glsl, "layout(point_mode) in;\n"); + break; + } + default: + { + break; + } + } + + switch (psContext->psDependencies->eTessPartitioning) + { + case TESSELLATOR_PARTITIONING_FRACTIONAL_ODD: + { + bcatcstr(glsl, "layout(fractional_odd_spacing) in;\n"); + break; + } + case TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN: + { + bcatcstr(glsl, "layout(fractional_even_spacing) in;\n"); + break; + } + default: + { + break; + } + } + } + + bstring generatedFunctionsKeyword = bfromcstr("\n// Generated functions\n\n"); + bstring beforeMain = NULL; + bstring beforeMainKeyword = NULL; + + if (!HaveDynamicIndexing(psContext)) + { + beforeMain = bfromcstr(""); + beforeMainKeyword = bfromcstr("\n// Before Main\n\n"); + psContext->beforeMain = beforeMain; + } + + for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i) + { + TranslateDeclaration(&psShader->asPhases[0].psDecl[i]); + } + + // Search and replace string, for injecting generated functions that need to be after default precision declarations + bconcat(glsl, generatedFunctionsKeyword); + + // Search and replace string, for injecting stuff from translation that need to be after normal declarations and before main + if (!HaveDynamicIndexing(psContext)) + { + bconcat(glsl, beforeMainKeyword); + } + + bcatcstr(glsl, "void main()\n{\n"); + + psContext->indent++; + + if (psContext->psShader->asPhases[0].earlyMain->slen > 1) + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Start Early Main ---\n"); + } + + bconcat(glsl, psContext->psShader->asPhases[0].earlyMain); + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//--- End Early Main ---\n"); + } + } + + for (i = 0; i < psShader->asPhases[0].psInst.size(); ++i) + { + TranslateInstruction(&psShader->asPhases[0].psInst[i]); + } + + psContext->indent--; + + bcatcstr(glsl, "}\n"); + + // Print out extra definitions and functions we generated in generation order to satisfy dependencies + { + bstring 
generatedFunctionsAndDefinitions = bfromcstr(""); + + for (size_t i = 0; i < m_AdditionalDefinitions.size(); ++i) + { + bcatcstr(generatedFunctionsAndDefinitions, m_AdditionalDefinitions[i].c_str()); + bcatcstr(generatedFunctionsAndDefinitions, "\n"); + } + + for (std::vector::const_iterator funcNameIter = m_FunctionDefinitionsOrder.begin(); funcNameIter != m_FunctionDefinitionsOrder.end(); ++funcNameIter) + { + const FunctionDefinitions::const_iterator definition = m_FunctionDefinitions.find(*funcNameIter); + ASSERT(definition != m_FunctionDefinitions.end()); + bcatcstr(generatedFunctionsAndDefinitions, definition->second.c_str()); + bcatcstr(generatedFunctionsAndDefinitions, "\n"); + } + bfindreplace(glsl, generatedFunctionsKeyword, generatedFunctionsAndDefinitions, 0); + bdestroy(generatedFunctionsAndDefinitions); + bdestroy(generatedFunctionsKeyword); + } + + // Concat extensions and glsl for the final shader code. + if (m_NeedUnityInstancingArraySizeDecl) + { + if (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) + { + bformata(extensions, "layout(constant_id = %d) const int %s = 2;\n", kArraySizeConstantID, UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO); + } + else + { + bcatcstr(extensions, "#ifndef " UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO "\n\t#define " UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO " 2\n#endif\n"); + } + } + if (m_NeedUnityPreTransformDecl) + { + if (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) + { + bformata(extensions, "layout(constant_id = %d) const int %s = 0;\n", kPreTransformConstantID, UNITY_PRETRANSFORM_CONSTANT_NAME); + } + } + bconcat(extensions, glsl); + bdestroy(glsl); + + if (!HaveDynamicIndexing(psContext)) + { + bstring empty = bfromcstr(""); + + if (beforeMain->slen > 1) + bfindreplace(extensions, beforeMainKeyword, beforeMain, 0); + else + bfindreplace(extensions, beforeMainKeyword, empty, 0); + + psContext->beforeMain = NULL; + + bdestroy(empty); + bdestroy(beforeMain); + bdestroy(beforeMainKeyword); + } + + psContext->glsl = extensions; + glsl = NULL; + + return true; +} + +bool ToGLSL::DeclareExtraFunction(const std::string &name, bstring body) +{ + if (m_FunctionDefinitions.find(name) != m_FunctionDefinitions.end()) + return true; + m_FunctionDefinitions.insert(std::make_pair(name, (const char *)body->data)); + m_FunctionDefinitionsOrder.push_back(name); + return false; +} + +static void PrintComponentWrapper1(bstring code, const char *func, const char *type2, const char *type3, const char *type4) +{ + bformata(code, "%s %s(%s a) { a.x = %s(a.x); a.y = %s(a.y); return a; }\n", type2, func, type2, func, func); + bformata(code, "%s %s(%s a) { a.x = %s(a.x); a.y = %s(a.y); a.z = %s(a.z); return a; }\n", type3, func, type3, func, func, func); + bformata(code, "%s %s(%s a) { a.x = %s(a.x); a.y = %s(a.y); a.z = %s(a.z); a.w = %s(a.w); return a; }\n", type4, func, type4, func, func, func, func); +} + +static void PrintComponentWrapper2(bstring code, const char *func, const char *type2, const char *type3, const char *type4) +{ + bformata(code, "%s %s(%s a, %s b) { a.x = %s(a.x, b.x); a.y = %s(a.y, b.y); return a; }\n", type2, func, type2, type2, func, func); + bformata(code, "%s %s(%s a, %s b) { a.x = %s(a.x, b.x); a.y = %s(a.y, b.y); a.z = %s(a.z, b.z); return a; }\n", type3, func, type3, type3, func, func, func); + bformata(code, "%s %s(%s a, %s b) { a.x = %s(a.x, b.x); a.y = %s(a.y, b.y); a.z = %s(a.z, b.z); a.w = %s(a.w, b.w); return a; }\n", type4, func, type4, type4, func, func, func, func); +} + +static void PrintTrunc(bstring code, const char 
*type) +{ + bformata(code, "%s trunc(%s x) { return sign(x)*floor(abs(x)); }\n", type, type); +} + +void ToGLSL::UseExtraFunctionDependency(const std::string &name) +{ + if (m_FunctionDefinitions.find(name) != m_FunctionDefinitions.end()) + return; + + bstring code = bfromcstr(""); + bool match = true; + + if (name == "trunc") + { + PrintTrunc(code, "float"); + PrintTrunc(code, "vec2"); + PrintTrunc(code, "vec3"); + PrintTrunc(code, "vec4"); + } + else if (name == "roundEven") + { + bformata(code, "float roundEven(float x) { float y = floor(x + 0.5); return (y - x == 0.5) ? floor(0.5*y) * 2.0 : y; }\n"); + PrintComponentWrapper1(code, "roundEven", "vec2", "vec3", "vec4"); + } + else if (name == "op_modi") + { + bformata(code, "const int BITWISE_BIT_COUNT = 32;\nint op_modi(int x, int y) { return x - y * (x / y); }\n"); + PrintComponentWrapper2(code, "op_modi", "ivec2", "ivec3", "ivec4"); + } + else if (name == "op_and") + { + UseExtraFunctionDependency("op_modi"); + + bformata(code, "int op_and(int a, int b) { int result = 0; int n = 1; for (int i = 0; i < BITWISE_BIT_COUNT; i++) { if ((op_modi(a, 2) != 0) && (op_modi(b, 2) != 0)) { result += n; } a = a / 2; b = b / 2; n = n * 2; if (!(a > 0 && b > 0)) { break; } } return result; }\n"); + PrintComponentWrapper2(code, "op_and", "ivec2", "ivec3", "ivec4"); + } + else if (name == "op_or") + { + UseExtraFunctionDependency("op_modi"); + + bformata(code, "int op_or(int a, int b) { int result = 0; int n = 1; for (int i = 0; i < BITWISE_BIT_COUNT; i++) { if ((op_modi(a, 2) != 0) || (op_modi(b, 2) != 0)) { result += n; } a = a / 2; b = b / 2; n = n * 2; if (!(a > 0 || b > 0)) { break; } } return result; }\n"); + PrintComponentWrapper2(code, "op_or", "ivec2", "ivec3", "ivec4"); + } + else if (name == "op_xor") + { + UseExtraFunctionDependency("op_and"); + + bformata(code, "int op_xor(int a, int b) { return (a + b - 2 * op_and(a, b)); }\n"); + PrintComponentWrapper2(code, "op_xor", "ivec2", "ivec3", "ivec4"); + } + else if (name == "op_shr") + { + bformata(code, "int op_shr(int a, int b) { return int(floor(float(a) / pow(2.0, float(b)))); }\n"); + PrintComponentWrapper2(code, "op_shr", "ivec2", "ivec3", "ivec4"); + } + else if (name == "op_shl") + { + bformata(code, "int op_shl(int a, int b) { return int(floor(float(a) * pow(2.0, float(b)))); }\n"); + PrintComponentWrapper2(code, "op_shl", "ivec2", "ivec3", "ivec4"); + } + else if (name == "op_not") + { + bformata(code, "int op_not(int value) { return -value - 1; }\n"); + PrintComponentWrapper1(code, "op_not", "ivec2", "ivec3", "ivec4"); + } + else if (name == "int_bitfieldInsert") + { + // Can't use the name 'bitfieldInsert' because Adreno fails with "can't redefine/overload built-in functions!" 
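+ // The emitted helper builds a mask of 'bits' ones shifted to 'offset', clears that range in 'base', then ORs in the shifted 'insert' value.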
+ bcatcstr(code, + "int int_bitfieldInsert(int base, int insert, int offset, int bits) {\n" + " uint mask = ~(uint(0xffffffff) << uint(bits)) << uint(offset);\n" + " return int((uint(base) & ~mask) | ((uint(insert) << uint(offset)) & mask));\n" + "}\n"); + } + else + { + match = false; + } + + if (match) + DeclareExtraFunction(name, code); + + bdestroy(code); +} diff --git a/third_party/HLSLcc/src/toGLSLDeclaration.cpp b/third_party/HLSLcc/src/toGLSLDeclaration.cpp new file mode 100644 index 0000000..efafefa --- /dev/null +++ b/third_party/HLSLcc/src/toGLSLDeclaration.cpp @@ -0,0 +1,3933 @@ +#include "hlslcc.h" +#include "internal_includes/Declaration.h" +#include "internal_includes/toGLSLOperand.h" +#include "internal_includes/toGLSL.h" +#include "internal_includes/languages.h" +#include "internal_includes/HLSLccToolkit.h" +#include "internal_includes/Shader.h" +#include "internal_includes/HLSLCrossCompilerContext.h" +#include "bstrlib.h" +#include "internal_includes/debug.h" +#include +#include +#include +#include +#include +#include "internal_includes/toGLSL.h" +#include "UnityInstancingFlexibleArraySize.h" + +using namespace HLSLcc; + +#ifndef fpcheck +#ifdef _MSC_VER +#define fpcheck(x) (_isnan(x) || !_finite(x)) +#else +#define fpcheck(x) (std::isnan(x) || std::isinf(x)) +#endif +#endif // #ifndef fpcheck + +static bool UseReflection(HLSLCrossCompilerContext* psContext) +{ + return !psContext->IsSwitch() && psContext->psShader->eShaderType != COMPUTE_SHADER; +} + +static SHADER_VARIABLE_TYPE TypeToReport(SHADER_VARIABLE_TYPE type) +{ + switch (type) + { + case SVT_BOOL: + case SVT_INT: + case SVT_UINT: + case SVT_UINT8: + case SVT_FORCED_INT: + case SVT_INT_AMBIGUOUS: + case SVT_INT16: + case SVT_INT12: + case SVT_UINT16: + return SVT_UINT; + + case SVT_FLOAT: + case SVT_FLOAT10: + case SVT_FLOAT16: + return SVT_FLOAT; + + default: + return type; + } +} + +static void GenerateUnsupportedFormatWarning(HLSLccReflection& refl, const char* name) +{ + std::ostringstream oss; + oss << "The resource '" << name << "' uses an unsupported type/format"; + refl.OnDiagnostics(oss.str(), -1, false); +} + +static void GenerateUnsupportedReadWriteFormatWarning(HLSLccReflection& refl, const char* name) +{ + std::ostringstream oss; + oss << "The resource '" << name << "' uses an unsupported type/format for read/write access"; + refl.OnDiagnostics(oss.str(), -1, false); +} + +void ToGLSL::DeclareConstBufferShaderVariable(const char* varName, const struct ShaderVarType* psType, const struct ConstantBuffer* psCBuf, int unsizedArray, bool addUniformPrefix, bool reportInReflection) +{ + bstring glsl = *psContext->currentGLSLString; + + if (reportInReflection && !psContext->IsVulkan() && psType->Class != SVC_STRUCT && UseReflection(psContext)) + { + const bool isMatrix = psType->Class == SVC_MATRIX_COLUMNS || psType->Class == SVC_MATRIX_ROWS; + const SHADER_VARIABLE_TYPE type = TypeToReport(psType->Type); + psContext->m_Reflection.OnConstant(varName, 0, type, psType->Rows, psType->Columns, isMatrix, psType->Elements, true); + } + + if (psType->Class == SVC_STRUCT) + { + bformata(glsl, "\t%s%s_Type %s", addUniformPrefix ? 
"UNITY_UNIFORM " : "", varName, varName); + if (psType->Elements > 1) + { + if (HLSLcc::IsUnityFlexibleInstancingBuffer(psCBuf)) + { + bformata(glsl, "[" UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO "]"); + m_NeedUnityInstancingArraySizeDecl = true; + } + else + bformata(glsl, "[%d]", psType->Elements); + } + } + else if (psType->Class == SVC_MATRIX_COLUMNS || psType->Class == SVC_MATRIX_ROWS) + { + if (psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) + { + // Translate matrices into vec4 arrays + bformata(glsl, "\t%s%s " HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING "%s", addUniformPrefix ? "UNITY_UNIFORM " : "", HLSLcc::GetConstructorForType(psContext, psType->Type, 4), psType->Rows, psType->Columns, varName); + uint32_t elemCount = (psType->Class == SVC_MATRIX_COLUMNS ? psType->Columns : psType->Rows); + if (psType->Elements > 1) + { + elemCount *= psType->Elements; + } + bformata(glsl, "[%d]", elemCount); + } + else + { + bformata(glsl, "\t%s%s %s", addUniformPrefix ? "UNITY_UNIFORM " : "", HLSLcc::GetMatrixTypeName(psContext, psType->Type, psType->Columns, psType->Rows).c_str(), varName); + if (psType->Elements > 1) + { + bformata(glsl, "[%d]", psType->Elements); + } + } + } + else if (psType->Class == SVC_VECTOR && psType->Columns > 1) + { + bformata(glsl, "\t%s%s %s", addUniformPrefix ? "UNITY_UNIFORM " : "", HLSLcc::GetConstructorForType(psContext, psType->Type, psType->Columns), varName); + + if (psType->Elements > 1) + { + bformata(glsl, "[%d]", psType->Elements); + } + } + else if ((psType->Class == SVC_SCALAR) || + (psType->Class == SVC_VECTOR && psType->Columns == 1)) + { + if (psType->Type == SVT_BOOL) + { + //Use int instead of bool. + //Allows implicit conversions to integer and + //bool consumes 4-bytes in HLSL and GLSL anyway. + ((ShaderVarType *)psType)->Type = SVT_INT; + } + + bformata(glsl, "\t%s%s %s", addUniformPrefix ? "UNITY_UNIFORM " : "", HLSLcc::GetConstructorForType(psContext, psType->Type, 1), varName); + + if (psType->Elements > 1) + { + bformata(glsl, "[%d]", psType->Elements); + } + } + if (unsizedArray) + bformata(glsl, "[]"); + bformata(glsl, ";\n"); +} + +//In GLSL embedded structure definitions are not supported. +void ToGLSL::PreDeclareStructType(const std::string &name, const struct ShaderVarType* psType) +{ + bstring glsl = *psContext->currentGLSLString; + uint32_t i; + + for (i = 0; i < psType->MemberCount; ++i) + { + if (psType->Members[i].Class == SVC_STRUCT) + { + PreDeclareStructType(psType->Members[i].name, &psType->Members[i]); + } + } + + if (psType->Class == SVC_STRUCT) + { + //Not supported at the moment + ASSERT(name != "$Element"); + + for (size_t i = 0; i < m_DefinedStructs.size(); ++i) + { + if (m_DefinedStructs[i] == name) + return; + } + + m_DefinedStructs.push_back(name); + + bformata(glsl, "struct %s_Type {\n", name.c_str()); + + for (i = 0; i < psType->MemberCount; ++i) + { + ASSERT(psType->Members.size() != 0); + + DeclareConstBufferShaderVariable(psType->Members[i].name.c_str(), &psType->Members[i], NULL, 0, false, false); + } + + bformata(glsl, "};\n"); + } +} + +static const char* GetInterpolationString(INTERPOLATION_MODE eMode, GLLang lang) +{ + switch (eMode) + { + case INTERPOLATION_CONSTANT: + { + return "flat "; + } + case INTERPOLATION_LINEAR: + { + return ""; + } + case INTERPOLATION_LINEAR_CENTROID: + { + return "centroid "; + } + case INTERPOLATION_LINEAR_NOPERSPECTIVE: + { + return lang <= LANG_ES_310 ? "" : "noperspective "; + break; + } + case INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID: + { + return lang <= LANG_ES_310 ? 
"centroid " : "noperspective centroid "; + } + case INTERPOLATION_LINEAR_SAMPLE: + { + return "sample "; + } + case INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: + { + return lang <= LANG_ES_310 ? "" : "noperspective sample "; + } + default: + { + return ""; + } + } +} + +static void DeclareInput( + HLSLCrossCompilerContext* psContext, + const Declaration* psDecl, + const char* Interpolation, const char* StorageQualifier, const char* Precision, int iNumComponents, OPERAND_INDEX_DIMENSION eIndexDim, const char* InputName, const uint32_t ui32CompMask) +{ + Shader* psShader = psContext->psShader; + bstring glsl = *psContext->currentGLSLString; + int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); + uint32_t ui32Reg = psDecl->asOperands[0].ui32RegisterNumber; + const ShaderInfo::InOutSignature *psSig = NULL; + + // This falls within the specified index ranges. The default is 0 if no input range is specified + + if (regSpace == 0) + psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32Reg, ui32CompMask, &psSig); + else + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Reg, ui32CompMask, &psSig); + + ASSERT(psSig != NULL); + + // No need to declare input pos 0 on HS control point phases, it's always position + // Also no point in declaring the builtins + if (psShader->eShaderType == HULL_SHADER && psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE) + { + if (regSpace == 0) + { + if ((psSig->semanticName == "POS" || psSig->semanticName == "SV_Position") && psSig->ui32SemanticIndex == 0) + return; + } + } + + if ((ui32CompMask & ~psShader->acInputDeclared[regSpace][ui32Reg]) != 0) + { + const char* vecType = "vec"; + const char* scalarType = "float"; + + switch (psSig->eComponentType) + { + case INOUT_COMPONENT_UINT32: + { + vecType = "uvec"; + scalarType = "uint"; + break; + } + case INOUT_COMPONENT_SINT32: + { + vecType = "ivec"; + scalarType = "int"; + break; + } + case INOUT_COMPONENT_FLOAT32: + { + break; + } + default: + { + ASSERT(0); + break; + } + } + + if (psContext->psDependencies) + { + if (psShader->eShaderType == PIXEL_SHADER) + { + psContext->psDependencies->SetInterpolationMode(ui32Reg, psDecl->value.eInterpolation); + } + } + + std::string locationQualifier = ""; + + bool keepLocation = ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0); + + if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage) || + ((psContext->flags & HLSLCC_FLAG_NVN_TARGET) && HaveLimitedInOutLocationQualifier(psContext->psShader->eTargetLanguage, psContext->psShader->extensions))) + { + bool addLocation = false; + + // Add locations to vertex shader inputs unless disabled in flags + if (psShader->eShaderType == VERTEX_SHADER && !(psContext->flags & HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS)) + addLocation = true; + + // Add intra-shader locations if supported + if (psShader->eShaderType != VERTEX_SHADER) + addLocation = true; + + if (addLocation) + { + std::ostringstream oss; + oss << "layout(location = " << psContext->psDependencies->GetVaryingLocation(std::string(InputName), psShader->eShaderType, true, keepLocation, psShader->maxSemanticIndex) << ") "; + locationQualifier = oss.str(); + } + } + + psShader->acInputDeclared[regSpace][ui32Reg] = (char)psSig->ui32Mask; + + // Do the reflection report on vertex shader inputs + if (psShader->eShaderType == VERTEX_SHADER) + { + psContext->m_Reflection.OnInputBinding(std::string(InputName), psContext->psDependencies->GetVaryingLocation(std::string(InputName), VERTEX_SHADER, true, 
keepLocation, psShader->maxSemanticIndex)); + } + + switch (eIndexDim) + { + case INDEX_2D: + { + if (iNumComponents == 1) + { + const uint32_t regNum = psDecl->asOperands[0].ui32RegisterNumber; + const uint32_t arraySize = psDecl->asOperands[0].aui32ArraySizes[0]; + + psContext->psShader->abScalarInput[regSpace][regNum] |= (int)ui32CompMask; + + if (psShader->eShaderType == HULL_SHADER || psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT_CONTROL_POINT) + bformata(glsl, "%s%s%s %s %s %s [];\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, scalarType, InputName); + else + bformata(glsl, "%s%s%s %s %s %s [%d];\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, scalarType, InputName, arraySize); + } + else + { + if (psShader->eShaderType == HULL_SHADER || psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT_CONTROL_POINT) + bformata(glsl, "%s%s%s %s %s%d %s [];\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName); + else + bformata(glsl, "%s%s%s %s %s%d %s [%d];\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName, + psDecl->asOperands[0].aui32ArraySizes[0]); + } + break; + } + default: + { + if (iNumComponents == 1) + { + psContext->psShader->abScalarInput[regSpace][ui32Reg] |= (int)ui32CompMask; + + bformata(glsl, "%s%s%s %s %s %s;\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, scalarType, InputName); + } + else + { + if (psShader->aIndexedInput[regSpace][ui32Reg] > 0) + { + bformata(glsl, "%s%s%s %s %s%d %s", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName); + if (psShader->eShaderType == HULL_SHADER) + bcatcstr(glsl, "[];\n"); + else + bcatcstr(glsl, ";\n"); + } + else + { + if (psShader->eShaderType == HULL_SHADER) + bformata(glsl, "%s%s%s %s %s%d %s[];\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName); + else + bformata(glsl, "%s%s%s %s %s%d %s;\n", locationQualifier.c_str(), Interpolation, StorageQualifier, Precision, vecType, iNumComponents, InputName); + } + } + break; + } + } + } +} + +bool ToGLSL::RenderTargetDeclared(uint32_t input) +{ + if (m_DeclaredRenderTarget.find(input) != m_DeclaredRenderTarget.end()) + return true; + + m_DeclaredRenderTarget.insert(input); + return false; +} + +void ToGLSL::AddBuiltinInput(const Declaration* psDecl, const char* builtinName) +{ + Shader* psShader = psContext->psShader; + const Operand* psOperand = &psDecl->asOperands[0]; + const int regSpace = psOperand->GetRegisterSpace(psContext); + ASSERT(regSpace == 0); + + // we need to at least mark if they are scalars or not (as we might need to use vector ctor) + if (psOperand->GetNumInputElements(psContext) == 1) + psShader->abScalarInput[regSpace][psOperand->ui32RegisterNumber] |= (int)psOperand->ui32CompMask; +} + +void ToGLSL::AddBuiltinOutput(const Declaration* psDecl, int arrayElements, const char* builtinName) +{ + bstring glsl = *psContext->currentGLSLString; + Shader* psShader = psContext->psShader; + const SPECIAL_NAME eSpecialName = psDecl->asOperands[0].eSpecialName; + + if (eSpecialName != NAME_CLIP_DISTANCE && eSpecialName != NAME_CULL_DISTANCE) + return; + + psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode = 1; + + if (psContext->OutputNeedsDeclaring(&psDecl->asOperands[0], arrayElements ? 
arrayElements : 1)) + { + const ShaderInfo::InOutSignature* psSignature = NULL; + + psShader->sInfo.GetOutputSignatureFromRegister( + psDecl->asOperands[0].ui32RegisterNumber, + psDecl->asOperands[0].ui32CompMask, + 0, + &psSignature); + psContext->currentGLSLString = &psContext->psShader->asPhases[psContext->currentPhase].postShaderCode; + glsl = *psContext->currentGLSLString; + psContext->indent++; + if (arrayElements) + { + } + else if ((eSpecialName == NAME_CLIP_DISTANCE || eSpecialName == NAME_CULL_DISTANCE) && psContext->psShader->eShaderType != HULL_SHADER) + { + // Case 828454 : For some reason DX compiler seems to inject clip/cull distance declaration to the hull shader sometimes + // even though it's not used at all, and overlaps some completely unrelated patch constant declarations. We'll just ignore this now. + // Revisit this if this actually pops up elsewhere. + + // cull/clip distance are pretty similar (the only real difference is extension name (and functionality, but we dont care here)) + int max = psDecl->asOperands[0].GetMaxComponent(); + + if (IsESLanguage(psShader->eTargetLanguage)) + psContext->RequireExtension("GL_EXT_clip_cull_distance"); + else if (eSpecialName == NAME_CULL_DISTANCE) + psContext->RequireExtension("GL_ARB_cull_distance"); + const char* glName = eSpecialName == NAME_CLIP_DISTANCE ? "Clip" : "Cull"; + + int applySwizzle = psDecl->asOperands[0].GetNumSwizzleElements() > 1 ? 1 : 0; + const char* swizzle[] = {".x", ".y", ".z", ".w"}; + + ASSERT(psSignature != NULL); + const int index = psSignature->ui32SemanticIndex; + + //Clip/Cull distance can be spread across 1 or 2 outputs (each no more than a vec4). + //Some examples: + //float4 clip[2] : SV_ClipDistance; //8 clip distances + //float3 clip[2] : SV_ClipDistance; //6 clip distances + //float4 clip : SV_ClipDistance; //4 clip distances + //float clip : SV_ClipDistance; //1 clip distance. + + //In GLSL the clip/cull distance built-in is an array of up to 8 floats. + //So vector to array conversion needs to be done here. + int multiplier = 1; + if (index == 1) + { + const ShaderInfo::InOutSignature* psFirstClipSignature; + if (psShader->sInfo.GetOutputSignatureFromSystemValue(eSpecialName, 1, &psFirstClipSignature)) + { + if (psFirstClipSignature->ui32Mask & (1 << 3)) multiplier = 4; + else if (psFirstClipSignature->ui32Mask & (1 << 2)) multiplier = 3; + else if (psFirstClipSignature->ui32Mask & (1 << 1)) multiplier = 2; + } + } + + // Add a specially crafted comment so runtime knows to enable clip planes. 
+ // We may end up doing 2 of these, so at runtime OR the results + uint32_t clipmask = psDecl->asOperands[0].GetAccessMask(); + if (index != 0) + clipmask <<= multiplier; + bformata(psContext->glsl, "// HLSLcc_%sDistances_%x\n", glName, clipmask); + + psContext->psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psSignature->ui32Register] = 0xff; + bformata(psContext->glsl, "vec4 phase%d_gl%sDistance%d;\n", psContext->currentPhase, glName, index); + + for (int i = 0; i < max; ++i) + { + psContext->AddIndentation(); + bformata(glsl, "%s[%d] = (", builtinName, i + multiplier * index); + TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); + if (applySwizzle) bformata(glsl, ")%s;\n", swizzle[i]); + else bformata(glsl, ");\n"); + } + } + psContext->indent--; + psContext->currentGLSLString = &psContext->glsl; + } +} + +void ToGLSL::HandleOutputRedirect(const Declaration *psDecl, const char *Precision) +{ + const Operand *psOperand = &psDecl->asOperands[0]; + Shader *psShader = psContext->psShader; + bstring glsl = *psContext->currentGLSLString; + int needsRedirect = 0; + const ShaderInfo::InOutSignature *psSig = NULL; + + int regSpace = psOperand->GetRegisterSpace(psContext); + if (regSpace == 0 && psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) + { + needsRedirect = 1; + } + else if (regSpace == 1 && psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) + { + needsRedirect = 1; + } + + if (needsRedirect == 1) + { + // TODO What if this is indexed? + int comp = 0; + uint32_t origMask = psOperand->ui32CompMask; + + ASSERT(psContext->psShader->aIndexedOutput[regSpace][psOperand->ui32RegisterNumber] == 0); + + psContext->AddIndentation(); + bformata(glsl, "%s vec4 phase%d_Output%d_%d;\n", Precision, psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + + while (comp < 4) + { + int numComps = 0; + int hasCast = 0; + uint32_t mask, i; + psSig = NULL; + if (regSpace == 0) + psContext->psShader->sInfo.GetOutputSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, psContext->psShader->ui32CurrentVertexOutputStream, &psSig, true); + else + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); + + // The register isn't necessarily packed full. Continue with the next component. + if (psSig == NULL) + { + comp++; + continue; + } + + numComps = GetNumberBitsSet(psSig->ui32Mask); + mask = psSig->ui32Mask; + + ((Operand *)psOperand)->ui32CompMask = 1 << comp; + bstring str = GetPostShaderCode(psContext); + TranslateOperand(str, psOperand, TO_FLAG_NAME_ONLY); + bcatcstr(str, " = "); + + if (psSig->eComponentType == INOUT_COMPONENT_SINT32) + { + bformata(str, HaveBitEncodingOps(psContext->psShader->eTargetLanguage) ? "floatBitsToInt(" : "int("); + hasCast = 1; + } + else if (psSig->eComponentType == INOUT_COMPONENT_UINT32) + { + bformata(str, HaveBitEncodingOps(psContext->psShader->eTargetLanguage) ? 
"floatBitsToUint(" : "int("); + hasCast = 1; + } + bformata(str, "phase%d_Output%d_%d.", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + // Print out mask + for (i = 0; i < 4; i++) + { + if ((mask & (1 << i)) == 0) + continue; + + bformata(str, "%c", "xyzw"[i]); + } + + if (hasCast) + bcatcstr(str, ")"); + comp += numComps; + bcatcstr(str, ";\n"); + } + + ((Operand *)psOperand)->ui32CompMask = origMask; + if (regSpace == 0) + psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; + else + psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; + } +} + +void ToGLSL::AddUserOutput(const Declaration* psDecl) +{ + bstring glsl = *psContext->currentGLSLString; + Shader* psShader = psContext->psShader; + + if (psContext->OutputNeedsDeclaring(&psDecl->asOperands[0], 1)) + { + const Operand* psOperand = &psDecl->asOperands[0]; + const char* Precision = ""; + int iNumComponents; + bstring type = NULL; + int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); + uint32_t ui32Reg = psDecl->asOperands[0].ui32RegisterNumber; + + const ShaderInfo::InOutSignature* psSignature = NULL; + + if (regSpace == 0) + psShader->sInfo.GetOutputSignatureFromRegister( + ui32Reg, + psDecl->asOperands[0].ui32CompMask, + psShader->ui32CurrentVertexOutputStream, + &psSignature); + else + psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Reg, psDecl->asOperands[0].ui32CompMask, &psSignature); + + if (psSignature->semanticName == "POS" && psOperand->ui32RegisterNumber == 0 && psContext->psShader->eShaderType == VERTEX_SHADER) + return; + + iNumComponents = GetNumberBitsSet(psSignature->ui32Mask); + if (iNumComponents == 1) + psContext->psShader->abScalarOutput[regSpace][ui32Reg] |= (int)psDecl->asOperands[0].ui32CompMask; + + switch (psSignature->eComponentType) + { + case INOUT_COMPONENT_UINT32: + { + if (iNumComponents > 1) + type = bformat("uvec%d", iNumComponents); + else + type = bformat("uint"); + break; + } + case INOUT_COMPONENT_SINT32: + { + if (iNumComponents > 1) + type = bformat("ivec%d", iNumComponents); + else + type = bformat("int"); + break; + } + case INOUT_COMPONENT_FLOAT32: + { + if (iNumComponents > 1) + type = bformat("vec%d", iNumComponents); + else + type = bformat("float"); + break; + } + default: + ASSERT(0); + break; + } + + if (HavePrecisionQualifiers(psContext)) + { + switch (psOperand->eMinPrecision) + { + case OPERAND_MIN_PRECISION_DEFAULT: + { + Precision = "highp "; + break; + } + case OPERAND_MIN_PRECISION_FLOAT_16: + { + Precision = "mediump "; + break; + } + case OPERAND_MIN_PRECISION_FLOAT_2_8: + { + Precision = EmitLowp(psContext) ? 
"lowp " : "mediump "; + break; + } + case OPERAND_MIN_PRECISION_SINT_16: + { + Precision = "mediump "; + //type = "ivec"; + break; + } + case OPERAND_MIN_PRECISION_UINT_16: + { + Precision = "mediump "; + //type = "uvec"; + break; + } + } + } + + switch (psShader->eShaderType) + { + case PIXEL_SHADER: + { + switch (psDecl->asOperands[0].eType) + { + case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: + { + break; + } + case OPERAND_TYPE_OUTPUT_DEPTH: + { + if (psShader->eTargetLanguage == LANG_ES_100 && !psContext->EnableExtension("GL_EXT_frag_depth")) + { + bcatcstr(psContext->extensions, "#define gl_FragDepth gl_FragDepthEXT\n"); + } + break; + } + case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: + { + psContext->EnableExtension("GL_ARB_conservative_depth"); + bcatcstr(glsl, "#ifdef GL_ARB_conservative_depth\n"); + bcatcstr(glsl, "layout (depth_greater) out float gl_FragDepth;\n"); + bcatcstr(glsl, "#endif\n"); + break; + } + case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: + { + psContext->EnableExtension("GL_ARB_conservative_depth"); + bcatcstr(glsl, "#ifdef GL_ARB_conservative_depth\n"); + bcatcstr(glsl, "layout (depth_less) out float gl_FragDepth;\n"); + bcatcstr(glsl, "#endif\n"); + break; + } + default: + { + uint32_t renderTarget = psDecl->asOperands[0].ui32RegisterNumber; + + char OutputName[512]; + bstring oname; + oname = bformat("%s%s%d", psContext->outputPrefix, psSignature->semanticName.c_str(), renderTarget); + strncpy(OutputName, (char *)oname->data, 512); + bdestroy(oname); + + if (psShader->eTargetLanguage == LANG_ES_100 && renderTarget > 0) + psContext->EnableExtension("GL_EXT_draw_buffers"); + + bool haveFramebufferFetch = (psShader->extensions->EXT_shader_framebuffer_fetch && + psShader->eShaderType == PIXEL_SHADER && + psContext->flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH); + + if (WriteToFragData(psContext->psShader->eTargetLanguage)) + { + bformata(glsl, "#define %s gl_FragData[%d]\n", OutputName, renderTarget); + } + else + { + if (!RenderTargetDeclared(renderTarget)) + { + bstring layoutQualifier = bformat(""); + + if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage) || + HaveLimitedInOutLocationQualifier(psContext->psShader->eTargetLanguage, psContext->psShader->extensions)) + { + uint32_t index = 0; + + if ((psContext->flags & HLSLCC_FLAG_DUAL_SOURCE_BLENDING) && DualSourceBlendSupported(psContext->psShader->eTargetLanguage)) + { + if (renderTarget > 0) + { + renderTarget = 0; + index = 1; + } + bdestroy(layoutQualifier); + layoutQualifier = bformat("layout(location = %d, index = %d) ", renderTarget, index); + } + else + { + bdestroy(layoutQualifier); + layoutQualifier = bformat("layout(location = %d) ", renderTarget); + } + } + + auto lq = bstr2cstr(layoutQualifier, '\0'); + + if (haveFramebufferFetch) + { + bcatcstr(glsl, "#ifdef GL_EXT_shader_framebuffer_fetch\n"); + bformata(glsl, "%sinout %s%s %s;\n", lq, Precision, type->data, OutputName); + bcatcstr(glsl, "#else\n"); + bformata(glsl, "%sout %s%s %s;\n", lq, Precision, type->data, OutputName); + bcatcstr(glsl, "#endif\n"); + } + else + bformata(glsl, "%sout %s%s %s;\n", lq, Precision, type->data, OutputName); + + bcstrfree(lq); + bdestroy(layoutQualifier); + } + } + break; + } + } + break; + } + case VERTEX_SHADER: + case GEOMETRY_SHADER: + case DOMAIN_SHADER: + case HULL_SHADER: + { + const char* Interpolation = ""; + char OutputName[512]; + bstring oname; + oname = bformat("%s%s%s%d", psContext->outputPrefix, regSpace == 0 ? 
"" : "patch", psSignature->semanticName.c_str(), psSignature->ui32SemanticIndex); + strncpy(OutputName, (char *)oname->data, 512); + bdestroy(oname); + + if (psShader->eShaderType == VERTEX_SHADER || psShader->eShaderType == GEOMETRY_SHADER) + { + if (psSignature->eComponentType == INOUT_COMPONENT_UINT32 || + psSignature->eComponentType == INOUT_COMPONENT_SINT32) // GLSL spec requires that integer vertex outputs always have "flat" interpolation + { + Interpolation = GetInterpolationString(INTERPOLATION_CONSTANT, psContext->psShader->eTargetLanguage); + } + else if (psContext->psDependencies) // For floats we get the interpolation that was resolved from the fragment shader input + { + Interpolation = GetInterpolationString(psContext->psDependencies->GetInterpolationMode(psDecl->asOperands[0].ui32RegisterNumber), psContext->psShader->eTargetLanguage); + } + } + + if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage)) + { + bool keepLocation = ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0); + bformata(glsl, "layout(location = %d) ", psContext->psDependencies->GetVaryingLocation(std::string(OutputName), psShader->eShaderType, false, keepLocation, psShader->maxSemanticIndex)); + } + + if (InOutSupported(psContext->psShader->eTargetLanguage)) + { + if (psContext->psShader->eShaderType == HULL_SHADER) + { + // In Hull shaders outputs are either per-vertex (and need []) or per-patch (need 'out patch') + if (regSpace == 0) + bformata(glsl, "%sout %s%s %s[];\n", Interpolation, Precision, type->data, OutputName); + else + bformata(glsl, "patch %sout %s%s %s;\n", Interpolation, Precision, type->data, OutputName); + } + else + bformata(glsl, "%sout %s%s %s;\n", Interpolation, Precision, type->data, OutputName); + } + else + { + bformata(glsl, "%svarying %s%s %s;\n", Interpolation, Precision, type->data, OutputName); + } + + break; + } + default: + ASSERT(0); + break; + } + HandleOutputRedirect(psDecl, Precision); + bdestroy(type); + } +} + +void ToGLSL::ReportStruct(const std::string &name, const struct ShaderVarType* psType) +{ + if (psContext->IsVulkan() || psContext->IsSwitch() || psType->Class != SVC_STRUCT) + return; + + for (uint32_t i = 0; i < psType->MemberCount; ++i) + { + if (psType->Members[i].Class == SVC_STRUCT) + ReportStruct(psType->Members[i].name, &psType->Members[i]); + } + + for (uint32_t i = 0; i < psType->MemberCount; ++i) + { + const bool isMatrix = psType->Members[i].Class == SVC_MATRIX_COLUMNS || psType->Members[i].Class == SVC_MATRIX_ROWS; + const SHADER_VARIABLE_TYPE type = TypeToReport(psType->Members[i].Type); + psContext->m_Reflection.OnConstant(psType->Members[i].fullName.c_str(), 0, type, psType->Members[i].Rows, psType->Members[i].Columns, isMatrix, psType->Members[i].Elements, true); + } + + psContext->m_Reflection.OnConstant(psType->fullName.c_str(), 0, SVT_VOID, psType->Rows, psType->Columns, false, psType->Elements, true); +} + +void ToGLSL::DeclareUBOConstants(const uint32_t ui32BindingPoint, const ConstantBuffer* psCBuf, bstring glsl) +{ + uint32_t i; + + bool skipUnused = false; + + if ((psContext->flags & HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS) && psCBuf->name == "$Globals") + skipUnused = true; + + + std::string cbName = psCBuf->name; + if (cbName == "$Globals") + { + // Need to tweak Globals struct name to prevent clashes between shader stages + char prefix = 'A'; + switch (psContext->psShader->eShaderType) + { + default: + ASSERT(0); + break; + case COMPUTE_SHADER: + prefix = 'C'; + break; + case VERTEX_SHADER: + prefix = 'V'; + 
break; + case PIXEL_SHADER: + prefix = 'P'; + break; + case GEOMETRY_SHADER: + prefix = 'G'; + break; + case HULL_SHADER: + prefix = 'H'; + break; + case DOMAIN_SHADER: + prefix = 'D'; + break; + } + + cbName[0] = prefix; + } + + for (i = 0; i < psCBuf->asVars.size(); ++i) + { + if (skipUnused && !psCBuf->asVars[i].sType.m_IsUsed) + continue; + + PreDeclareStructType(psCBuf->asVars[i].name, &psCBuf->asVars[i].sType); + } + + if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) + bformata(glsl, "#if HLSLCC_ENABLE_UNIFORM_BUFFERS\n"); + + uint32_t slot = 0xffffffff; + bool isKnown = true; + + /* [layout (location = X)] uniform vec4 HLSLConstantBufferName[numConsts]; */ + if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) + { + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(cbName, false, 1); + bformata(glsl, "layout(set = %d, binding = %d, std140) ", binding.set, binding.binding); + } + else + { + if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags) || (psContext->flags & HLSLCC_FLAG_FORCE_EXPLICIT_LOCATIONS)) + { + GLSLCrossDependencyData::GLSLBufferBindPointInfo bindPointInfo = psContext->psDependencies->GetGLSLResourceBinding(cbName, GLSLCrossDependencyData::BufferType_UBO); + isKnown = bindPointInfo.known; + slot = bindPointInfo.slot; + bformata(glsl, "UNITY_BINDING(%d) ", slot); + } + else + bcatcstr(glsl, "layout(std140) "); + + if (slot != 0xffffffff && !isKnown && UseReflection(psContext)) + { + psContext->m_Reflection.OnConstantBuffer(cbName, psCBuf->ui32TotalSizeInBytes, psCBuf->GetMemberCount(skipUnused)); + for (i = 0; i < psCBuf->asVars.size(); ++i) + { + if (skipUnused && !psCBuf->asVars[i].sType.m_IsUsed) + continue; + + ReportStruct(psCBuf->asVars[i].name, &psCBuf->asVars[i].sType); + } + } + } + + const bool reportInReflection = slot != 0xffffffff && !isKnown && UseReflection(psContext); + + bformata(glsl, "uniform %s {\n", cbName.c_str()); + + if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) + bformata(glsl, "#endif\n"); + + for (i = 0; i < psCBuf->asVars.size(); ++i) + { + if (skipUnused && !psCBuf->asVars[i].sType.m_IsUsed) + continue; + + DeclareConstBufferShaderVariable(psCBuf->asVars[i].name.c_str(), + &psCBuf->asVars[i].sType, psCBuf, 0, psContext->flags & HLSLCC_FLAG_WRAP_UBO ? true : false, reportInReflection); + } + + if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) + bformata(glsl, "#if HLSLCC_ENABLE_UNIFORM_BUFFERS\n"); + + if (psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME) + { + std::string instanceName = UniformBufferInstanceName(psContext, psCBuf->name); + bformata(glsl, "} %s;\n", instanceName.c_str()); + } + else + bcatcstr(glsl, "};\n"); + + if (psContext->flags & HLSLCC_FLAG_WRAP_UBO) + bformata(glsl, "#endif\n"); + + if (reportInReflection) + psContext->m_Reflection.OnConstantBufferBinding(cbName, slot); +} + +bool DeclareRWStructuredBufferTemplateTypeAsInteger(HLSLCrossCompilerContext* psContext, const Operand* psOperand) +{ + // with cases like: RWStructuredBuffer myBuffer; /*...*/ AtomicMin(myBuffer[0].x , myInt); + // if we translate RWStructuredBuffer template type to uint, incorrect version of the function might be called ( AtomicMin(uint..) instead of AtomicMin(int..) 
) + // we try to avoid this case by using integer type in those cases + if (psContext && psOperand) + { + const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; + if (!isVulkan) + { + if (psContext->psShader && HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + { + uint32_t ui32BindingPoint = psOperand->ui32RegisterNumber; + const ResourceBinding* psBinding = NULL; + psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, ui32BindingPoint, &psBinding); + if (psBinding) + { + const ConstantBuffer* psBuffer = NULL; + psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_UAV, psBinding->ui32BindPoint, &psBuffer); + if (psBuffer && psBuffer->asVars.size() == 1 && psBuffer->asVars[0].sType.Type == SVT_INT /*&& psContext->IsSwitch()*/) + return true; + } + } + } + } + return false; +} + +static void DeclareBufferVariable(HLSLCrossCompilerContext* psContext, uint32_t ui32BindingPoint, + const Operand* psOperand, const uint32_t ui32GloballyCoherentAccess, + const uint32_t isRaw, const uint32_t isUAV, const uint32_t hasEmbeddedCounter, const uint32_t stride, bstring glsl) +{ + const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; + bstring BufNamebstr = bfromcstr(""); + // Use original HLSL bindings for UAVs only. For non-UAV buffers we have resolved new binding points from the same register space. + + ResourceName(BufNamebstr, psContext, isUAV ? RGROUP_UAV : RGROUP_TEXTURE, psOperand->ui32RegisterNumber, 0); + + char *btmp = bstr2cstr(BufNamebstr, '\0'); + std::string BufName = btmp; + bcstrfree(btmp); + bdestroy(BufNamebstr); + + // Declare the struct type for structured buffers + if (!isRaw) + { + const char* typeStr = "uint"; + if (isUAV && DeclareRWStructuredBufferTemplateTypeAsInteger(psContext, psOperand)) + typeStr = "int"; + bformata(glsl, " struct %s_type {\n\t%s[%d] value;\n};\n\n", BufName.c_str(), typeStr, stride / 4); + } + + uint32_t slot = 0xffffffff; + bool isKnown = true; + if (isVulkan) + { + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(BufName); + bformata(glsl, "layout(set = %d, binding = %d, std430) ", binding.set, binding.binding); + } + else + { + GLSLCrossDependencyData::GLSLBufferBindPointInfo bindPointInfo = psContext->psDependencies->GetGLSLResourceBinding(BufName, isUAV ? GLSLCrossDependencyData::BufferType_ReadWrite : GLSLCrossDependencyData::BufferType_SSBO); + slot = bindPointInfo.slot; + isKnown = bindPointInfo.known; + bformata(glsl, "layout(std430, binding = %d) ", slot); + } + + if (ui32GloballyCoherentAccess & GLOBALLY_COHERENT_ACCESS) + bcatcstr(glsl, "coherent "); + + if (!isUAV) + bcatcstr(glsl, "readonly "); + + // For Nintendo Switch, adds a "decoration" to get around not being able to detect readonly modifier on the SSBO via the platform shader reflection API. + bformata(glsl, "buffer %s%s {\n\t", psContext->IsSwitch() && !isUAV ? 
"hlslcc_readonly" : "", BufName.c_str()); + + if (hasEmbeddedCounter) + bformata(glsl, "coherent uint %s_counter;\n\t", BufName.c_str()); + + if (isRaw) + { + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, "uint"); + else + bcatcstr(glsl, "int"); + } + else + bformata(glsl, "%s_type", BufName.c_str()); + + bformata(glsl, " %s_buf[];\n};\n", BufName.c_str()); + + if (!isKnown && slot != 0xffffffff && UseReflection(psContext)) + psContext->m_Reflection.OnBufferBinding(BufName, slot, isUAV); +} + +void ToGLSL::DeclareStructConstants(const uint32_t ui32BindingPoint, + const ConstantBuffer* psCBuf, const Operand* psOperand, + bstring glsl) +{ + uint32_t i; + int useGlobalsStruct = 1; + bool skipUnused = false; + + if ((psContext->flags & HLSLCC_FLAG_DISABLE_GLOBALS_STRUCT) && psCBuf->name[0] == '$') + useGlobalsStruct = 0; + + if ((psContext->flags & HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS) && psCBuf->name == "$Globals") + skipUnused = true; + + if ((psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT) == 0) + useGlobalsStruct = 0; + + + for (i = 0; i < psCBuf->asVars.size(); ++i) + { + if (skipUnused && !psCBuf->asVars[i].sType.m_IsUsed) + continue; + + PreDeclareStructType(psCBuf->asVars[i].name, &psCBuf->asVars[i].sType); + } + + /* [layout (location = X)] uniform vec4 HLSLConstantBufferName[numConsts]; */ + if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) + { + ASSERT(0); // Catch this to see what's going on + std::string bname = "wut"; + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(bname); + bformata(glsl, "layout(set = %d, binding = %d) ", binding.set, binding.binding); + } + else + { + if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags)) + bformata(glsl, "layout(location = %d) ", ui32BindingPoint); + } + if (useGlobalsStruct) + { + bcatcstr(glsl, "uniform struct "); + TranslateOperand(psOperand, TO_FLAG_DECLARATION_NAME); + + bcatcstr(glsl, "_Type {\n"); + } + else + { + if (psCBuf->name == "$Globals") + { + // GLSL needs to report $Globals in reflection so that SRP batcher can properly determine if the shader is compatible with it or not. 
+ if (UseReflection(psContext) && !psContext->IsVulkan()) + { + size_t memberCount = 0; + for (i = 0; i < psCBuf->asVars.size(); ++i) + { + if (!psCBuf->asVars[i].sType.m_IsUsed) + continue; + + memberCount += psCBuf->asVars[i].sType.GetMemberCount(); + } + + psContext->m_Reflection.OnConstantBuffer(psCBuf->name, 0, memberCount); + } + } + } + + for (i = 0; i < psCBuf->asVars.size(); ++i) + { + if (skipUnused && !psCBuf->asVars[i].sType.m_IsUsed) + continue; + + if (!useGlobalsStruct) + bcatcstr(glsl, "uniform "); + + DeclareConstBufferShaderVariable(psCBuf->asVars[i].name.c_str(), &psCBuf->asVars[i].sType, psCBuf, 0, false, true); + } + + if (useGlobalsStruct) + { + bcatcstr(glsl, "} "); + + TranslateOperand(psOperand, TO_FLAG_DECLARATION_NAME); + + bcatcstr(glsl, ";\n"); + } +} + +static const char* GetVulkanTextureType(HLSLCrossCompilerContext* psContext, + const RESOURCE_DIMENSION eDimension, + const uint32_t ui32RegisterNumber) +{ + const ResourceBinding* psBinding = 0; + RESOURCE_RETURN_TYPE eType = RETURN_TYPE_UNORM; + int found; + found = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, ui32RegisterNumber, &psBinding); + if (found) + { + eType = (RESOURCE_RETURN_TYPE)psBinding->ui32ReturnType; + } + switch (eDimension) + { + case RESOURCE_DIMENSION_BUFFER: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itextureBuffer"; + case RETURN_TYPE_UINT: + return "utextureBuffer"; + default: + return "textureBuffer"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE1D: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itexture1D"; + case RETURN_TYPE_UINT: + return "utexture1D"; + default: + return "texture1D"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2D: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itexture2D"; + case RETURN_TYPE_UINT: + return "utexture2D"; + default: + return "texture2D"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DMS: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itexture2DMS"; + case RETURN_TYPE_UINT: + return "utexture2DMS"; + default: + return "texture2DMS"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE3D: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itexture3D"; + case RETURN_TYPE_UINT: + return "utexture3D"; + default: + return "texture3D"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURECUBE: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itextureCube"; + case RETURN_TYPE_UINT: + return "utextureCube"; + default: + return "textureCube"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itexture1DArray"; + case RETURN_TYPE_UINT: + return "utexture1DArray"; + default: + return "texture1DArray"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itexture2DArray"; + case RETURN_TYPE_UINT: + return "utexture2DArray"; + default: + return "texture2DArray"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itexture2DMSArray"; + case RETURN_TYPE_UINT: + return "utexture2DMSArray"; + default: + return "texture2DMSArray"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "itextureCubeArray"; + case RETURN_TYPE_UINT: + return "utextureCubeArray"; + default: + return "textureCubeArray"; + } + break; + } + default: + ASSERT(0); + break; + } + + return "texture2D"; 
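+ // only reached via the default case above (unknown dimension); kept as a safe fallback
+ // for builds where ASSERT compiles to a no-op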
+} + +static HLSLCC_TEX_DIMENSION GetTextureDimension(HLSLCrossCompilerContext* psContext, + const RESOURCE_DIMENSION eDimension, + const uint32_t ui32RegisterNumber) +{ + const ResourceBinding* psBinding = 0; + RESOURCE_RETURN_TYPE eType = RETURN_TYPE_UNORM; + int found; + found = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, ui32RegisterNumber, &psBinding); + if (found) + { + eType = (RESOURCE_RETURN_TYPE)psBinding->ui32ReturnType; + } + + switch (eDimension) + { + case RESOURCE_DIMENSION_BUFFER: + case RESOURCE_DIMENSION_TEXTURE1D: + return eType == RETURN_TYPE_SINT || eType == RETURN_TYPE_UINT ? TD_INT : TD_FLOAT; + + case RESOURCE_DIMENSION_TEXTURE2D: + case RESOURCE_DIMENSION_TEXTURE2DMS: + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + return TD_2D; + + case RESOURCE_DIMENSION_TEXTURE3D: + return TD_3D; + + case RESOURCE_DIMENSION_TEXTURECUBE: + return TD_CUBE; + + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + return TD_2DARRAY; + + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + return TD_CUBEARRAY; + default: + ASSERT(0); + break; + } + + return TD_2D; +} + +// Not static because this is used in toGLSLInstruction.cpp when sampling Vulkan textures +const char* GetSamplerType(HLSLCrossCompilerContext* psContext, + const RESOURCE_DIMENSION eDimension, + const uint32_t ui32RegisterNumber) +{ + const ResourceBinding* psBinding = 0; + RESOURCE_RETURN_TYPE eType = RETURN_TYPE_UNORM; + int found; + found = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, ui32RegisterNumber, &psBinding); + if (found) + { + eType = (RESOURCE_RETURN_TYPE)psBinding->ui32ReturnType; + } + switch (eDimension) + { + case RESOURCE_DIMENSION_BUFFER: + { + if (IsESLanguage(psContext->psShader->eTargetLanguage)) + psContext->RequireExtension("GL_EXT_texture_buffer"); + switch (eType) + { + case RETURN_TYPE_SINT: + return "isamplerBuffer"; + case RETURN_TYPE_UINT: + return "usamplerBuffer"; + default: + return "samplerBuffer"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE1D: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "isampler1D"; + case RETURN_TYPE_UINT: + return "usampler1D"; + default: + return "sampler1D"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2D: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "isampler2D"; + case RETURN_TYPE_UINT: + return "usampler2D"; + default: + return "sampler2D"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DMS: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "isampler2DMS"; + case RETURN_TYPE_UINT: + return "usampler2DMS"; + default: + return "sampler2DMS"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE3D: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "isampler3D"; + case RETURN_TYPE_UINT: + return "usampler3D"; + default: + return "sampler3D"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURECUBE: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "isamplerCube"; + case RETURN_TYPE_UINT: + return "usamplerCube"; + default: + return "samplerCube"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "isampler1DArray"; + case RETURN_TYPE_UINT: + return "usampler1DArray"; + default: + return "sampler1DArray"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "isampler2DArray"; + case RETURN_TYPE_UINT: + return "usampler2DArray"; + default: + return "sampler2DArray"; + } + break; + } 
+ + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + if (IsESLanguage(psContext->psShader->eTargetLanguage)) + psContext->RequireExtension("GL_OES_texture_storage_multisample_2d_array"); + switch (eType) + { + case RETURN_TYPE_SINT: + return "isampler2DMSArray"; + case RETURN_TYPE_UINT: + return "usampler2DMSArray"; + default: + return "sampler2DMSArray"; + } + break; + } + + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + switch (eType) + { + case RETURN_TYPE_SINT: + return "isamplerCubeArray"; + case RETURN_TYPE_UINT: + return "usamplerCubeArray"; + default: + return "samplerCubeArray"; + } + break; + } + default: + ASSERT(0); + break; + } + + return "sampler2D"; +} + +static const char *GetSamplerPrecision(const HLSLCrossCompilerContext *psContext, REFLECT_RESOURCE_PRECISION ePrec) +{ + if (!HavePrecisionQualifiers(psContext)) + return " "; + + switch (ePrec) + { + default: + case REFLECT_RESOURCE_PRECISION_UNKNOWN: + case REFLECT_RESOURCE_PRECISION_LOWP: + return EmitLowp(psContext) ? "lowp " : "mediump "; + case REFLECT_RESOURCE_PRECISION_HIGHP: + return "highp "; + case REFLECT_RESOURCE_PRECISION_MEDIUMP: + return "mediump "; + } +} + +static void TranslateVulkanResource(HLSLCrossCompilerContext* psContext, const Declaration* psDecl) +{ + bstring glsl = *psContext->currentGLSLString; + Shader* psShader = psContext->psShader; + + const ResourceBinding *psBinding = NULL; + psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, &psBinding); + ASSERT(psBinding != NULL); + + const char *samplerPrecision = GetSamplerPrecision(psContext, psBinding ? psBinding->ePrecision : REFLECT_RESOURCE_PRECISION_UNKNOWN); + std::string tname = ResourceName(psContext, RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, 0); + + const char* samplerTypeName = GetVulkanTextureType(psContext, + psDecl->value.eResourceDimension, + psDecl->asOperands[0].ui32RegisterNumber); + + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(tname); + bformata(glsl, "layout(set = %d, binding = %d) ", binding.set, binding.binding); + bcatcstr(glsl, "uniform "); + bcatcstr(glsl, samplerPrecision); + bcatcstr(glsl, samplerTypeName); + bcatcstr(glsl, " "); + bcatcstr(glsl, tname.c_str()); + bcatcstr(glsl, ";\n"); +} + +static void TranslateResourceTexture(HLSLCrossCompilerContext* psContext, const Declaration* psDecl, uint32_t samplerCanDoShadowCmp) +{ + bstring glsl = *psContext->currentGLSLString; + Shader* psShader = psContext->psShader; + const char *samplerPrecision = NULL; + std::set::iterator i; + + const char* samplerTypeName = GetSamplerType(psContext, + psDecl->value.eResourceDimension, + psDecl->asOperands[0].ui32RegisterNumber); + + if (psDecl->value.eResourceDimension == RESOURCE_DIMENSION_TEXTURECUBEARRAY + && !HaveCubemapArray(psContext->psShader->eTargetLanguage)) + { + // Need to enable extension (either OES or ARB), but we only need to add it once + if (IsESLanguage(psContext->psShader->eTargetLanguage)) + { + psContext->EnableExtension("GL_OES_texture_cube_map_array"); + psContext->EnableExtension("GL_EXT_texture_cube_map_array"); + } + else + psContext->RequireExtension("GL_ARB_texture_cube_map_array"); + } + + if (psContext->psShader->eTargetLanguage == LANG_ES_100 && samplerCanDoShadowCmp && psDecl->ui32IsShadowTex) + { + psContext->EnableExtension("GL_EXT_shadow_samplers"); + } + + const ResourceBinding *psBinding = NULL; + psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, 
psDecl->asOperands[0].ui32RegisterNumber, &psBinding); + ASSERT(psBinding != NULL); + + samplerPrecision = GetSamplerPrecision(psContext, psBinding ? psBinding->ePrecision : REFLECT_RESOURCE_PRECISION_UNKNOWN); + + if (psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) + { + if (samplerCanDoShadowCmp && psDecl->ui32IsShadowTex) + { + for (i = psDecl->samplersUsed.begin(); i != psDecl->samplersUsed.end(); i++) + { + std::string tname = TextureSamplerName(&psShader->sInfo, psDecl->asOperands[0].ui32RegisterNumber, *i, 1); + bcatcstr(glsl, "uniform "); + bcatcstr(glsl, samplerPrecision); + bcatcstr(glsl, samplerTypeName); + bcatcstr(glsl, "Shadow "); + bcatcstr(glsl, tname.c_str()); + bcatcstr(glsl, ";\n"); + } + } + for (i = psDecl->samplersUsed.begin(); i != psDecl->samplersUsed.end(); i++) + { + std::string tname = TextureSamplerName(&psShader->sInfo, psDecl->asOperands[0].ui32RegisterNumber, *i, 0); + bcatcstr(glsl, "uniform "); + bcatcstr(glsl, samplerPrecision); + bcatcstr(glsl, samplerTypeName); + bcatcstr(glsl, " "); + bcatcstr(glsl, tname.c_str()); + bcatcstr(glsl, ";\n"); + } + } + + std::string tname = ResourceName(psContext, RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, 0); + + bcatcstr(glsl, "uniform "); + bcatcstr(glsl, samplerPrecision); + bcatcstr(glsl, samplerTypeName); + bcatcstr(glsl, " "); + bcatcstr(glsl, tname.c_str()); + bcatcstr(glsl, ";\n"); + + if (samplerCanDoShadowCmp && psDecl->ui32IsShadowTex) + { + //Create shadow and non-shadow sampler. + //HLSL does not have separate types for depth compare, just different functions. + std::string tname = ResourceName(psContext, RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, 1); + + if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags) || + ((psContext->flags & HLSLCC_FLAG_FORCE_EXPLICIT_LOCATIONS) && ((psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) != HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS))) + { + GLSLCrossDependencyData::GLSLBufferBindPointInfo slotInfo = psContext->psDependencies->GetGLSLResourceBinding(tname, GLSLCrossDependencyData::BufferType_Texture); + bformata(glsl, "UNITY_LOCATION(%d) ", slotInfo.slot); + } + bcatcstr(glsl, "uniform "); + bcatcstr(glsl, samplerPrecision); + bcatcstr(glsl, samplerTypeName); + bcatcstr(glsl, "Shadow "); + bcatcstr(glsl, tname.c_str()); + bcatcstr(glsl, ";\n"); + } +} + +void ToGLSL::HandleInputRedirect(const Declaration *psDecl, const char *Precision) +{ + Operand *psOperand = (Operand *)&psDecl->asOperands[0]; + Shader *psShader = psContext->psShader; + bstring glsl = *psContext->currentGLSLString; + int needsRedirect = 0; + const ShaderInfo::InOutSignature *psSig = NULL; + + int regSpace = psOperand->GetRegisterSpace(psContext); + if (regSpace == 0) + { + if (psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) + needsRedirect = 1; + } + else if (psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) + { + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, &psSig); + needsRedirect = 1; + } + + if (needsRedirect == 1) + { + // TODO What if this is indexed? 
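+ // The redirect packs an input register that cannot be declared as a single typed varying
+ // (e.g. its components carry different types) into one vec4 proxy. As a rough illustration
+ // (names are made up), a float3 varying in .xyz of input register 2 during phase 0 ends up as:
+ //   vec4 phase0_Input0_2;
+ //   phase0_Input0_2 = vec4(vs_TEXCOORD0, 0);
+ // with intBitsToFloat()/uintBitsToFloat() wrapped around integer-typed components.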
+ int needsLooping = 0; + int i = 0; + uint32_t origArraySize = 0; + uint32_t origMask = psOperand->ui32CompMask; + + ASSERT(psContext->psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] == 0); + + psContext->AddIndentation(); + // Does the input have multiple array components (such as geometry shader input, or domain shader control point input) + if ((psShader->eShaderType == DOMAIN_SHADER && regSpace == 0) || (psShader->eShaderType == GEOMETRY_SHADER)) + { + // The count is actually stored in psOperand->aui32ArraySizes[0] + origArraySize = psOperand->aui32ArraySizes[0]; + bformata(glsl, "%s vec4 phase%d_Input%d_%d[%d];\n", Precision, psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, origArraySize); + needsLooping = 1; + i = origArraySize - 1; + } + else + bformata(glsl, "%s vec4 phase%d_Input%d_%d;\n", Precision, psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + + psContext->indent++; + + // Do a conditional loop. In normal cases needsLooping == 0 so this is only run once. + do + { + int comp = 0; + bstring str = GetEarlyMain(psContext); + if (needsLooping) + bformata(str, "phase%d_Input%d_%d[%d] = vec4(", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, i); + else + bformata(str, "phase%d_Input%d_%d = vec4(", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + + while (comp < 4) + { + int numComps = 0; + int hasCast = 0; + int hasSig = 0; + if (regSpace == 0) + hasSig = psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); + else + hasSig = psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); + + if (hasSig) + { + numComps = GetNumberBitsSet(psSig->ui32Mask); + if (psSig->eComponentType == INOUT_COMPONENT_SINT32) + { + bformata(str, HaveBitEncodingOps(psContext->psShader->eTargetLanguage) ? "intBitsToFloat(" : "float("); + hasCast = 1; + } + else if (psSig->eComponentType == INOUT_COMPONENT_UINT32) + { + bformata(str, HaveBitEncodingOps(psContext->psShader->eTargetLanguage) ? 
"uintBitsToFloat(" : "float("); + hasCast = 1; + } + + // Override the array size of the operand so TranslateOperand call below prints the correct index + if (needsLooping) + psOperand->aui32ArraySizes[0] = i; + + // And the component mask + psOperand->ui32CompMask = 1 << comp; + + TranslateOperand(str, psOperand, TO_FLAG_NAME_ONLY); + + // Restore the original array size value and mask + psOperand->ui32CompMask = origMask; + if (needsLooping) + psOperand->aui32ArraySizes[0] = origArraySize; + + if (hasCast) + bcatcstr(str, ")"); + comp += numComps; + } + else // no signature found -> fill with zero + { + bcatcstr(str, "0"); + comp++; + } + + if (comp < 4) + bcatcstr(str, ", "); + } + bcatcstr(str, ");\n"); + } + while ((--i) >= 0); + + psContext->indent--; + + if (regSpace == 0) + psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; + else + psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; + } +} + +void ToGLSL::TranslateDeclaration(const Declaration* psDecl) +{ + bstring glsl = *psContext->currentGLSLString; + Shader* psShader = psContext->psShader; + + switch (psDecl->eOpcode) + { + case OPCODE_DCL_INPUT_SGV: + case OPCODE_DCL_INPUT_PS_SGV: + { + const SPECIAL_NAME eSpecialName = psDecl->asOperands[0].eSpecialName; + switch (eSpecialName) + { + case NAME_POSITION: + { + AddBuiltinInput(psDecl, "gl_Position"); + break; + } + case NAME_RENDER_TARGET_ARRAY_INDEX: + { + AddBuiltinInput(psDecl, "gl_Layer"); + if (psShader->eShaderType == VERTEX_SHADER) + { + psContext->RequireExtension("GL_AMD_vertex_shader_layer"); + } + + break; + } + case NAME_CLIP_DISTANCE: + { + AddBuiltinInput(psDecl, "gl_ClipDistance"); + break; + } + case NAME_CULL_DISTANCE: + { + AddBuiltinInput(psDecl, "gl_CullDistance"); + break; + } + case NAME_VIEWPORT_ARRAY_INDEX: + { + AddBuiltinInput(psDecl, "gl_ViewportIndex"); + break; + } + case NAME_INSTANCE_ID: + { + AddBuiltinInput(psDecl, "gl_InstanceID"); + break; + } + case NAME_IS_FRONT_FACE: + { + /* + Cast to int used because + if(gl_FrontFacing != 0) failed to compiled on Intel HD 4000. + Suggests no implicit conversion for bool<->int. + */ + + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + AddBuiltinInput(psDecl, "(gl_FrontFacing ? 0xffffffffu : uint(0))"); // Old ES3.0 Adrenos treat 0u as const int + else + AddBuiltinInput(psDecl, "(gl_FrontFacing ? 1 : 0)"); + break; + } + case NAME_SAMPLE_INDEX: + { + // Using gl_SampleID requires either GL_OES_sample_variables or #version 320 es + if (IsESLanguage(psContext->psShader->eTargetLanguage)) + psContext->RequireExtension("GL_OES_sample_variables"); + AddBuiltinInput(psDecl, "gl_SampleID"); + break; + } + case NAME_VERTEX_ID: + { + AddBuiltinInput(psDecl, "gl_VertexID"); + break; + } + case NAME_PRIMITIVE_ID: + { + if (psShader->eShaderType == GEOMETRY_SHADER) + AddBuiltinInput(psDecl, "gl_PrimitiveIDIn"); // LOL opengl. 
+ else + AddBuiltinInput(psDecl, "gl_PrimitiveID"); + break; + } + default: + { + bformata(glsl, "in vec4 %s;\n", psDecl->asOperands[0].specialName.c_str()); + } + } + break; + } + + case OPCODE_DCL_OUTPUT_SIV: + { + switch (psDecl->asOperands[0].eSpecialName) + { + case NAME_POSITION: + { + AddBuiltinOutput(psDecl, 0, "gl_Position"); + break; + } + case NAME_RENDER_TARGET_ARRAY_INDEX: + { + AddBuiltinOutput(psDecl, 0, "gl_Layer"); + if (psShader->eShaderType == VERTEX_SHADER || psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) + { + if (psContext->IsVulkan()) + { + psContext->RequireExtension("GL_ARB_shader_viewport_layer_array"); + } + else if (psContext->IsSwitch()) + { + psContext->RequireExtension("GL_NV_viewport_array2"); + } + else if (psShader->eShaderType == VERTEX_SHADER) // case 1261150 + { + psContext->RequireExtension("GL_AMD_vertex_shader_layer"); + } + } + + break; + } + case NAME_CLIP_DISTANCE: + { + AddBuiltinOutput(psDecl, 0, "gl_ClipDistance"); + break; + } + case NAME_CULL_DISTANCE: + { + AddBuiltinOutput(psDecl, 0, "gl_CullDistance"); + break; + } + case NAME_VIEWPORT_ARRAY_INDEX: + { + AddBuiltinOutput(psDecl, 0, "gl_ViewportIndex"); + break; + } + case NAME_VERTEX_ID: + { + ASSERT(0); //VertexID is not an output + break; + } + case NAME_PRIMITIVE_ID: + { + AddBuiltinOutput(psDecl, 0, "gl_PrimitiveID"); + break; + } + case NAME_INSTANCE_ID: + { + ASSERT(0); //InstanceID is not an output + break; + } + case NAME_IS_FRONT_FACE: + { + ASSERT(0); //FrontFacing is not an output + break; + } + case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: + { + if (psContext->psShader->aIndexedOutput[1][psDecl->asOperands[0].ui32RegisterNumber]) + { + AddBuiltinOutput(psDecl, 4, "gl_TessLevelOuter"); + } + else + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[0]"); + } + break; + } + case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[1]"); + break; + } + case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[2]"); + break; + } + case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[3]"); + break; + } + case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: + { + if (psContext->psShader->aIndexedOutput[1][psDecl->asOperands[0].ui32RegisterNumber]) + { + AddBuiltinOutput(psDecl, 3, "gl_TessLevelOuter"); + } + else + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[0]"); + } + break; + } + case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[1]"); + break; + } + case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[2]"); + break; + } + case NAME_FINAL_LINE_DENSITY_TESSFACTOR: + { + if (psContext->psShader->aIndexedOutput[1][psDecl->asOperands[0].ui32RegisterNumber]) + { + AddBuiltinOutput(psDecl, 2, "gl_TessLevelOuter"); + } + else + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[0]"); + } + break; + } + case NAME_FINAL_LINE_DETAIL_TESSFACTOR: + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelOuter[1]"); + break; + } + case NAME_FINAL_TRI_INSIDE_TESSFACTOR: + case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: + { + if (psContext->psShader->aIndexedOutput[1][psDecl->asOperands[0].ui32RegisterNumber]) + { + AddBuiltinOutput(psDecl, 2, "gl_TessLevelInner"); + } + else + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelInner[0]"); + } + break; + } + case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: + { + AddBuiltinOutput(psDecl, 0, "gl_TessLevelInner[1]"); + break; + } + 
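+ // Summary of the tessellation-factor mapping above: a quad domain's four SV_TessFactor
+ // edge outputs map to gl_TessLevelOuter[0..3] and its two SV_InsideTessFactor outputs to
+ // gl_TessLevelInner[0..1]; tri domains use gl_TessLevelOuter[0..2] plus gl_TessLevelInner[0],
+ // and isolines use gl_TessLevelOuter[0..1].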
default: + { + // Sometimes DX compiler seems to declare patch constant outputs like this. Anyway, nothing for us to do. +// bformata(glsl, "out vec4 %s;\n", psDecl->asOperands[0].specialName.c_str()); + +/* bcatcstr(glsl, "#define "); + TranslateOperand(psContext, &psDecl->asOperands[0], TO_FLAG_NONE); + bformata(glsl, " %s\n", psDecl->asOperands[0].pszSpecialName); + break;*/ + } + } + break; + } + case OPCODE_DCL_INPUT: + { + const Operand* psOperand = &psDecl->asOperands[0]; + + int iNumComponents = psOperand->GetNumInputElements(psContext); + const char* StorageQualifier = "attribute"; + std::string inputName; + const char* Precision = ""; + + if ((psOperand->eType == OPERAND_TYPE_INPUT_DOMAIN_POINT) || + (psOperand->eType == OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) || + (psOperand->eType == OPERAND_TYPE_INPUT_COVERAGE_MASK) || + (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID) || + (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_GROUP_ID) || + (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP) || + (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED) || + (psOperand->eType == OPERAND_TYPE_INPUT_FORK_INSTANCE_ID)) + { + break; + } + + // No need to declare patch constants read again by the hull shader. + if ((psOperand->eType == OPERAND_TYPE_INPUT_PATCH_CONSTANT) && psContext->psShader->eShaderType == HULL_SHADER) + { + break; + } + + // Also skip position input to hull and domain shader + if ((psOperand->eType == OPERAND_TYPE_INPUT_CONTROL_POINT) && + (psContext->psShader->eShaderType == HULL_SHADER || psContext->psShader->eShaderType == DOMAIN_SHADER)) + { + const ShaderInfo::InOutSignature *psIn = NULL; + psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn); + ASSERT(psIn != NULL); + + if ((psIn->semanticName == "SV_POSITION" || psIn->semanticName == "SV_Position" + || psIn->semanticName == "POS" || psIn->semanticName == "POSITION") && psIn->ui32SemanticIndex == 0) + break; + } + + //Already declared as part of an array. + if (psShader->aIndexedInput[psOperand->GetRegisterSpace(psContext)][psDecl->asOperands[0].ui32RegisterNumber] == -1) + { + break; + } + + inputName = psContext->GetDeclaredInputName(psOperand, NULL, 1, NULL); + + // In the case of the Hull Shader, due to the different phases, we might have already delcared this input + // so check to see if that is the case, and if not record it + if (psContext->psShader->eShaderType == HULL_SHADER) + { + if (psContext->psDependencies->IsHullShaderInputAlreadyDeclared(inputName)) + { + return; + } + + psContext->psDependencies->RecordHullShaderInput(inputName); + } + + if (InOutSupported(psContext->psShader->eTargetLanguage)) + { + if (psOperand->eType == OPERAND_TYPE_INPUT_PATCH_CONSTANT && psContext->psShader->eShaderType == DOMAIN_SHADER) + StorageQualifier = "patch in"; + else + StorageQualifier = "in"; + } + + if (HavePrecisionQualifiers(psContext)) + { + switch (psOperand->eMinPrecision) + { + case OPERAND_MIN_PRECISION_DEFAULT: + { + Precision = "highp"; + break; + } + case OPERAND_MIN_PRECISION_FLOAT_16: + { + Precision = "mediump"; + break; + } + case OPERAND_MIN_PRECISION_FLOAT_2_8: + { + Precision = EmitLowp(psContext) ? 
"lowp " : "mediump "; + break; + } + case OPERAND_MIN_PRECISION_SINT_16: + { + Precision = "mediump"; + break; + } + case OPERAND_MIN_PRECISION_UINT_16: + { + Precision = "mediump"; + break; + } + } + } + + const char * Interpolation = ""; + + if (psShader->eShaderType == GEOMETRY_SHADER || psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) + { + const ShaderInfo::InOutSignature* psSignature = NULL; + + psShader->sInfo.GetInputSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber, + psDecl->asOperands[0].ui32CompMask, + &psSignature, true); + + if ((psSignature != NULL) && (psSignature->eComponentType == INOUT_COMPONENT_UINT32 || + psSignature->eComponentType == INOUT_COMPONENT_SINT32)) // GLSL spec requires that integer inputs always have "flat" interpolation + { + Interpolation = GetInterpolationString(INTERPOLATION_CONSTANT, psContext->psShader->eTargetLanguage); + } + else if (psContext->psDependencies) // For floats we get the interpolation that was resolved from the fragment shader input + { + Interpolation = GetInterpolationString(psContext->psDependencies->GetInterpolationMode(psDecl->asOperands[0].ui32RegisterNumber), psContext->psShader->eTargetLanguage); + } + } + + DeclareInput(psContext, psDecl, + Interpolation, StorageQualifier, Precision, iNumComponents, (OPERAND_INDEX_DIMENSION)psOperand->iIndexDims, inputName.c_str(), psOperand->ui32CompMask); + + HandleInputRedirect(psDecl, Precision); + break; + } + case OPCODE_DCL_INPUT_PS_SIV: + { + switch (psDecl->asOperands[0].eSpecialName) + { + case NAME_POSITION: + { + AddBuiltinInput(psDecl, "gl_FragCoord"); + bcatcstr(GetEarlyMain(psContext), "vec4 hlslcc_FragCoord = vec4(gl_FragCoord.xyz, 1.0/gl_FragCoord.w);\n"); + break; + } + case NAME_RENDER_TARGET_ARRAY_INDEX: + { + AddBuiltinInput(psDecl, "gl_Layer"); + break; + } + default: + ASSERT(0); + break; + } + break; + } + case OPCODE_DCL_INPUT_SIV: + { + if (psShader->eShaderType == PIXEL_SHADER && psContext->psDependencies) + { + psContext->psDependencies->SetInterpolationMode(psDecl->asOperands[0].ui32RegisterNumber, psDecl->value.eInterpolation); + } + break; + } + case OPCODE_DCL_INPUT_PS: + { + const Operand* psOperand = &psDecl->asOperands[0]; + int iNumComponents = psOperand->GetNumInputElements(psContext); + const char* StorageQualifier = "varying"; + const char* Precision = ""; + std::string inputName; + const char* Interpolation = ""; + int hasNoPerspective = psContext->psShader->eTargetLanguage <= LANG_ES_310 ? 0 : 1; + inputName = psContext->GetDeclaredInputName(psOperand, NULL, 1, NULL); + + if (InOutSupported(psContext->psShader->eTargetLanguage)) + { + StorageQualifier = "in"; + } + const ShaderInfo::InOutSignature* psSignature = NULL; + + psShader->sInfo.GetInputSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber, + psDecl->asOperands[0].ui32CompMask, + &psSignature); + + if (psSignature->eComponentType == INOUT_COMPONENT_UINT32 || + psSignature->eComponentType == INOUT_COMPONENT_SINT32) // GLSL spec requires that integer inputs always have "flat" interpolation + { + Interpolation = GetInterpolationString(INTERPOLATION_CONSTANT, psContext->psShader->eTargetLanguage); + } + else + { + switch (psDecl->value.eInterpolation) + { + case INTERPOLATION_CONSTANT: + { + Interpolation = "flat "; + break; + } + case INTERPOLATION_LINEAR: + { + break; + } + case INTERPOLATION_LINEAR_CENTROID: + { + Interpolation = "centroid "; + break; + } + case INTERPOLATION_LINEAR_NOPERSPECTIVE: + { + Interpolation = hasNoPerspective ? 
"noperspective " : ""; + break; + } + case INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID: + { + Interpolation = hasNoPerspective ? "noperspective centroid " : "centroid"; + break; + } + case INTERPOLATION_LINEAR_SAMPLE: + { + Interpolation = hasNoPerspective ? "sample " : ""; + break; + } + case INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: + { + Interpolation = hasNoPerspective ? "noperspective sample " : ""; + break; + } + default: + ASSERT(0); + break; + } + } + + if (HavePrecisionQualifiers(psContext)) + { + switch (psOperand->eMinPrecision) + { + case OPERAND_MIN_PRECISION_DEFAULT: + { + Precision = "highp"; + break; + } + case OPERAND_MIN_PRECISION_FLOAT_16: + { + Precision = "mediump"; + break; + } + case OPERAND_MIN_PRECISION_FLOAT_2_8: + { + Precision = EmitLowp(psContext) ? "lowp " : "mediump "; + break; + } + case OPERAND_MIN_PRECISION_SINT_16: + { + Precision = "mediump"; + break; + } + case OPERAND_MIN_PRECISION_UINT_16: + { + Precision = "mediump"; + break; + } + } + } + + bool haveFramebufferFetch = (psShader->extensions->EXT_shader_framebuffer_fetch && + psShader->eShaderType == PIXEL_SHADER && + psContext->flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH); + + // If this is a SV_Target input and framebuffer fetch is enabled, do special input declaration unless output is declared later + if (haveFramebufferFetch && psOperand->iPSInOut && inputName.size() == 13 && !strncmp(inputName.c_str(), "vs_SV_Target", 12)) + { + bstring type = NULL; + + switch (psSignature->eComponentType) + { + case INOUT_COMPONENT_UINT32: + { + if (iNumComponents > 1) + type = bformat("uvec%d", iNumComponents); + else + type = bformat("uint"); + break; + } + case INOUT_COMPONENT_SINT32: + { + if (iNumComponents > 1) + type = bformat("ivec%d", iNumComponents); + else + type = bformat("int"); + break; + } + case INOUT_COMPONENT_FLOAT32: + { + if (iNumComponents > 1) + type = bformat("vec%d", iNumComponents); + else + type = bformat("float"); + break; + } + default: + ASSERT(0); + break; + } + + uint32_t renderTarget = psSignature->ui32SemanticIndex; + + char OutputName[512]; + bstring oname; + oname = bformat("%s%s%d", psContext->outputPrefix, psSignature->semanticName.c_str(), renderTarget); + strncpy(OutputName, (char *)oname->data, 512); + bdestroy(oname); + + if (WriteToFragData(psContext->psShader->eTargetLanguage)) + { + bcatcstr(glsl, "#ifdef GL_EXT_shader_framebuffer_fetch\n"); + bformata(glsl, "#define vs_%s gl_LastFragData[%d]\n", OutputName, renderTarget); + bcatcstr(glsl, "#else\n"); + bformata(glsl, "#define vs_%s gl_FragData[%d]\n", OutputName, renderTarget); + bcatcstr(glsl, "#endif\n"); + } + else + { + if (!RenderTargetDeclared(renderTarget)) + { + bstring layoutQualifier = bformat(""); + + if (HaveInOutLocationQualifier(psContext->psShader->eTargetLanguage) || + HaveLimitedInOutLocationQualifier(psContext->psShader->eTargetLanguage, psContext->psShader->extensions)) + { + uint32_t index = 0; + + if ((psContext->flags & HLSLCC_FLAG_DUAL_SOURCE_BLENDING) && DualSourceBlendSupported(psContext->psShader->eTargetLanguage)) + { + if (renderTarget > 0) + { + renderTarget = 0; + index = 1; + } + bdestroy(layoutQualifier); + layoutQualifier = bformat("layout(location = %d, index = %d) ", renderTarget, index); + } + else + { + bdestroy(layoutQualifier); + layoutQualifier = bformat("layout(location = %d) ", renderTarget); + } + } + + auto lq = bstr2cstr(layoutQualifier, '\0'); + + bcatcstr(glsl, "#ifdef GL_EXT_shader_framebuffer_fetch\n"); + bformata(glsl, "%sinout %s %s %s;\n", lq, Precision, 
type->data, OutputName); + bcatcstr(glsl, "#else\n"); + bformata(glsl, "%sout %s %s %s;\n", lq, Precision, type->data, OutputName); + bcatcstr(glsl, "#endif\n"); + + bcstrfree(lq); + bdestroy(layoutQualifier); + } + } + break; + } + + DeclareInput(psContext, psDecl, + Interpolation, StorageQualifier, Precision, iNumComponents, INDEX_1D, inputName.c_str(), psOperand->ui32CompMask); + + HandleInputRedirect(psDecl, Precision); + + break; + } + case OPCODE_DCL_TEMPS: + { + uint32_t i = 0; + const uint32_t ui32NumTemps = psDecl->value.ui32NumTemps; + bool usePrecision = (HavePrecisionQualifiers(psContext) != 0); + // Default values for temp variables allow avoiding Switch shader compiler incorrect warnings + // related to potential use of uninitialized variables (false-positives from compiler). + bool useDefaultInit = psContext->IsSwitch(); + + for (i = 0; i < ui32NumTemps; i++) + { + if (useDefaultInit) + { + if (psShader->psFloatTempSizes[i] != 0) + { + const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_FLOAT, psShader->psFloatTempSizes[i], usePrecision); + const char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_FLOAT, psShader->psFloatTempSizes[i], false); + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "%d = %s(0);\n", constructor, i, constructorNoPrecision); + } + if (psShader->psFloat16TempSizes[i] != 0) + { + const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_FLOAT16, psShader->psFloat16TempSizes[i], usePrecision); + const char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_FLOAT16, psShader->psFloat16TempSizes[i], false); + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "16_%d = %s(0);\n", constructor, i, constructorNoPrecision); + } + if (psShader->psFloat10TempSizes[i] != 0) + { + const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_FLOAT10, psShader->psFloat10TempSizes[i], usePrecision); + const char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_FLOAT10, psShader->psFloat10TempSizes[i], false); + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "10_%d = %s(0);\n", constructor, i, constructorNoPrecision); + } + if (psShader->psIntTempSizes[i] != 0) + { + const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_INT, psShader->psIntTempSizes[i], usePrecision); + const char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_INT, psShader->psIntTempSizes[i], false); + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i%d = %s(0);\n", constructor, i, constructorNoPrecision); + } + if (psShader->psInt16TempSizes[i] != 0) + { + const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_INT16, psShader->psInt16TempSizes[i], usePrecision); + const char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_INT16, psShader->psInt16TempSizes[i], false); + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i16_%d = %s(0);\n", constructor, i, constructorNoPrecision); + } + if (psShader->psInt12TempSizes[i] != 0) + { + const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_INT12, psShader->psInt12TempSizes[i], usePrecision); + const char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_INT12, psShader->psInt12TempSizes[i], false); + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i12_%d = %s(0);\n", constructor, i, constructorNoPrecision); + } + if (psShader->psUIntTempSizes[i] != 0) + { + const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_UINT, psShader->psUIntTempSizes[i], usePrecision); + const 
char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_UINT, psShader->psUIntTempSizes[i], false); + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "u%d = %s(0);\n", constructor, i, constructorNoPrecision); + } + if (psShader->psUInt16TempSizes[i] != 0) + { + const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_UINT16, psShader->psUInt16TempSizes[i], usePrecision); + const char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_UINT16, psShader->psUInt16TempSizes[i], false); + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "u16_%d = %s(0);\n", constructor, i, constructorNoPrecision); + } + if (psShader->fp64 && (psShader->psDoubleTempSizes[i] != 0)) + { + const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_DOUBLE, psShader->psDoubleTempSizes[i], usePrecision); + const char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_DOUBLE, psShader->psDoubleTempSizes[i], false); + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "d%d = %s(0);\n", constructor, i, constructorNoPrecision); + } + if (psShader->psBoolTempSizes[i] != 0) + { + const char* constructor = HLSLcc::GetConstructorForType(psContext, SVT_BOOL, psShader->psBoolTempSizes[i], usePrecision); + const char* constructorNoPrecision = HLSLcc::GetConstructorForType(psContext, SVT_BOOL, psShader->psBoolTempSizes[i], false); + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "b%d = %s(0);\n", constructor, i, constructorNoPrecision); + } + } + else + { + if (psShader->psFloatTempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT, psShader->psFloatTempSizes[i], usePrecision), i); + if (psShader->psFloat16TempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT16, psShader->psFloat16TempSizes[i], usePrecision), i); + if (psShader->psFloat10TempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "10_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT10, psShader->psFloat10TempSizes[i], usePrecision), i); + if (psShader->psIntTempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT, psShader->psIntTempSizes[i], usePrecision), i); + if (psShader->psInt16TempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT16, psShader->psInt16TempSizes[i], usePrecision), i); + if (psShader->psInt12TempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "i12_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT12, psShader->psInt12TempSizes[i], usePrecision), i); + if (psShader->psUIntTempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "u%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_UINT, psShader->psUIntTempSizes[i], usePrecision), i); + if (psShader->psUInt16TempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "u16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_UINT16, psShader->psUInt16TempSizes[i], usePrecision), i); + if (psShader->fp64 && (psShader->psDoubleTempSizes[i] != 0)) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "d%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_DOUBLE, psShader->psDoubleTempSizes[i], usePrecision), i); + if (psShader->psBoolTempSizes[i] != 0) + bformata(glsl, "%s " HLSLCC_TEMP_PREFIX "b%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_BOOL, psShader->psBoolTempSizes[i], usePrecision), i); + } + } + break; + } + case OPCODE_SPECIAL_DCL_IMMCONST: + { + 
ASSERT(0 && "DX9 shaders no longer supported!"); + break; + } + case OPCODE_DCL_CONSTANT_BUFFER: + { + const Operand* psOperand = &psDecl->asOperands[0]; + const uint32_t ui32BindingPoint = psOperand->aui32ArraySizes[0]; + + const ConstantBuffer* psCBuf = NULL; + psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, ui32BindingPoint, &psCBuf); + + // We don't have a original resource name, maybe generate one??? + if (!psCBuf) + { + char name[24]; + sprintf(name, "ConstantBuffer%d", ui32BindingPoint); + + GLSLCrossDependencyData::GLSLBufferBindPointInfo bindPointInfo = psContext->IsVulkan() ? + GLSLCrossDependencyData::GLSLBufferBindPointInfo{ ui32BindingPoint, true } : psContext->psDependencies->GetGLSLResourceBinding(name, GLSLCrossDependencyData::BufferType_Constant); + + bool isKnown = bindPointInfo.known; + uint32_t actualBindingPoint = bindPointInfo.slot; + + if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags) || (psContext->flags & HLSLCC_FLAG_FORCE_EXPLICIT_LOCATIONS)) + { + if (!psContext->IsVulkan() && !isKnown && UseReflection(psContext)) + psContext->m_Reflection.OnConstantBufferBinding(name, actualBindingPoint); + bformata(glsl, "UNITY_LOCATION(%d) ", actualBindingPoint); + } + + bformata(glsl, "layout(std140) uniform %s {\n\tvec4 data[%d];\n} cb%d;\n", name, psOperand->aui32ArraySizes[1], ui32BindingPoint); + break; + } + + if (psCBuf->name.substr(0, 20) == "hlslcc_SubpassInput_" && psCBuf->name.length() >= 23 && !psCBuf->asVars.empty()) + { + // Special case for vulkan subpass input. + + // The multisample versions have multiple members in the cbuffer, but we must only declare once. + // We still need to loop through all the variables and adjust names + + // Pick up the type and index + char ty = psCBuf->name[20]; + int idx = psCBuf->name[22] - '0'; + bool isMS = false; + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding((std::string &)psCBuf->name, false, 2); + + bool declared = false; + for (std::vector::const_iterator itr = psCBuf->asVars.begin(); itr != psCBuf->asVars.end(); itr++) + { + ShaderVar &sv = (ShaderVar &)*itr; + if (sv.name.substr(0, 15) == "hlslcc_fbinput_") + { + if (!declared) + { + switch (ty) + { + case 'f': + bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform highp subpassInput %s;\n", idx, binding.set, binding.binding, sv.name.c_str()); + break; + case 'h': + bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform mediump subpassInput %s;\n", idx, binding.set, binding.binding, sv.name.c_str()); + break; + case 'i': + bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform isubpassInput %s;\n", idx, binding.set, binding.binding, sv.name.c_str()); + break; + case 'u': + bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform usubpassInput %s;\n", idx, binding.set, binding.binding, sv.name.c_str()); + break; + case 'F': + bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform highp subpassInputMS %s;\n", idx, binding.set, binding.binding, sv.name.substr(0, 16).c_str()); + isMS = true; + break; + case 'H': + bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform mediump subpassInputMS %s;\n", idx, binding.set, binding.binding, sv.name.substr(0, 16).c_str()); + isMS = true; + break; + case 'I': + bformata(glsl, 
"layout(input_attachment_index = %d, set = %d, binding = %d) uniform isubpassInputMS %s;\n", idx, binding.set, binding.binding, sv.name.substr(0, 16).c_str()); + isMS = true; + break; + case 'U': + bformata(glsl, "layout(input_attachment_index = %d, set = %d, binding = %d) uniform usubpassInputMS %s;\n", idx, binding.set, binding.binding, sv.name.substr(0, 16).c_str()); + isMS = true; + break; + default: + break; + } + declared = true; + } + else + { + if (ty == 'F' || ty == 'I' || ty == 'U') + isMS = true; + } + // Munge the name so it'll get the correct function call in GLSL directly + sv.name.insert(0, "subpassLoad("); + if (isMS) + sv.name.append(","); + else + sv.name.append(")"); + // Also update the type name + sv.sType.name = sv.name; + sv.sType.fullName = sv.name; + } + } + + // Break out so this doesn't get declared. + break; + } + + if (psCBuf->name == "OVR_multiview") + { + // Special case for piggy-backing multiview info out + // This is not really a cbuffer, but if we see this being accessed, we know we need viewID + + // Extract numViews + uint32_t numViews = 0; + for (std::vector::const_iterator itr = psCBuf->asVars.begin(); itr != psCBuf->asVars.end(); itr++) + { + if (strncmp(itr->name.c_str(), "numViews_", 9) == 0) + { + // I really don't think we'll ever have more than 9 multiviews + numViews = itr->name[9] - '0'; + break; + } + } + if (numViews > 0 && numViews < 10) + { + // multiview2 is required because we have built-in shaders that do eye-dependent work other than just position + psContext->RequireExtension("GL_OVR_multiview2"); + + if (psShader->eShaderType == VERTEX_SHADER) + bformata(glsl, "layout(num_views = %d) in;\n", numViews); + + break; // Break out so we don't actually declare this cbuffer + } + } + + if (IsPreTransformConstantBufferName(psCBuf->name.c_str())) + { + m_NeedUnityPreTransformDecl = true; + break; // Break out so we don't actually declare this cbuffer + } + + if (psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT) + { + if (psContext->flags & HLSLCC_FLAG_GLOBAL_CONSTS_NEVER_IN_UBO && psCBuf->name[0] == '$') + { + DeclareStructConstants(ui32BindingPoint, psCBuf, psOperand, glsl); + } + else + { + DeclareUBOConstants(ui32BindingPoint, psCBuf, glsl); + } + } + else + { + DeclareStructConstants(ui32BindingPoint, psCBuf, psOperand, glsl); + } + break; + } + case OPCODE_DCL_RESOURCE: + { + psShader->aeResourceDims[psDecl->asOperands[0].ui32RegisterNumber] = psDecl->value.eResourceDimension; + + // Vulkan doesn't use combined textures+samplers, so do own handling in a separate func + if (psContext->IsVulkan()) + { + TranslateVulkanResource(psContext, psDecl); + break; + } + + if (HaveUniformBindingsAndLocations(psContext->psShader->eTargetLanguage, psContext->psShader->extensions, psContext->flags) || + ((psContext->flags & HLSLCC_FLAG_FORCE_EXPLICIT_LOCATIONS) && ((psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) != HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS))) + { + std::string tname = ResourceName(psContext, RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber, 0); + GLSLCrossDependencyData::GLSLBufferBindPointInfo slotInfo = psContext->psDependencies->GetGLSLResourceBinding(tname, GLSLCrossDependencyData::BufferType_Texture); + + bformata(glsl, "UNITY_LOCATION(%d) ", slotInfo.slot); + if (!slotInfo.known && UseReflection(psContext)) + { + const RESOURCE_DIMENSION dim = psDecl->value.eResourceDimension; + if (dim == RESOURCE_DIMENSION_BUFFER) + psContext->m_Reflection.OnBufferBinding(tname, slotInfo.slot, false); + else + { + bool 
isMSAATex = (dim == RESOURCE_DIMENSION_TEXTURE2DMS) || (dim == RESOURCE_DIMENSION_TEXTURE2DMSARRAY); + psContext->m_Reflection.OnTextureBinding(tname, slotInfo.slot, slotInfo.slot, isMSAATex, GetTextureDimension(psContext, dim, psDecl->asOperands[0].ui32RegisterNumber), false); + } + } + } + + switch (psDecl->value.eResourceDimension) + { + case RESOURCE_DIMENSION_BUFFER: + { + bcatcstr(glsl, "uniform "); + if (IsESLanguage(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, "highp "); + bformata(glsl, "%s ", GetSamplerType(psContext, + RESOURCE_DIMENSION_BUFFER, + psDecl->asOperands[0].ui32RegisterNumber)); + TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); + bcatcstr(glsl, ";\n"); + break; + } + + case RESOURCE_DIMENSION_TEXTURE1D: + case RESOURCE_DIMENSION_TEXTURE2D: + case RESOURCE_DIMENSION_TEXTURECUBE: + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + TranslateResourceTexture(psContext, psDecl, 1); + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DMS: + case RESOURCE_DIMENSION_TEXTURE3D: + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + TranslateResourceTexture(psContext, psDecl, 0); + break; + } + + default: + ASSERT(0); + break; + } + break; + } + case OPCODE_DCL_OUTPUT: + { + bool needsDeclare = true; + if (psShader->eShaderType == HULL_SHADER && psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE && psDecl->asOperands[0].ui32RegisterNumber == 0) + { + // Need extra check from signature: + const ShaderInfo::InOutSignature *sig = NULL; + psShader->sInfo.GetOutputSignatureFromRegister(0, psDecl->asOperands->GetAccessMask(), 0, &sig, true); + if (!sig || sig->semanticName == "POSITION" || sig->semanticName == "POS" || sig->semanticName == "SV_Position") + { + needsDeclare = false; + AddBuiltinOutput(psDecl, 0, "gl_out[gl_InvocationID].gl_Position"); + } + } + + if (needsDeclare) + { + AddUserOutput(psDecl); + } + break; + } + case OPCODE_DCL_GLOBAL_FLAGS: + { + uint32_t ui32Flags = psDecl->value.ui32GlobalFlags; + + if (ui32Flags & GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL && psContext->psShader->eShaderType == PIXEL_SHADER) + { + bcatcstr(glsl, "layout(early_fragment_tests) in;\n"); + psShader->sInfo.bEarlyFragmentTests = true; + } + if ((ui32Flags & GLOBAL_FLAG_REFACTORING_ALLOWED) && HavePreciseQualifier(psContext->psShader->eTargetLanguage)) + { + static const char * const types[] = + { + "vec4", "ivec4", "bvec4", "uvec4" + }; + + for (int i = 0; i < sizeof(types) / sizeof(types[0]); ++i) + { + char const * t = types[i]; + bformata(glsl, "precise %s u_xlat_precise_%s;\n", t, t); + } + } + if (ui32Flags & GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS) + { + psContext->EnableExtension("GL_ARB_gpu_shader_fp64"); + psShader->fp64 = 1; + } + break; + } + + case OPCODE_DCL_THREAD_GROUP: + { + bformata(glsl, "layout(local_size_x = %d, local_size_y = %d, local_size_z = %d) in;\n", + psDecl->value.aui32WorkGroupSize[0], + psDecl->value.aui32WorkGroupSize[1], + psDecl->value.aui32WorkGroupSize[2]); + break; + } + case OPCODE_DCL_TESS_OUTPUT_PRIMITIVE: + { + if (psContext->psShader->eShaderType == HULL_SHADER) + { + psContext->psShader->sInfo.eTessOutPrim = psDecl->value.eTessOutPrim; + // Invert triangle winding order to match glsl better, except on vulkan + if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) == 0) + { + if (psContext->psShader->sInfo.eTessOutPrim == TESSELLATOR_OUTPUT_TRIANGLE_CW) + psContext->psShader->sInfo.eTessOutPrim = TESSELLATOR_OUTPUT_TRIANGLE_CCW; + 
else if (psContext->psShader->sInfo.eTessOutPrim == TESSELLATOR_OUTPUT_TRIANGLE_CCW) + psContext->psShader->sInfo.eTessOutPrim = TESSELLATOR_OUTPUT_TRIANGLE_CW; + } + } + break; + } + case OPCODE_DCL_TESS_DOMAIN: + { + if (psContext->psShader->eShaderType == DOMAIN_SHADER) + { + switch (psDecl->value.eTessDomain) + { + case TESSELLATOR_DOMAIN_ISOLINE: + { + bcatcstr(glsl, "layout(isolines) in;\n"); + break; + } + case TESSELLATOR_DOMAIN_TRI: + { + bcatcstr(glsl, "layout(triangles) in;\n"); + break; + } + case TESSELLATOR_DOMAIN_QUAD: + { + bcatcstr(glsl, "layout(quads) in;\n"); + break; + } + default: + { + break; + } + } + } + break; + } + case OPCODE_DCL_TESS_PARTITIONING: + { + if (psContext->psShader->eShaderType == HULL_SHADER) + { + psContext->psShader->sInfo.eTessPartitioning = psDecl->value.eTessPartitioning; + } + break; + } + case OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: + { + switch (psDecl->value.eOutputPrimitiveTopology) + { + case PRIMITIVE_TOPOLOGY_POINTLIST: + { + bcatcstr(glsl, "layout(points) out;\n"); + break; + } + case PRIMITIVE_TOPOLOGY_LINELIST_ADJ: + case PRIMITIVE_TOPOLOGY_LINESTRIP_ADJ: + case PRIMITIVE_TOPOLOGY_LINELIST: + case PRIMITIVE_TOPOLOGY_LINESTRIP: + { + bcatcstr(glsl, "layout(line_strip) out;\n"); + break; + } + + case PRIMITIVE_TOPOLOGY_TRIANGLELIST_ADJ: + case PRIMITIVE_TOPOLOGY_TRIANGLESTRIP_ADJ: + case PRIMITIVE_TOPOLOGY_TRIANGLESTRIP: + case PRIMITIVE_TOPOLOGY_TRIANGLELIST: + { + bcatcstr(glsl, "layout(triangle_strip) out;\n"); + break; + } + default: + { + break; + } + } + break; + } + case OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: + { + bformata(glsl, "layout(max_vertices = %d) out;\n", psDecl->value.ui32MaxOutputVertexCount); + break; + } + case OPCODE_DCL_GS_INPUT_PRIMITIVE: + { + switch (psDecl->value.eInputPrimitive) + { + case PRIMITIVE_POINT: + { + bcatcstr(glsl, "layout(points) in;\n"); + break; + } + case PRIMITIVE_LINE: + { + bcatcstr(glsl, "layout(lines) in;\n"); + break; + } + case PRIMITIVE_LINE_ADJ: + { + bcatcstr(glsl, "layout(lines_adjacency) in;\n"); + break; + } + case PRIMITIVE_TRIANGLE: + { + bcatcstr(glsl, "layout(triangles) in;\n"); + break; + } + case PRIMITIVE_TRIANGLE_ADJ: + { + bcatcstr(glsl, "layout(triangles_adjacency) in;\n"); + break; + } + default: + { + break; + } + } + break; + } + case OPCODE_DCL_INTERFACE: + { + const uint32_t interfaceID = psDecl->value.iface.ui32InterfaceID; + const uint32_t numUniforms = psDecl->value.iface.ui32ArraySize; + const uint32_t ui32NumBodiesPerTable = psContext->psShader->funcPointer[interfaceID].ui32NumBodiesPerTable; + ShaderVar* psVar; + uint32_t varFound; + + const char* uniformName; + + varFound = psContext->psShader->sInfo.GetInterfaceVarFromOffset(interfaceID, &psVar); + ASSERT(varFound); + uniformName = &psVar->name[0]; + + bformata(glsl, "subroutine uniform SubroutineType %s[%d*%d];\n", uniformName, numUniforms, ui32NumBodiesPerTable); + break; + } + case OPCODE_DCL_FUNCTION_BODY: + { + //bformata(glsl, "void Func%d();//%d\n", psDecl->asOperands[0].ui32RegisterNumber, psDecl->asOperands[0].eType); + break; + } + case OPCODE_DCL_FUNCTION_TABLE: + { + break; + } + case OPCODE_CUSTOMDATA: + { + // On Vulkan we just spew the data in uints as-is + if (psContext->IsVulkan()) + { + bstring glsl = *psContext->currentGLSLString; + bformata(glsl, "const uvec4 ImmCB_%d[] = uvec4[%d] (\n", psContext->currentPhase, psDecl->asImmediateConstBuffer.size()); + bool isFirst = true; + std::for_each(psDecl->asImmediateConstBuffer.begin(), psDecl->asImmediateConstBuffer.end(), [&](const ICBVec4 
&data) + { + if (!isFirst) + { + bcatcstr(glsl, ",\n"); + } + isFirst = false; + bformata(glsl, "\tuvec4(0x%X, 0x%X, 0x%X, 0x%X)", data.a, data.b, data.c, data.d); + }); + bcatcstr(glsl, ");\n"); + } + else if (psContext->IsSwitch()) + { + bstring glsl = *psContext->currentGLSLString; + bformata(glsl, "const vec4 ImmCB_%d[] = vec4[%d] (\n", psContext->currentPhase, psDecl->asImmediateConstBuffer.size()); + bool isFirst = true; + std::for_each(psDecl->asImmediateConstBuffer.begin(), psDecl->asImmediateConstBuffer.end(), [&](const ICBVec4 &data) + { + if (!isFirst) + { + bcatcstr(glsl, ",\n"); + } + isFirst = false; + bformata(glsl, "vec4(uintBitsToFloat(uint(0x%Xu)), uintBitsToFloat(uint(0x%Xu)), uintBitsToFloat(uint(0x%Xu)), uintBitsToFloat(uint(0x%Xu)))", data.a, data.b, data.c, data.d); + }); + bcatcstr(glsl, ");\n"); + } + else + { + // TODO: This is only ever accessed as a float currently. Do trickery if we ever see ints accessed from an array. + // Walk through all the chunks we've seen in this phase. + ShaderPhase &sp = psShader->asPhases[psContext->currentPhase]; + std::for_each(sp.m_ConstantArrayInfo.m_Chunks.begin(), sp.m_ConstantArrayInfo.m_Chunks.end(), [this](const std::pair &chunk) + { + bstring glsl = *psContext->currentGLSLString; + uint32_t componentCount = chunk.second.m_ComponentCount; + // Just do the declaration here and contents to earlyMain. + if (componentCount == 1) + bformata(glsl, "float ImmCB_%d_%d_%d[%d];\n", psContext->currentPhase, chunk.first, chunk.second.m_Rebase, chunk.second.m_Size); + else + bformata(glsl, "vec%d ImmCB_%d_%d_%d[%d];\n", componentCount, psContext->currentPhase, chunk.first, chunk.second.m_Rebase, chunk.second.m_Size); + + if (!HaveDynamicIndexing(psContext)) + { + bstring name = bfromcstr(""); + bformata(name, "ImmCB_%d_%d_%d", psContext->currentPhase, chunk.first, chunk.second.m_Rebase); + SHADER_VARIABLE_CLASS eClass = componentCount > 1 ? 
SVC_VECTOR : SVC_SCALAR; + + DeclareDynamicIndexWrapper((const char *)name->data, eClass, SVT_FLOAT, 1, componentCount, chunk.second.m_Size); + bdestroy(name); + } + + bstring tgt = psContext->psShader->asPhases[psContext->currentPhase].earlyMain; + Declaration *psDecl = psContext->psShader->asPhases[psContext->currentPhase].m_ConstantArrayInfo.m_OrigDeclaration; + if (componentCount == 1) + { + for (uint32_t i = 0; i < chunk.second.m_Size; i++) + { + float val[4] = { + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].a, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].b, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].c, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].d + }; + bformata(tgt, "\tImmCB_%d_%d_%d[%d] = ", psContext->currentPhase, chunk.first, chunk.second.m_Rebase, i); + if (fpcheck(val[chunk.second.m_Rebase]) && HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) + bformata(tgt, "uintBitsToFloat(uint(0x%Xu))", *(uint32_t *)&val[chunk.second.m_Rebase]); + else + HLSLcc::PrintFloat(tgt, val[chunk.second.m_Rebase]); + bcatcstr(tgt, ";\n"); + } + } + else + { + for (uint32_t i = 0; i < chunk.second.m_Size; i++) + { + float val[4] = { + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].a, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].b, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].c, + *(float*)&psDecl->asImmediateConstBuffer[i + chunk.first].d + }; + bformata(tgt, "\tImmCB_%d_%d_%d[%d] = vec%d(", psContext->currentPhase, chunk.first, chunk.second.m_Rebase, i, componentCount); + for (uint32_t k = 0; k < componentCount; k++) + { + if (k != 0) + bcatcstr(tgt, ", "); + if (fpcheck(val[k]) && HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) + bformata(tgt, "uintBitsToFloat(uint(0x%Xu))", *(uint32_t *)&val[k + chunk.second.m_Rebase]); + else + HLSLcc::PrintFloat(tgt, val[k + chunk.second.m_Rebase]); + } + bcatcstr(tgt, ");\n"); + } + } + }); + } + + + break; + } + case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: + case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: + break; // Nothing to do + + case OPCODE_DCL_INDEXABLE_TEMP: + { + const uint32_t ui32RegIndex = psDecl->sIdxTemp.ui32RegIndex; + const uint32_t ui32RegCount = psDecl->sIdxTemp.ui32RegCount; + const uint32_t ui32RegComponentSize = psDecl->sIdxTemp.ui32RegComponentSize; + bformata(glsl, "vec%d TempArray%d[%d];\n", ui32RegComponentSize, ui32RegIndex, ui32RegCount); + break; + } + case OPCODE_DCL_INDEX_RANGE: + { + switch (psDecl->asOperands[0].eType) + { + case OPERAND_TYPE_OUTPUT: + case OPERAND_TYPE_INPUT: + { + const ShaderInfo::InOutSignature* psSignature = NULL; + const char* type = "vec"; + const char* Precision = ""; + uint32_t startReg = 0; + uint32_t i; + bstring *oldString; + int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); + int isInput = psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT ? 
1 : 0; + + if (regSpace == 0) + { + if (isInput) + psShader->sInfo.GetInputSignatureFromRegister( + psDecl->asOperands[0].ui32RegisterNumber, + psDecl->asOperands[0].ui32CompMask, + &psSignature); + else + psShader->sInfo.GetOutputSignatureFromRegister( + psDecl->asOperands[0].ui32RegisterNumber, + psDecl->asOperands[0].ui32CompMask, + psShader->ui32CurrentVertexOutputStream, + &psSignature); + } + else + psShader->sInfo.GetPatchConstantSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber, psDecl->asOperands[0].ui32CompMask, &psSignature); + + ASSERT(psSignature != NULL); + + switch (psSignature->eComponentType) + { + case INOUT_COMPONENT_UINT32: + { + type = "uvec"; + break; + } + case INOUT_COMPONENT_SINT32: + { + type = "ivec"; + break; + } + case INOUT_COMPONENT_FLOAT32: + { + break; + } + default: + ASSERT(0); + break; + } + + if (HavePrecisionQualifiers(psContext)) + { + switch (psSignature->eMinPrec) // TODO What if the inputs in the indexed range are of different precisions? + { + default: + { + Precision = "highp "; + break; + } + case MIN_PRECISION_ANY_16: + case MIN_PRECISION_FLOAT_16: + case MIN_PRECISION_SINT_16: + case MIN_PRECISION_UINT_16: + { + Precision = "mediump "; + break; + } + case MIN_PRECISION_FLOAT_2_8: + { + Precision = EmitLowp(psContext) ? "lowp " : "mediump "; + break; + } + } + } + + startReg = psDecl->asOperands[0].ui32RegisterNumber; + bformata(glsl, "%s%s4 phase%d_%sput%d_%d[%d];\n", Precision, type, psContext->currentPhase, isInput ? "In" : "Out", regSpace, startReg, psDecl->value.ui32IndexRange); + oldString = psContext->currentGLSLString; + glsl = isInput ? psContext->psShader->asPhases[psContext->currentPhase].earlyMain : psContext->psShader->asPhases[psContext->currentPhase].postShaderCode; + psContext->currentGLSLString = &glsl; + if (isInput == 0) + psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode = 1; + for (i = 0; i < psDecl->value.ui32IndexRange; i++) + { + int dummy = 0; + std::string realName; + uint32_t destMask = psDecl->asOperands[0].ui32CompMask; + uint32_t rebase = 0; + const ShaderInfo::InOutSignature *psSig = NULL; + uint32_t regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); + + if (regSpace == 0) + if (isInput) + psContext->psShader->sInfo.GetInputSignatureFromRegister(startReg + i, destMask, &psSig); + else + psContext->psShader->sInfo.GetOutputSignatureFromRegister(startReg + i, destMask, 0, &psSig); + else + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(startReg + i, destMask, &psSig); + + ASSERT(psSig != NULL); + + if ((psSig->ui32Mask & destMask) == 0) + continue; // Skip dummy writes (vec2 texcoords get filled to vec4 with zeroes etc) + + while ((psSig->ui32Mask & (1 << rebase)) == 0) + rebase++; + + ((Declaration *)psDecl)->asOperands[0].ui32RegisterNumber = startReg + i; + + if (isInput) + { + realName = psContext->GetDeclaredInputName(&psDecl->asOperands[0], &dummy, 1, NULL); + + psContext->AddIndentation(); + + bformata(glsl, "phase%d_Input%d_%d[%d]", psContext->currentPhase, regSpace, startReg, i); + + if (destMask != OPERAND_4_COMPONENT_MASK_ALL) + { + int k; + const char *swizzle = "xyzw"; + bcatcstr(glsl, "."); + for (k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) + { + bformata(glsl, "%c", swizzle[k]); + } + } + } + bcatcstr(glsl, " = "); + bcatcstr(glsl, realName.c_str()); + if (destMask != OPERAND_4_COMPONENT_MASK_ALL && destMask != psSig->ui32Mask) + { + int k; + const char *swizzle = "xyzw"; + bcatcstr(glsl, "."); + 
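+ // 'rebase' is the index of the lowest component present in the signature mask; the
+ // loop below emits the source swizzle shifted down by it (swizzle[k - rebase]), so a
+ // signature that only occupies e.g. .yz reads .xy of the declared (packed) input.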
for (k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) + { + bformata(glsl, "%c", swizzle[k - rebase]); + } + } + } + } + else + { + realName = psContext->GetDeclaredOutputName(&psDecl->asOperands[0], &dummy, NULL, NULL, 1); + + psContext->AddIndentation(); + bcatcstr(glsl, realName.c_str()); + if (destMask != OPERAND_4_COMPONENT_MASK_ALL && destMask != psSig->ui32Mask) + { + int k; + const char *swizzle = "xyzw"; + bcatcstr(glsl, "."); + for (k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) + { + bformata(glsl, "%c", swizzle[k - rebase]); + } + } + } + + bformata(glsl, " = phase%d_Output%d_%d[%d]", psContext->currentPhase, regSpace, startReg, i); + + if (destMask != OPERAND_4_COMPONENT_MASK_ALL) + { + int k; + const char *swizzle = "xyzw"; + bcatcstr(glsl, "."); + for (k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) + { + bformata(glsl, "%c", swizzle[k]); + } + } + } + } + + bcatcstr(glsl, ";\n"); + } + + ((Declaration *)psDecl)->asOperands[0].ui32RegisterNumber = startReg; + psContext->currentGLSLString = oldString; + glsl = *psContext->currentGLSLString; + + for (i = 0; i < psDecl->value.ui32IndexRange; i++) + { + if (regSpace == 0) + { + if (isInput) + psShader->sInfo.GetInputSignatureFromRegister( + psDecl->asOperands[0].ui32RegisterNumber + i, + psDecl->asOperands[0].ui32CompMask, + &psSignature); + else + psShader->sInfo.GetOutputSignatureFromRegister( + psDecl->asOperands[0].ui32RegisterNumber + i, + psDecl->asOperands[0].ui32CompMask, + psShader->ui32CurrentVertexOutputStream, + &psSignature); + } + else + psShader->sInfo.GetPatchConstantSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber + i, psDecl->asOperands[0].ui32CompMask, &psSignature); + + ASSERT(psSignature != NULL); + + ((ShaderInfo::InOutSignature *)psSignature)->isIndexed.insert(psContext->currentPhase); + ((ShaderInfo::InOutSignature *)psSignature)->indexStart[psContext->currentPhase] = startReg; + ((ShaderInfo::InOutSignature *)psSignature)->index[psContext->currentPhase] = i; + } + + + break; + } + default: + // TODO Input index ranges. + ASSERT(0); + } + break; + } + case OPCODE_HS_DECLS: + { + break; + } + case OPCODE_DCL_INPUT_CONTROL_POINT_COUNT: + { + break; + } + case OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT: + { + if (psContext->psShader->eShaderType == HULL_SHADER) + { + bformata(glsl, "layout(vertices=%d) out;\n", psDecl->value.ui32MaxOutputVertexCount); + } + break; + } + case OPCODE_HS_FORK_PHASE: + { + break; + } + case OPCODE_HS_JOIN_PHASE: + { + break; + } + case OPCODE_DCL_SAMPLER: + { + if (psContext->IsVulkan()) + { + ResourceBinding *pRes = NULL; + psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_SAMPLER, psDecl->asOperands[0].ui32RegisterNumber, (const ResourceBinding **)&pRes); + ASSERT(pRes != NULL); + std::string name = ResourceName(psContext, RGROUP_SAMPLER, psDecl->asOperands[0].ui32RegisterNumber, 0); + const char *samplerPrecision = GetSamplerPrecision(psContext, pRes->ePrecision); + + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(name); + const char *samplerType = psDecl->value.eSamplerMode == D3D10_SB_SAMPLER_MODE_COMPARISON ? 
"samplerShadow" : "sampler"; + bformata(glsl, "layout(set = %d, binding = %d) uniform %s %s %s;\n", binding.set, binding.binding, samplerPrecision, samplerType, name.c_str()); + // Store the sampler mode to ShaderInfo, it's needed when we use the sampler + pRes->m_SamplerMode = psDecl->value.eSamplerMode; + } + break; + } + case OPCODE_DCL_HS_MAX_TESSFACTOR: + { + //For GLSL the max tessellation factor is fixed to the value of gl_MaxTessGenLevel. + break; + } + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED: + { + // non-float images need either 'i' or 'u' prefix. + char imageTypePrefix[2] = { 0, 0 }; + uint32_t bindpoint = psDecl->asOperands[0].ui32RegisterNumber; + const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; + + if (psDecl->sUAV.ui32GloballyCoherentAccess & GLOBALLY_COHERENT_ACCESS) + { + bcatcstr(glsl, "coherent "); + } + + // Use 4 component format as a fallback if no instruction defines it + const uint32_t numComponents = psDecl->sUAV.ui32NumComponents > 0 ? psDecl->sUAV.ui32NumComponents : 4; + REFLECT_RESOURCE_PRECISION precision = REFLECT_RESOURCE_PRECISION_UNKNOWN; + + if (!(psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_READ) && + !(psContext->flags & HLSLCC_FLAG_GLES31_IMAGE_QUALIFIERS) && !isVulkan) + { //Special case on desktop glsl: writeonly image does not need format qualifier + bformata(glsl, "writeonly layout(binding=%d) ", bindpoint); + } + else + { + if (!(psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_READ)) + bcatcstr(glsl, "writeonly "); + else if (!(psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE)) + bcatcstr(glsl, "readonly "); + + if ((psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE) && IsESLanguage(psShader->eTargetLanguage)) + { + // Need to require the extension + psContext->RequireExtension("GL_EXT_texture_buffer"); + } + + if (psContext->IsSwitch() && !(psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_ATOMIC)) + { + // Switch supports the GL_EXT_shader_image_load_formatted extension but it does require being enabled. + // Allows imageLoad() to do formatted reads and match the ld_uav_typed_indexable instruction. + // GL_EXT_shader_image_load_formatted doesn't provide support for imageAtomic*() functions. These still require format layout qualifier + psContext->RequireExtension("GL_EXT_shader_image_load_formatted"); + bformata(glsl, "layout(binding=%d) ", bindpoint); + switch (psDecl->sUAV.Type) + { + case RETURN_TYPE_FLOAT: + case RETURN_TYPE_UINT: + case RETURN_TYPE_SINT: + bcatcstr(glsl, "highp "); //TODO: half case? 
+ break; + case RETURN_TYPE_UNORM: + case RETURN_TYPE_SNORM: + bcatcstr(glsl, "lowp "); + break; + default: + ASSERT(0); + } + } + else + { + if (isVulkan) + { + std::string name = ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(name); + bformata(glsl, "layout(set = %d, binding = %d, ", binding.set, binding.binding); + } + else + bformata(glsl, "layout(binding=%d, ", bindpoint); + + const ResourceBinding* psBinding = 0; + if (psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, &psBinding)) + precision = psBinding->ePrecision; + + if (psDecl->sUAV.Type == RETURN_TYPE_FLOAT && numComponents == 3 && precision == REFLECT_RESOURCE_PRECISION_LOWP) + { + if (IsESLanguage(psContext->psShader->eTargetLanguage)) + GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); + bcatcstr(glsl, "r11f_g11f_b10f) mediump "); + } + else if (psDecl->sUAV.Type == RETURN_TYPE_UNORM && numComponents == 4 && precision == REFLECT_RESOURCE_PRECISION_LOWP) + { + if (IsESLanguage(psContext->psShader->eTargetLanguage)) + GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); + bcatcstr(glsl, "rgb10_a2) mediump "); + } + else if (psDecl->sUAV.Type == RETURN_TYPE_UINT && numComponents == 4 && precision == REFLECT_RESOURCE_PRECISION_LOWP) + { + if (IsESLanguage(psContext->psShader->eTargetLanguage)) + GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); + bcatcstr(glsl, "rgb10_a2ui) mediump "); + } + else + { + if (numComponents >= 1) + bcatcstr(glsl, "r"); + if (numComponents >= 2) + bcatcstr(glsl, "g"); + if (numComponents >= 3) + bcatcstr(glsl, "ba"); + + switch (psDecl->sUAV.Type) + { + case RETURN_TYPE_FLOAT: + { + switch (precision) + { + case REFLECT_RESOURCE_PRECISION_LOWP: + case REFLECT_RESOURCE_PRECISION_MEDIUMP: + if (IsESLanguage(psContext->psShader->eTargetLanguage) && numComponents != 4) + GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); + bcatcstr(glsl, "16f) mediump "); break; + default: + if (IsESLanguage(psContext->psShader->eTargetLanguage) && numComponents != 4 && numComponents != 1) + GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); + bcatcstr(glsl, "32f) highp "); break; + } + } break; + case RETURN_TYPE_UNORM: + case RETURN_TYPE_SNORM: + { + if (IsESLanguage(psContext->psShader->eTargetLanguage) && numComponents != 4) + GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); + bformata(glsl, "8%s) lowp ", psDecl->sUAV.Type == RETURN_TYPE_SNORM ? "_snorm" : ""); + } break; + case RETURN_TYPE_UINT: + case RETURN_TYPE_SINT: + { + const char* fmt = psDecl->sUAV.Type == RETURN_TYPE_UINT ? 
"ui" : "i"; + switch (precision) + { + case REFLECT_RESOURCE_PRECISION_LOWP: + if (IsESLanguage(psContext->psShader->eTargetLanguage) && numComponents != 4) + GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); + bformata(glsl, "8%s) lowp ", fmt); break; + case REFLECT_RESOURCE_PRECISION_MEDIUMP: + if (IsESLanguage(psContext->psShader->eTargetLanguage) && numComponents != 4) + GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); + bformata(glsl, "16%s) mediump ", fmt); break; + default: + if (IsESLanguage(psContext->psShader->eTargetLanguage) && numComponents != 4 && numComponents != 1) + GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); + bformata(glsl, "32%s) highp ", fmt); break; + } + } break; + default: + ASSERT(0); + } + } + } + } + + if (psDecl->sUAV.Type == RETURN_TYPE_UINT) + imageTypePrefix[0] = 'u'; + else if (psDecl->sUAV.Type == RETURN_TYPE_SINT) + imageTypePrefix[0] = 'i'; + + // GLSL requires images to be always explicitly defined as uniforms + switch (psDecl->value.eResourceDimension) + { + case RESOURCE_DIMENSION_BUFFER: + { + if (IsESLanguage(psShader->eTargetLanguage) || psContext->IsVulkan()) + { + psContext->RequireExtension("GL_EXT_texture_buffer"); + if (numComponents != 1 || precision == REFLECT_RESOURCE_PRECISION_LOWP || precision == REFLECT_RESOURCE_PRECISION_MEDIUMP) + GenerateUnsupportedFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); + } + + bformata(glsl, "uniform %simageBuffer ", imageTypePrefix); + break; + } + case RESOURCE_DIMENSION_TEXTURE1D: + { + bformata(glsl, "uniform %simage1D ", imageTypePrefix); + break; + } + case RESOURCE_DIMENSION_TEXTURE2D: + { + bformata(glsl, "uniform %simage2D ", imageTypePrefix); + break; + } + case RESOURCE_DIMENSION_TEXTURE2DMS: + { + bformata(glsl, "uniform %simage2DMS ", imageTypePrefix); + break; + } + case RESOURCE_DIMENSION_TEXTURE3D: + { + bformata(glsl, "uniform %simage3D ", imageTypePrefix); + break; + } + case RESOURCE_DIMENSION_TEXTURECUBE: + { + bformata(glsl, "uniform %simageCube ", imageTypePrefix); + break; + } + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + bformata(glsl, "uniform %simage1DArray ", imageTypePrefix); + break; + } + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + bformata(glsl, "uniform %simage2DArray ", imageTypePrefix); + break; + } + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + bformata(glsl, "uniform %simage3DArray ", imageTypePrefix); + break; + } + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + bformata(glsl, "uniform %simageCubeArray ", imageTypePrefix); + break; + } + default: + ASSERT(0); + break; + } + TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); + bcatcstr(glsl, ";\n"); + + unsigned int accessFlags = 0; + if (psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_READ) + accessFlags |= HLSLccReflection::ReadAccess; + if (psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE) + accessFlags |= HLSLccReflection::WriteAccess; + + if (IsESLanguage(psContext->psShader->eTargetLanguage) && accessFlags == (HLSLccReflection::ReadAccess | HLSLccReflection::WriteAccess)) + { + if (numComponents != 1 || precision == REFLECT_RESOURCE_PRECISION_LOWP || precision == REFLECT_RESOURCE_PRECISION_MEDIUMP) + 
GenerateUnsupportedReadWriteFormatWarning(psContext->m_Reflection, ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0).c_str()); + } + + psContext->m_Reflection.OnStorageImage(bindpoint, accessFlags); + + break; + } + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: + { + const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; + const bool avoidAtomicCounter = (psContext->flags & HLSLCC_FLAG_AVOID_SHADER_ATOMIC_COUNTERS) != 0; + if (psDecl->sUAV.bCounter) + { + if (isVulkan) + { + std::string uavname = ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); + GLSLCrossDependencyData::VulkanResourceBinding uavBinding = psContext->psDependencies->GetVulkanResourceBinding(uavname, true); + GLSLCrossDependencyData::VulkanResourceBinding counterBinding = { uavBinding.set, uavBinding.binding + 1 }; + bformata(glsl, "layout(set = %d, binding = %d) buffer %s_counterBuf { highp uint %s_counter; };\n", counterBinding.set, counterBinding.binding, uavname.c_str(), uavname.c_str()); + + DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], + psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 0, psDecl->ui32BufferStride, glsl); + } + else if (avoidAtomicCounter) // no support for atomic counter. We must use atomic functions in SSBO instead. + { + DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], + psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 1, psDecl->ui32BufferStride, glsl); + } + else + { + std::string name = ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); + name += "_counter"; + bcatcstr(glsl, "layout (binding = 0) uniform "); + + if (HavePrecisionQualifiers(psContext)) + bcatcstr(glsl, "highp "); + bformata(glsl, "atomic_uint %s;\n", name.c_str()); + + DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], + psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 0, psDecl->ui32BufferStride, glsl); + } + } + else + { + DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], + psDecl->sUAV.ui32GloballyCoherentAccess, 0, 1, 0, psDecl->ui32BufferStride, glsl); + } + + break; + } + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: + { + const bool isVulkan = (psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0; + if (psDecl->sUAV.bCounter) + { + if (isVulkan) + { + std::string uavname = ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); + GLSLCrossDependencyData::VulkanResourceBinding uavBinding = psContext->psDependencies->GetVulkanResourceBinding(uavname, true); + GLSLCrossDependencyData::VulkanResourceBinding counterBinding = { uavBinding.set, uavBinding.binding + 1 }; + bformata(glsl, "layout(set = %d, binding = %d) buffer %s_counterBuf { highp uint %s_counter; };\n", counterBinding.set, counterBinding.binding, uavname.c_str(), uavname.c_str()); + } + else + { + std::string name = ResourceName(psContext, RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber, 0); + name += "_counter"; + bcatcstr(glsl, "layout (binding = 0) uniform "); + + if (HavePrecisionQualifiers(psContext)) + bcatcstr(glsl, "highp "); + bformata(glsl, "atomic_uint %s;\n", name.c_str()); + } + } + + DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], + psDecl->sUAV.ui32GloballyCoherentAccess, 1, 1, 0, psDecl->ui32BufferStride, glsl); + + break; + } + case 
OPCODE_DCL_RESOURCE_STRUCTURED: + { + DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], + psDecl->sUAV.ui32GloballyCoherentAccess, 0, 0, 0, psDecl->ui32BufferStride, glsl); + break; + } + case OPCODE_DCL_RESOURCE_RAW: + { + DeclareBufferVariable(psContext, psDecl->asOperands[0].ui32RegisterNumber, &psDecl->asOperands[0], + psDecl->sUAV.ui32GloballyCoherentAccess, 1, 0, 0, psDecl->ui32BufferStride, glsl); + break; + } + case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED: + { + ShaderVarType* psVarType = &psShader->sInfo.sGroupSharedVarType[psDecl->asOperands[0].ui32RegisterNumber]; + + bcatcstr(glsl, "shared struct {\n"); + bformata(glsl, "\tuint value[%d];\n", psDecl->sTGSM.ui32Stride / 4); + bcatcstr(glsl, "} "); + TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); + bformata(glsl, "[%d];\n", + psDecl->sTGSM.ui32Count); + psVarType->name = "value"; + + psVarType->Columns = psDecl->sTGSM.ui32Stride / 4; + psVarType->Elements = psDecl->sTGSM.ui32Count; + break; + } + case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW: + { + ShaderVarType* psVarType = &psShader->sInfo.sGroupSharedVarType[psDecl->asOperands[0].ui32RegisterNumber]; + + bcatcstr(glsl, "shared uint "); + TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NONE); + bformata(glsl, "[%d];\n", psDecl->sTGSM.ui32Count / psDecl->sTGSM.ui32Stride); + + psVarType->name = "$Element"; + + psVarType->Columns = 1; + psVarType->Elements = psDecl->sTGSM.ui32Count / psDecl->sTGSM.ui32Stride; + break; + } + case OPCODE_DCL_STREAM: + { + ASSERT(psDecl->asOperands[0].eType == OPERAND_TYPE_STREAM); + + + if (psShader->eTargetLanguage >= LANG_400 && (psShader->ui32CurrentVertexOutputStream != psDecl->asOperands[0].ui32RegisterNumber)) + { + // Only emit stream declaration for desktop GL >= 4.0, and only if we're declaring something else than the default 0 + bformata(glsl, "layout(stream = %d) out;\n", psShader->ui32CurrentVertexOutputStream); + } + psShader->ui32CurrentVertexOutputStream = psDecl->asOperands[0].ui32RegisterNumber; + + break; + } + case OPCODE_DCL_GS_INSTANCE_COUNT: + { + bformata(glsl, "layout(invocations = %d) in;\n", psDecl->value.ui32GSInstanceCount); + break; + } + default: + { + ASSERT(0); + break; + } + } +} + +bool ToGLSL::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix, int *iIgnoreRedirect) +{ + ASSERT(sig != NULL); + if (psContext->psShader->eShaderType == HULL_SHADER && sig->semanticName == "SV_TessFactor") + { + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + ASSERT(sig->ui32SemanticIndex <= 3); + std::ostringstream oss; + oss << "gl_TessLevelOuter[" << sig->ui32SemanticIndex << "]"; + result = oss.str(); + return true; + } + + if (psContext->psShader->eShaderType == HULL_SHADER && sig->semanticName == "SV_InsideTessFactor") + { + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + ASSERT(sig->ui32SemanticIndex <= 1); + std::ostringstream oss; + oss << "gl_TessLevelInner[" << sig->ui32SemanticIndex << "]"; + result = oss.str(); + return true; + } + + switch (sig->eSystemValueType) + { + case NAME_POSITION: + if (psContext->psShader->eShaderType == PIXEL_SHADER) + result = "hlslcc_FragCoord"; + else + result = "gl_Position"; + return true; + case NAME_RENDER_TARGET_ARRAY_INDEX: + result = "gl_Layer"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_CLIP_DISTANCE: + case NAME_CULL_DISTANCE: + { + 
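+ // Clip/cull distances are routed through a per-phase temporary named like
+ // "phase0_glClipDistance0" (phase index, Clip/Cull, semantic index); the copy into
+ // gl_ClipDistance[] / gl_CullDistance[] is emitted elsewhere in the translator.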
const char* glName = sig->eSystemValueType == NAME_CLIP_DISTANCE ? "Clip" : "Cull"; + // This is always routed through temp + std::ostringstream oss; + oss << "phase" << psContext->currentPhase << "_gl" << glName << "Distance" << sig->ui32SemanticIndex; + result = oss.str(); + return true; + } + case NAME_VIEWPORT_ARRAY_INDEX: + result = "gl_ViewportIndex"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_VERTEX_ID: + if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) + result = "gl_VertexIndex"; + else + result = "gl_VertexID"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_INSTANCE_ID: + if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) + result = "gl_InstanceIndex"; + else + result = "gl_InstanceID"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_IS_FRONT_FACE: + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + result = "(gl_FrontFacing ? 0xffffffffu : uint(0))"; // Old ES3.0 Adrenos treat 0u as const int + else + result = "(gl_FrontFacing ? 1 : 0)"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_PRIMITIVE_ID: + if (isInput && psContext->psShader->eShaderType == GEOMETRY_SHADER) + result = "gl_PrimitiveIDIn"; // LOL opengl + else + result = "gl_PrimitiveID"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_SAMPLE_INDEX: + result = "gl_SampleID"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_LINE_DENSITY_TESSFACTOR: + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + if (isIndexed) + { + result = "gl_TessLevelOuter"; + return true; + } + else + { + result = "gl_TessLevelOuter[0]"; + return true; + } + case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_LINE_DETAIL_TESSFACTOR: + result = "gl_TessLevelOuter[1]"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: + result = "gl_TessLevelOuter[2]"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: + result = "gl_TessLevelOuter[3]"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + + case NAME_FINAL_TRI_INSIDE_TESSFACTOR: + case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + if (isIndexed) + { + result = "gl_TessLevelInner"; + return true; + } + else + { + result = "gl_TessLevelInner[0]"; + return true; + } + case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: + result = "gl_TessLevelInner[3]"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + default: + break; + } + + if (psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE) + { + if ((sig->semanticName == "POS" || sig->semanticName == "POSITION" || sig->semanticName == "SV_POSITION" || sig->semanticName == "SV_Position") + && sig->ui32SemanticIndex == 0) + { + result = "gl_out[gl_InvocationID].gl_Position"; + return true; + } + std::ostringstream oss; + if (isInput) + oss << psContext->inputPrefix << sig->semanticName << sig->ui32SemanticIndex; + else + oss << psContext->outputPrefix << sig->semanticName << sig->ui32SemanticIndex << "[gl_InvocationID]"; + result = oss.str(); + return true; + } + + if ((psOperand->eType == 
OPERAND_TYPE_OUTPUT || psOperand->eType == OPERAND_TYPE_INPUT) + && HLSLcc::WriteMaskToComponentCount(sig->ui32Mask) == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + + // TODO: Add other builtins here. + if (sig->eSystemValueType == NAME_POSITION || (sig->semanticName == "POS" && sig->ui32SemanticIndex == 0 && psContext->psShader->eShaderType == VERTEX_SHADER)) + { + result = "gl_Position"; + return true; + } + + if (sig->semanticName == "PSIZE") + { + result = "gl_PointSize"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + } + + return false; +} diff --git a/third_party/HLSLcc/src/toGLSLInstruction.cpp b/third_party/HLSLcc/src/toGLSLInstruction.cpp new file mode 100644 index 0000000..c0732ff --- /dev/null +++ b/third_party/HLSLcc/src/toGLSLInstruction.cpp @@ -0,0 +1,4801 @@ +#include "internal_includes/toGLSLOperand.h" +#include "internal_includes/HLSLccToolkit.h" +#include "internal_includes/languages.h" +#include "internal_includes/HLSLCrossCompilerContext.h" +#include "bstrlib.h" +#include "stdio.h" +#include +#include +#include "internal_includes/debug.h" +#include "internal_includes/Shader.h" +#include "internal_includes/Instruction.h" +#include "internal_includes/toGLSL.h" +#include + +using namespace HLSLcc; + +// In toGLSLDeclaration.cpp +const char* GetSamplerType(HLSLCrossCompilerContext* psContext, + const RESOURCE_DIMENSION eDimension, + const uint32_t ui32RegisterNumber); +bool DeclareRWStructuredBufferTemplateTypeAsInteger(HLSLCrossCompilerContext* psContext, const Operand* psOperand); + +// This function prints out the destination name, possible destination writemask, assignment operator +// and any possible conversions needed based on the eSrcType+ui32SrcElementCount (type and size of data expected to be coming in) +// As an output, pNeedsParenthesis will be filled with the amount of closing parenthesis needed +// and pSrcCount will be filled with the number of components expected +// ui32CompMask can be used to only write to 1 or more components (used by MOVC) +void ToGLSL::AddOpAssignToDestWithMask(const Operand* psDest, + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int *pNeedsParenthesis, uint32_t ui32CompMask) +{ + uint32_t ui32DestElementCount = psDest->GetNumSwizzleElements(ui32CompMask); + bstring glsl = *psContext->currentGLSLString; + SHADER_VARIABLE_TYPE eDestDataType = psDest->GetDataType(psContext); + ASSERT(pNeedsParenthesis != NULL); + + *pNeedsParenthesis = 0; + + TranslateOperand(psDest, TO_FLAG_DESTINATION, ui32CompMask); + + bcatcstr(glsl, " = "); + + if (precise && HavePreciseQualifier(psContext->psShader->eTargetLanguage)) + { + char const *t, *s; + switch (eDestDataType) + { + case SVT_BOOL: t = "bvec4"; break; + case SVT_INT: t = "ivec4"; break; + case SVT_FLOAT: t = "vec4"; break; + case SVT_UINT: t = "uvec4"; break; + default: ASSERT(0); t = NULL; break; + } + switch (ui32DestElementCount) + { + case 1: s = ".x"; break; + case 2: s = ".xy"; break; + case 3: s = ".xyz"; break; + case 4: s = ".xyzw"; break; + default: ASSERT(0); s = NULL; break; + } + if (t && s) + { + bformata(glsl, "(u_xlat_precise_%s%s = (", t, s); + (*pNeedsParenthesis) += 2; + } + } + + // Simple path: types match. + if (DoAssignmentDataTypesMatch(eDestDataType, eSrcType)) + { + // Cover cases where the HLSL language expects the rest of the components to be default-filled + // eg. 
MOV r0, c0.x => Temp[0] = vec4(c0.x); + if (ui32DestElementCount > ui32SrcElementCount) + { + bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); + (*pNeedsParenthesis)++; + } + + return; + } + + switch (eDestDataType) + { + case SVT_INT: + case SVT_INT12: + case SVT_INT16: + // Bitcasts from lower precisions are ambiguous + ASSERT(eSrcType != SVT_FLOAT10 && eSrcType != SVT_FLOAT16); + if (eSrcType == SVT_FLOAT && psContext->psShader->ui32MajorVersion > 3 && HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) + { + bcatcstr(glsl, "floatBitsToInt("); + // Cover cases where the HLSL language expects the rest of the components to be default-filled + if (ui32DestElementCount > ui32SrcElementCount) + { + bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, eSrcType, ui32DestElementCount, false)); + (*pNeedsParenthesis)++; + } + } + else + bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); + + (*pNeedsParenthesis)++; + break; + case SVT_UINT: + case SVT_UINT16: + ASSERT(eSrcType != SVT_FLOAT10 && eSrcType != SVT_FLOAT16); + if (eSrcType == SVT_FLOAT && psContext->psShader->ui32MajorVersion > 3 && HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) + { + bcatcstr(glsl, "floatBitsToUint("); + // Cover cases where the HLSL language expects the rest of the components to be default-filled + if (ui32DestElementCount > ui32SrcElementCount) + { + bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, eSrcType, ui32DestElementCount, false)); + (*pNeedsParenthesis)++; + } + } + else + bformata(glsl, " %s(", GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); + + (*pNeedsParenthesis)++; + break; + + case SVT_FLOAT: + case SVT_FLOAT10: + case SVT_FLOAT16: + ASSERT(eSrcType != SVT_INT12 || (eSrcType != SVT_INT16 && eSrcType != SVT_UINT16)); + if (psContext->psShader->ui32MajorVersion > 3 && HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) + { + if (eSrcType == SVT_INT) + bcatcstr(glsl, "intBitsToFloat("); + else + bcatcstr(glsl, "uintBitsToFloat("); + // Cover cases where the HLSL language expects the rest of the components to be default-filled + if (ui32DestElementCount > ui32SrcElementCount) + { + bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, eSrcType, ui32DestElementCount, false)); + (*pNeedsParenthesis)++; + } + } + else + bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); + + (*pNeedsParenthesis)++; + break; + case SVT_BOOL: + bformata(glsl, " %s(", GetConstructorForTypeGLSL(psContext, eDestDataType, ui32DestElementCount, false)); + (*pNeedsParenthesis)++; + break; + default: + ASSERT(0); + break; + } +} + +void ToGLSL::AddAssignToDest(const Operand* psDest, + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int* pNeedsParenthesis) +{ + AddOpAssignToDestWithMask(psDest, eSrcType, ui32SrcElementCount, precise, pNeedsParenthesis, OPERAND_4_COMPONENT_MASK_ALL); +} + +void ToGLSL::AddAssignPrologue(int numParenthesis, bool isEmbedded /* = false*/) +{ + bstring glsl = *psContext->currentGLSLString; + while (numParenthesis != 0) + { + bcatcstr(glsl, ")"); + numParenthesis--; + } + if (!isEmbedded) + bcatcstr(glsl, ";\n"); +} + +void ToGLSL::AddComparison(Instruction* psInst, ComparisonType eType, + uint32_t typeFlag) +{ + // Multiple cases to consider here: + // For shader model <=3: all comparisons are floats + // otherwise: + // OPCODE_LT, _GT, 
_NE etc: inputs are floats, outputs UINT 0xffffffff or 0. typeflag: TO_FLAG_NONE + // OPCODE_ILT, _IGT etc: comparisons are signed ints, outputs UINT 0xffffffff or 0 typeflag TO_FLAG_INTEGER + // _ULT, UGT etc: inputs unsigned ints, outputs UINTs typeflag TO_FLAG_UNSIGNED_INTEGER + // + // Additional complexity: if dest swizzle element count is 1, we can use normal comparison operators, otherwise glsl intrinsics. + + + bstring glsl = *psContext->currentGLSLString; + const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); + const uint32_t s0ElemCount = psInst->asOperands[1].GetNumSwizzleElements(); + const uint32_t s1ElemCount = psInst->asOperands[2].GetNumSwizzleElements(); + int isBoolDest = psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL; + + int floatResult = 0; + + ASSERT(s0ElemCount == s1ElemCount || s1ElemCount == 1 || s0ElemCount == 1); + if (s0ElemCount != s1ElemCount) + { + // Set the proper auto-expand flag is either argument is scalar + typeFlag |= (TO_AUTO_EXPAND_TO_VEC2 << (std::max(s0ElemCount, s1ElemCount) - 2)); + } + + if (psContext->psShader->ui32MajorVersion < 4) + { + floatResult = 1; + } + + if (destElemCount > 1) + { + const char* glslOpcode[] = { + "equal", + "lessThan", + "greaterThanEqual", + "notEqual", + }; + + int needsParenthesis = 0; + psContext->AddIndentation(); + if (isBoolDest) + { + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_BOOL); + bcatcstr(glsl, " = "); + } + else + { + AddAssignToDest(&psInst->asOperands[0], floatResult ? SVT_FLOAT : SVT_UINT, destElemCount, psInst->ui32PreciseMask, &needsParenthesis); + + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, floatResult ? SVT_FLOAT : SVT_UINT, destElemCount, false)); + bcatcstr(glsl, "("); + } + bformata(glsl, "%s(", glslOpcode[eType]); + TranslateOperand(&psInst->asOperands[1], typeFlag); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[2], typeFlag); + bcatcstr(glsl, ")"); + TranslateOperandSwizzle(psContext, &psInst->asOperands[0], 0); + if (!isBoolDest) + { + bcatcstr(glsl, ")"); + if (!floatResult) + { + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, " * 0xFFFFFFFFu"); + else + bcatcstr(glsl, " * -1"); // GLSL ES 2 spec: high precision ints are guaranteed to have a range of at least (-2^16, 2^16) + } + } + + AddAssignPrologue(needsParenthesis); + } + else + { + const char* glslOpcode[] = { + "==", + "<", + ">=", + "!=", + }; + + //Scalar compare + + const bool workaroundAdrenoBugs = psContext->psShader->eTargetLanguage == LANG_ES_300; + + if (workaroundAdrenoBugs) + { + // Workarounds for bug cases 777617, 735299, 776827 + bcatcstr(glsl, "#ifdef UNITY_ADRENO_ES3\n"); + + int needsParenthesis = 0; + psContext->AddIndentation(); + if (isBoolDest) + { + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_BOOL); + bcatcstr(glsl, " = !!("); + needsParenthesis += 1; + TranslateOperand(&psInst->asOperands[1], typeFlag); + bformata(glsl, "%s", glslOpcode[eType]); + TranslateOperand(&psInst->asOperands[2], typeFlag); + AddAssignPrologue(needsParenthesis); + } + else + { + bcatcstr(glsl, "{ bool cond = "); + TranslateOperand(&psInst->asOperands[1], typeFlag); + bformata(glsl, "%s", glslOpcode[eType]); + TranslateOperand(&psInst->asOperands[2], typeFlag); + bcatcstr(glsl, "; "); + AddAssignToDest(&psInst->asOperands[0], floatResult ? SVT_FLOAT : SVT_UINT, destElemCount, psInst->ui32PreciseMask, &needsParenthesis); + bcatcstr(glsl, "!!cond ? 
"); + if (floatResult) + bcatcstr(glsl, "1.0 : 0.0"); + else + { + // Old ES3.0 Adrenos treat 0u as const int. + // GLSL ES 2 spec: high precision ints are guaranteed to have a range of at least (-2^16, 2^16) + bcatcstr(glsl, HaveUnsignedTypes(psContext->psShader->eTargetLanguage) ? "0xFFFFFFFFu : uint(0)" : "-1 : 0"); + } + AddAssignPrologue(needsParenthesis, true); + bcatcstr(glsl, "; }\n"); + } + + bcatcstr(glsl, "#else\n"); + } + + int needsParenthesis = 0; + psContext->AddIndentation(); + if (isBoolDest) + { + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_BOOL); + bcatcstr(glsl, " = "); + } + else + { + AddAssignToDest(&psInst->asOperands[0], floatResult ? SVT_FLOAT : SVT_UINT, destElemCount, psInst->ui32PreciseMask, &needsParenthesis); + bcatcstr(glsl, "("); + } + TranslateOperand(&psInst->asOperands[1], typeFlag); + bformata(glsl, "%s", glslOpcode[eType]); + TranslateOperand(&psInst->asOperands[2], typeFlag); + if (!isBoolDest) + { + if (floatResult) + bcatcstr(glsl, ") ? 1.0 : 0.0"); + else + { + // Old ES3.0 Adrenos treat 0u as const int. + // GLSL ES 2 spec: high precision ints are guaranteed to have a range of at least (-2^16, 2^16) + bcatcstr(glsl, HaveUnsignedTypes(psContext->psShader->eTargetLanguage) ? ") ? 0xFFFFFFFFu : uint(0)" : ") ? -1 : 0"); + } + } + AddAssignPrologue(needsParenthesis); + + if (workaroundAdrenoBugs) + bcatcstr(glsl, "#endif\n"); + } +} + +void ToGLSL::AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, uint32_t precise, bool isEmbedded /* = false*/) +{ + int numParenthesis = 0; + int srcSwizzleCount = pSrc->GetNumSwizzleElements(); + uint32_t writeMask = pDest->GetAccessMask(); + + const SHADER_VARIABLE_TYPE eSrcType = pSrc->GetDataType(psContext, pDest->GetDataType(psContext)); + uint32_t flags = SVTTypeToFlag(eSrcType); + + AddAssignToDest(pDest, eSrcType, srcSwizzleCount, precise, &numParenthesis); + TranslateOperand(pSrc, flags, writeMask); + + AddAssignPrologue(numParenthesis, isEmbedded); +} + +void ToGLSL::AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2, uint32_t precise) +{ + bstring glsl = *psContext->currentGLSLString; + uint32_t destElemCount = pDest->GetNumSwizzleElements(); + uint32_t s0ElemCount = src0->GetNumSwizzleElements(); + uint32_t s1ElemCount = src1->GetNumSwizzleElements(); + uint32_t s2ElemCount = src2->GetNumSwizzleElements(); + uint32_t destWriteMask = pDest->GetAccessMask(); + uint32_t destElem; + + const SHADER_VARIABLE_TYPE eDestType = pDest->GetDataType(psContext); + /* + for each component in dest[.mask] + if the corresponding component in src0 (POS-swizzle) + has any bit set + { + copy this component (POS-swizzle) from src1 into dest + } + else + { + copy this component (POS-swizzle) from src2 into dest + } + endfor + */ + + /* Single-component conditional variable (src0) */ + if (s0ElemCount == 1 || src0->IsSwizzleReplicated()) + { + int numParenthesis = 0; + SHADER_VARIABLE_TYPE s0Type = src0->GetDataType(psContext); + psContext->AddIndentation(); + AddAssignToDest(pDest, eDestType, destElemCount, precise, &numParenthesis); + bcatcstr(glsl, "("); + if (s0Type == SVT_UINT || s0Type == SVT_UINT16) + TranslateOperand(src0, TO_AUTO_BITCAST_TO_UINT, OPERAND_4_COMPONENT_MASK_X); + else if (s0Type == SVT_BOOL) + TranslateOperand(src0, TO_FLAG_BOOL, OPERAND_4_COMPONENT_MASK_X); + else + TranslateOperand(src0, TO_AUTO_BITCAST_TO_INT, OPERAND_4_COMPONENT_MASK_X); + + if (psContext->psShader->ui32MajorVersion < 4) + { + //cmp opcode uses >= 0 + bcatcstr(glsl, " >= 
0) ? "); + } + else + { + if (s0Type == SVT_UINT || s0Type == SVT_UINT16) + bcatcstr(glsl, HaveUnsignedTypes(psContext->psShader->eTargetLanguage) ? " != uint(0)) ? " : " != 0) ? "); // Old ES3.0 Adrenos treat 0u as const int. + else if (s0Type == SVT_BOOL) + bcatcstr(glsl, ") ? "); + else + bcatcstr(glsl, " != 0) ? "); + } + + if (s1ElemCount == 1 && destElemCount > 1) + TranslateOperand(src1, SVTTypeToFlag(eDestType) | ElemCountToAutoExpandFlag(destElemCount)); + else + TranslateOperand(src1, SVTTypeToFlag(eDestType), destWriteMask); + + bcatcstr(glsl, " : "); + if (s2ElemCount == 1 && destElemCount > 1) + TranslateOperand(src2, SVTTypeToFlag(eDestType) | ElemCountToAutoExpandFlag(destElemCount)); + else + TranslateOperand(src2, SVTTypeToFlag(eDestType), destWriteMask); + + AddAssignPrologue(numParenthesis); + } + else + { + // NOTE: mix() cannot be used to implement MOVC, because it propagates + // NaN from both endpoints. + int srcElem = -1; + SHADER_VARIABLE_TYPE dstType = pDest->GetDataType(psContext); + SHADER_VARIABLE_TYPE s0Type = src0->GetDataType(psContext); + + // Use an extra temp if dest is also one of the sources. Without this some swizzle combinations + // might alter the source before all components are handled. + const std::string tempName = "hlslcc_movcTemp"; + bool dstIsSrc1 = (pDest->eType == src1->eType) + && (dstType == src1->GetDataType(psContext)) + && (pDest->ui32RegisterNumber == src1->ui32RegisterNumber); + bool dstIsSrc2 = (pDest->eType == src2->eType) + && (dstType == src2->GetDataType(psContext)) + && (pDest->ui32RegisterNumber == src2->ui32RegisterNumber); + + if (dstIsSrc1 || dstIsSrc2) + { + psContext->AddIndentation(); + bcatcstr(glsl, "{\n"); + ++psContext->indent; + psContext->AddIndentation(); + int numComponents = (pDest->eType == OPERAND_TYPE_TEMP) ? + psContext->psShader->GetTempComponentCount(eDestType, pDest->ui32RegisterNumber) : + pDest->iNumComponents; + + const char* constructorStr = HLSLcc::GetConstructorForType(psContext, eDestType, numComponents, false); + bformata(glsl, "%s %s = ", constructorStr, tempName.c_str()); + TranslateOperand(pDest, TO_FLAG_NAME_ONLY); + bformata(glsl, ";\n"); + + // Override OPERAND_TYPE_TEMP name temporarily + const_cast(pDest)->specialName.assign(tempName); + } + + for (destElem = 0; destElem < 4; ++destElem) + { + int numParenthesis = 0; + srcElem++; + if (pDest->eSelMode == OPERAND_4_COMPONENT_MASK_MODE && pDest->ui32CompMask != 0 && !(pDest->ui32CompMask & (1 << destElem))) + continue; + + psContext->AddIndentation(); + AddOpAssignToDestWithMask(pDest, eDestType, 1, precise, &numParenthesis, 1 << destElem); + bcatcstr(glsl, "("); + if (s0Type == SVT_BOOL) + { + TranslateOperand(src0, TO_FLAG_BOOL, 1 << srcElem); + bcatcstr(glsl, ") ? "); + } + else + { + TranslateOperand(src0, TO_AUTO_BITCAST_TO_INT, 1 << srcElem); + + if (psContext->psShader->ui32MajorVersion < 4) + { + //cmp opcode uses >= 0 + bcatcstr(glsl, " >= 0) ? "); + } + else + { + bcatcstr(glsl, " != 0) ? 
"); + } + } + + TranslateOperand(src1, SVTTypeToFlag(eDestType), 1 << srcElem); + bcatcstr(glsl, " : "); + TranslateOperand(src2, SVTTypeToFlag(eDestType), 1 << srcElem); + AddAssignPrologue(numParenthesis); + } + + if (dstIsSrc1 || dstIsSrc2) + { + const_cast(pDest)->specialName.clear(); + + psContext->AddIndentation(); + TranslateOperand(glsl, pDest, TO_FLAG_NAME_ONLY); + bformata(glsl, " = %s;\n", tempName.c_str()); + + --psContext->indent; + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + } + } +} + +void ToGLSL::CallBinaryOp(const char* name, Instruction* psInst, + int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType, bool isEmbedded /* = false*/) +{ + uint32_t ui32Flags = SVTTypeToFlag(eDataType); + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = psInst->asOperands[dest].GetAccessMask(); + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + int needsParenthesis = 0; + + if (!HaveNativeBitwiseOps(psContext->psShader->eTargetLanguage)) + { + const char *binaryOpWrap = NULL; + + if (!strcmp("%", name)) + binaryOpWrap = "op_modi"; + else if (!strcmp("&", name)) + binaryOpWrap = "op_and"; + else if (!strcmp("|", name)) + binaryOpWrap = "op_or"; + else if (!strcmp("^", name)) + binaryOpWrap = "op_xor"; + else if (!strcmp(">>", name)) + binaryOpWrap = "op_shr"; + else if (!strcmp("<<", name)) + binaryOpWrap = "op_shl"; + // op_not handled separately at OPCODE_NOT + + if (binaryOpWrap) + { + UseExtraFunctionDependency(binaryOpWrap); + CallHelper2Int(binaryOpWrap, psInst, 0, 1, 2, 1); + return; + } + } + + if (src1SwizCount != src0SwizCount) + { + uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + if (!isEmbedded) + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], eDataType, dstSwizCount, psInst->ui32PreciseMask, &needsParenthesis); + + // Adreno 3xx fails on binary ops that operate on vectors + bool opComponentWiseOnAdreno = (!strcmp("&", name) || !strcmp("|", name) || !strcmp("^", name) || !strcmp(">>", name) || !strcmp("<<", name)); + if (psContext->psShader->eTargetLanguage == LANG_ES_300 && opComponentWiseOnAdreno) + { + uint32_t i; + int firstPrinted = 0; + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, eDataType, dstSwizCount, false)); + bcatcstr(glsl, "("); + for (i = 0; i < 4; i++) + { + if (!(destMask & (1 << i))) + continue; + + if (firstPrinted != 0) + bcatcstr(glsl, ", "); + else + firstPrinted = 1; + + // Remove the auto expand flags + ui32Flags &= ~(TO_AUTO_EXPAND_TO_VEC2 | TO_AUTO_EXPAND_TO_VEC3 | TO_AUTO_EXPAND_TO_VEC4); + + TranslateOperand(&psInst->asOperands[src0], ui32Flags, 1 << i); + bformata(glsl, " %s ", name); + TranslateOperand(&psInst->asOperands[src1], ui32Flags, 1 << i); + } + bcatcstr(glsl, ")"); + } + else + { + TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bformata(glsl, " %s ", name); + TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + } + + AddAssignPrologue(needsParenthesis, isEmbedded); +} + +void ToGLSL::CallTernaryOp(const char* op1, const char* op2, Instruction* psInst, + int dest, int src0, int src1, int src2, uint32_t dataType) +{ + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = psInst->asOperands[dest].GetAccessMask(); + uint32_t src2SwizCount = 
psInst->asOperands[src2].GetNumSwizzleElements(destMask); + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + + uint32_t ui32Flags = dataType; + int numParenthesis = 0; + + if (src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) + { + uint32_t maxElems = std::max(src2SwizCount, std::max(src1SwizCount, src0SwizCount)); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], TypeFlagsToSVTType(dataType), dstSwizCount, psInst->ui32PreciseMask, &numParenthesis); + + TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bformata(glsl, " %s ", op1); + TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + bformata(glsl, " %s ", op2); + TranslateOperand(&psInst->asOperands[src2], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); +} + +void ToGLSL::CallHelper3(const char* name, Instruction* psInst, + int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask) +{ + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT; + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + uint32_t src2SwizCount = psInst->asOperands[src2].GetNumSwizzleElements(destMask); + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + int numParenthesis = 0; + + if ((src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) + { + uint32_t maxElems = std::max(src2SwizCount, std::max(src1SwizCount, src0SwizCount)); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], SVT_FLOAT, dstSwizCount, psInst->ui32PreciseMask, &numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[src2], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); +} + +void ToGLSL::CallHelper2(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask) +{ + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT; + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + + int isDotProduct = (strncmp(name, "dot", 3) == 0) ? 1 : 0; + int numParenthesis = 0; + + if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) + { + uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[dest], SVT_FLOAT, isDotProduct ? 
1 : dstSwizCount, psInst->ui32PreciseMask, &numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + + TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + + AddAssignPrologue(numParenthesis); +} + +void ToGLSL::CallHelper2Int(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask) +{ + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_INT; + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + int numParenthesis = 0; + + if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) + { + uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, psInst->ui32PreciseMask, &numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); +} + +void ToGLSL::CallHelper2UInt(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask) +{ + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_UINT; + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + int numParenthesis = 0; + + if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) + { + uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], SVT_UINT, dstSwizCount, psInst->ui32PreciseMask, &numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); +} + +void ToGLSL::CallHelper1(const char* name, Instruction* psInst, + int dest, int src0, int paramsShouldFollowWriteMask) +{ + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT; + bstring glsl = *psContext->currentGLSLString; + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + uint32_t destMask = paramsShouldFollowWriteMask ? 
psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + int numParenthesis = 0; + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], SVT_FLOAT, dstSwizCount, psInst->ui32PreciseMask, &numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); +} + +//Result is an int. +void ToGLSL::CallHelper1Int( + const char* name, + Instruction* psInst, + const int dest, + const int src0, + int paramsShouldFollowWriteMask) +{ + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_INT; + bstring glsl = *psContext->currentGLSLString; + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + int numParenthesis = 0; + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, psInst->ui32PreciseMask, &numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); +} + +// Texel fetches etc need a dummy sampler (because glslang wants one, for Reasons(tm)). +// Any non-shadow sampler will do, so try to get one from sampler registers. If the current shader doesn't have any, declare a dummy one. +std::string ToGLSL::GetVulkanDummySamplerName() +{ + std::string dummySmpName = "hlslcc_dummyPointClamp"; + if (!psContext->IsVulkan()) + return ""; + + const ResourceBinding *pSmpInfo = NULL; + int smpIdx = 0; + + while (psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_SAMPLER, smpIdx, &pSmpInfo) != 0) + { + if (pSmpInfo->m_SamplerMode != D3D10_SB_SAMPLER_MODE_COMPARISON) + return ResourceName(psContext, RGROUP_SAMPLER, smpIdx, 0); + + smpIdx++; + } + + if (!psContext->psShader->m_DummySamplerDeclared) + { + GLSLCrossDependencyData::VulkanResourceBinding binding = psContext->psDependencies->GetVulkanResourceBinding(dummySmpName); + bstring code = bfromcstr(""); + bformata(code, "layout(set = %d, binding = %d) uniform mediump sampler %s;", binding.set, binding.binding, dummySmpName.c_str()); + DeclareExtraFunction(dummySmpName, code); + bdestroy(code); + psContext->psShader->m_DummySamplerDeclared = true; + } + return dummySmpName; +} + +void ToGLSL::TranslateTexelFetch( + Instruction* psInst, + const ResourceBinding* psBinding, + bstring glsl) +{ + int numParenthesis = 0; + + std::string vulkanSamplerName = GetVulkanDummySamplerName(); + + std::string texName = ResourceName(psContext, RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, 0); + const bool hasOffset = (psInst->bAddressOffset != 0); + + // On Vulkan wrap the tex name with the sampler constructor + if (psContext->IsVulkan()) + { + const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber]; + std::string smpType = GetSamplerType(psContext, eResDim, psInst->asOperands[2].ui32RegisterNumber); + std::ostringstream oss; + oss << smpType; + oss << "(" << texName << ", " << vulkanSamplerName << ")"; + texName = oss.str(); + } + + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], TypeFlagsToSVTType(ResourceReturnTypeToFlag(psBinding->ui32ReturnType)), 4, psInst->ui32PreciseMask, &numParenthesis); + + if (hasOffset) + bcatcstr(glsl, "texelFetchOffset("); + else + bcatcstr(glsl, "texelFetch("); + + switch (psBinding->eDimension) + { + 
case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: + case REFLECT_RESOURCE_DIMENSION_BUFFER: + { + bcatcstr(glsl, texName.c_str()); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); + // Buffers don't have LOD or offset + if (psBinding->eDimension != REFLECT_RESOURCE_DIMENSION_BUFFER) + { + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_A); + if (hasOffset) + bformata(glsl, ", %d", psInst->iUAddrOffset); + } + bcatcstr(glsl, ")"); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: + { + bcatcstr(glsl, texName.c_str()); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_A); + if (hasOffset && psBinding->eDimension == REFLECT_RESOURCE_DIMENSION_TEXTURE3D) + bformata(glsl, ", ivec3(%d, %d, %d)", psInst->iUAddrOffset, psInst->iVAddrOffset, psInst->iWAddrOffset); + if (hasOffset && psBinding->eDimension == REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY) + bformata(glsl, ", ivec2(%d, %d)", psInst->iUAddrOffset, psInst->iVAddrOffset); + bcatcstr(glsl, ")"); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: + case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + bcatcstr(glsl, texName.c_str()); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_A); + if (hasOffset && psBinding->eDimension == REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY) + bformata(glsl, ", %d", psInst->iUAddrOffset); + if (hasOffset && psBinding->eDimension == REFLECT_RESOURCE_DIMENSION_TEXTURE2D) + bformata(glsl, ", ivec2(%d, %d)", psInst->iUAddrOffset, psInst->iVAddrOffset); + bcatcstr(glsl, ")"); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: + { + ASSERT(psInst->eOpcode == OPCODE_LD_MS); + bcatcstr(glsl, texName.c_str()); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[3], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); + bcatcstr(glsl, ")"); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + ASSERT(psInst->eOpcode == OPCODE_LD_MS); + bcatcstr(glsl, texName.c_str()); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[3], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); + bcatcstr(glsl, ")"); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: + case REFLECT_RESOURCE_DIMENSION_BUFFEREX: + default: + { + // Not possible in either HLSL or GLSL + ASSERT(0); + break; + } + } + + TranslateOperandSwizzleWithMask(psContext, &psInst->asOperands[2], psInst->asOperands[0].GetAccessMask(), 0); + AddAssignPrologue(numParenthesis); +} + +//Makes sure the texture coordinate swizzle is appropriate for the texture type. +//i.e. vecX for X-dimension texture. +//Currently supports floating point coord only, so not used for texelFetch. 
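+//For example (illustrative only): a TEXTURE2D coordinate keeps just .xy of the operand (auto-expanded
+//to vec2), TEXTURE3D and TEXTURE2DARRAY keep .xyz (vec3), and TEXTURECUBEARRAY passes the full vec4
+//through, matching the switch below.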
+void ToGLSL::TranslateTexCoord( + const RESOURCE_DIMENSION eResDim, + Operand* psTexCoordOperand) +{ + uint32_t flags = TO_AUTO_BITCAST_TO_FLOAT; + uint32_t opMask = OPERAND_4_COMPONENT_MASK_ALL; + + switch (eResDim) + { + case RESOURCE_DIMENSION_TEXTURE1D: + { + //Vec1 texcoord. Mask out the other components. + opMask = OPERAND_4_COMPONENT_MASK_X; + break; + } + case RESOURCE_DIMENSION_TEXTURE2D: + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + //Vec2 texcoord. Mask out the other components. + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; + flags |= TO_AUTO_EXPAND_TO_VEC2; + break; + } + case RESOURCE_DIMENSION_TEXTURECUBE: + case RESOURCE_DIMENSION_TEXTURE3D: + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + //Vec3 texcoord. Mask out the other components. + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; + flags |= TO_AUTO_EXPAND_TO_VEC3; + break; + } + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + flags |= TO_AUTO_EXPAND_TO_VEC4; + break; + } + default: + { + ASSERT(0); + break; + } + } + + //FIXME detect when integer coords are needed. + TranslateOperand(psTexCoordOperand, flags, opMask); +} + +void ToGLSL::GetResInfoData(Instruction* psInst, int index, int destElem) +{ + bstring glsl = *psContext->currentGLSLString; + int numParenthesis = 0; + const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType; + bool isUAV = (psInst->asOperands[2].eType == OPERAND_TYPE_UNORDERED_ACCESS_VIEW); + bool isMS = psInst->eResDim == RESOURCE_DIMENSION_TEXTURE2DMS || psInst->eResDim == RESOURCE_DIMENSION_TEXTURE2DMSARRAY; + + std::string texName = ResourceName(psContext, isUAV ? RGROUP_UAV : RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, 0); + + // On Vulkan wrap the tex name with the sampler constructor + if (psContext->IsVulkan() && !isUAV) + { + std::string vulkanSamplerName = GetVulkanDummySamplerName(); + + const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber]; + std::string smpType = GetSamplerType(psContext, eResDim, psInst->asOperands[2].ui32RegisterNumber); + std::ostringstream oss; + oss << smpType; + oss << "(" << texName << ", " << vulkanSamplerName << ")"; + texName = oss.str(); + } + + psContext->AddIndentation(); + AddOpAssignToDestWithMask(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? SVT_UINT : SVT_FLOAT, 1, psInst->ui32PreciseMask, &numParenthesis, 1 << destElem); + + //[width, height, depth or array size, total-mip-count] + if (index < 3) + { + int dim = GetNumTextureDimensions(psInst->eResDim); + bcatcstr(glsl, "("); + if (dim < (index + 1)) + { + bcatcstr(glsl, eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? (HaveUnsignedTypes(psContext->psShader->eTargetLanguage) ? "uint(0)" : "0") : "0.0"); // Old ES3.0 Adrenos treat 0u as const int. 
+ } + else + { + if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT) + { + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bformata(glsl, "uvec%d(", dim); + else + bformata(glsl, "ivec%d(", dim); + } + else if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_RCPFLOAT) + bformata(glsl, "vec%d(1.0) / vec%d(", dim, dim); + else + bformata(glsl, "vec%d(", dim); + + if (isUAV) + bcatcstr(glsl, "imageSize("); + else + bcatcstr(glsl, "textureSize("); + + bcatcstr(glsl, texName.c_str()); + + if (!isUAV && !isMS) + { + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); + } + bcatcstr(glsl, "))"); + + switch (index) + { + case 0: + bcatcstr(glsl, ".x"); + break; + case 1: + bcatcstr(glsl, ".y"); + break; + case 2: + bcatcstr(glsl, ".z"); + break; + } + } + + bcatcstr(glsl, ")"); + } + else + { + ASSERT(!isUAV); + if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT) + { + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, "uint("); + else + bcatcstr(glsl, "int("); + } + else + bcatcstr(glsl, "float("); + bcatcstr(glsl, "textureQueryLevels("); + bcatcstr(glsl, texName.c_str()); + bcatcstr(glsl, "))"); + } + AddAssignPrologue(numParenthesis); +} + +void ToGLSL::TranslateTextureSample(Instruction* psInst, + uint32_t ui32Flags) +{ + bstring glsl = *psContext->currentGLSLString; + int numParenthesis = 0; + int hasParamOffset = (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) ? 1 : 0; + + Operand* psDest = &psInst->asOperands[0]; + Operand* psDestAddr = &psInst->asOperands[1]; + Operand* psSrcOff = (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) ? &psInst->asOperands[2] : 0; + Operand* psSrcTex = &psInst->asOperands[2 + hasParamOffset]; + Operand* psSrcSamp = &psInst->asOperands[3 + hasParamOffset]; + Operand* psSrcRef = (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) ? &psInst->asOperands[4 + hasParamOffset] : 0; + Operand* psSrcLOD = (ui32Flags & TEXSMP_FLAG_LOD) ? &psInst->asOperands[4] : 0; + Operand* psSrcDx = (ui32Flags & TEXSMP_FLAG_GRAD) ? &psInst->asOperands[4] : 0; + Operand* psSrcDy = (ui32Flags & TEXSMP_FLAG_GRAD) ? &psInst->asOperands[5] : 0; + Operand* psSrcBias = (ui32Flags & TEXSMP_FLAG_BIAS) ? &psInst->asOperands[4] : 0; + + const char* funcName = "texture"; + const char* offset = ""; + const char* depthCmpCoordType = ""; + const char* gradSwizzle = ""; + const char* ext = ""; + + uint32_t ui32NumOffsets = 0; + + const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psSrcTex->ui32RegisterNumber]; + const int iHaveOverloadedTexFuncs = HaveOverloadedTextureFuncs(psContext->psShader->eTargetLanguage); + const int useCombinedTextureSamplers = (psContext->flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) ? 
1 : 0; + + if (psInst->bAddressOffset) + { + offset = "Offset"; + } + if (psContext->IsSwitch() && psInst->eOpcode == OPCODE_GATHER4_PO) + { + // it seems that other GLSLCore compilers accept textureGather(sampler2D sampler, vec2 texCoord, ivec2 texelOffset, int component) with the "texelOffset" parameter, + // however this is not in the GLSL spec, and Switch's GLSLc compiler requires to use the textureGatherOffset version of the function + offset = "Offset"; + } + + switch (eResDim) + { + case RESOURCE_DIMENSION_TEXTURE1D: + { + depthCmpCoordType = "vec2"; + gradSwizzle = ".x"; + ui32NumOffsets = 1; + if (!iHaveOverloadedTexFuncs) + { + funcName = "texture1D"; + if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) + { + funcName = "shadow1D"; + } + } + break; + } + case RESOURCE_DIMENSION_TEXTURE2D: + { + depthCmpCoordType = "vec3"; + gradSwizzle = ".xy"; + ui32NumOffsets = 2; + if (!iHaveOverloadedTexFuncs) + { + funcName = "texture2D"; + if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) + { + funcName = "shadow2D"; + } + } + break; + } + case RESOURCE_DIMENSION_TEXTURECUBE: + { + depthCmpCoordType = "vec4"; + gradSwizzle = ".xyz"; + ui32NumOffsets = 3; + if (!iHaveOverloadedTexFuncs) + { + funcName = "textureCube"; + } + break; + } + case RESOURCE_DIMENSION_TEXTURE3D: + { + depthCmpCoordType = "vec4"; + gradSwizzle = ".xyz"; + ui32NumOffsets = 3; + if (!iHaveOverloadedTexFuncs) + { + funcName = "texture3D"; + } + break; + } + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + depthCmpCoordType = "vec3"; + gradSwizzle = ".x"; + ui32NumOffsets = 1; + break; + } + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + depthCmpCoordType = "vec4"; + gradSwizzle = ".xy"; + ui32NumOffsets = 2; + break; + } + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + gradSwizzle = ".xyz"; + ui32NumOffsets = 3; + break; + } + default: + { + ASSERT(0); + break; + } + } + + if (ui32Flags & TEXSMP_FLAG_GATHER) + funcName = "textureGather"; + + uint32_t uniqueNameCounter = 0; + + // In GLSL, for every texture sampling func overload, except for cubemap arrays, the + // depth compare reference value is given as the last component of the texture coord vector. + // Cubemap array sampling as well as all the gather funcs have a separate parameter for it. + // HLSL always provides the reference as a separate param. + // + // Here we create a temp texcoord var with the reference value embedded + if ((ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) && + (eResDim != RESOURCE_DIMENSION_TEXTURECUBEARRAY && !(ui32Flags & TEXSMP_FLAG_GATHER))) + { + uniqueNameCounter = psContext->psShader->asPhases[psContext->currentPhase].m_NextTexCoordTemp++; + psContext->AddIndentation(); + // Create a temp variable for the coordinate as Adrenos hate nonstandard swizzles in the texcoords + bformata(glsl, "%s txVec%d = ", depthCmpCoordType, uniqueNameCounter); + bformata(glsl, "%s(", depthCmpCoordType); + TranslateTexCoord(eResDim, psDestAddr); + bcatcstr(glsl, ","); + // Last component is the reference + TranslateOperand(psSrcRef, TO_AUTO_BITCAST_TO_FLOAT); + bcatcstr(glsl, ");\n"); + } + + SHADER_VARIABLE_TYPE dataType = psContext->psShader->sInfo.GetTextureDataType(psSrcTex->ui32RegisterNumber); + psContext->AddIndentation(); + AddAssignToDest(psDest, dataType, psSrcTex->GetNumSwizzleElements(), psInst->ui32PreciseMask, &numParenthesis); + + // GLSL doesn't have textureLod() for 2d shadow samplers, we'll have to use grad instead. In that case assume LOD 0. 
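+ // For illustration (names below are placeholders): when this workaround applies, a Texture2DArray
+ // SampleCmpLevelZero comes out roughly as
+ // vec4 txVecN = vec4(coord.xyz, ref); result = textureGrad(smp, txVecN, vec2(0.0, 0.0), vec2(0.0, 0.0));
+ // i.e. the Grad variant with zero derivatives stands in for sampling at LOD 0.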
+ bool needsLodWorkaround = (eResDim == RESOURCE_DIMENSION_TEXTURE2DARRAY) && (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE); + const bool needsLodWorkaroundES2 = (psContext->psShader->eTargetLanguage == LANG_ES_100 && psContext->psShader->eShaderType == PIXEL_SHADER && (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE)); + + // Workaround for switch for OPCODE_SAMPLE_C_LZ, in particular sampler2dArrayShadow.SampleCmpLevelZero(). + // textureGrad() with shadow samplers is not implemented in HW on switch so the behavior is emulated using shuffles and 4 texture fetches. + // The code generated is very heavy. + // Workaround: use standard texture fetch, shadows are currently not mipmapped, so that should work for now. + if (needsLodWorkaround && psContext->IsSwitch() && ui32Flags == (TEXSMP_FLAG_DEPTHCOMPARE | TEXSMP_FLAG_FIRSTLOD)) + { + needsLodWorkaround = false; + ui32Flags &= ~TEXSMP_FLAG_FIRSTLOD; + } + + if (needsLodWorkaround) + { + bformata(glsl, "%sGrad%s(", funcName, offset); + } + else + { + if (psContext->psShader->eTargetLanguage == LANG_ES_100 && + psContext->psShader->eShaderType == PIXEL_SHADER && + ui32Flags & (TEXSMP_FLAG_LOD | TEXSMP_FLAG_FIRSTLOD | TEXSMP_FLAG_GRAD)) + ext = "EXT"; + + if (ui32Flags & (TEXSMP_FLAG_LOD | TEXSMP_FLAG_FIRSTLOD) && !needsLodWorkaroundES2) + bformata(glsl, "%sLod%s%s(", funcName, ext, offset); + else if (ui32Flags & TEXSMP_FLAG_GRAD) + bformata(glsl, "%sGrad%s%s(", funcName, ext, offset); + else + bformata(glsl, "%s%s%s(", funcName, ext, offset); + } + + if (psContext->IsVulkan()) + { + // Build the sampler name here + std::string samplerType = GetSamplerType(psContext, eResDim, psSrcTex->ui32RegisterNumber); + const ResourceBinding *pSmpRes = NULL; + psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_SAMPLER, psSrcSamp->ui32RegisterNumber, &pSmpRes); + + if (pSmpRes->m_SamplerMode == D3D10_SB_SAMPLER_MODE_COMPARISON) + samplerType.append("Shadow"); + std::string texName = ResourceName(psContext, RGROUP_TEXTURE, psSrcTex->ui32RegisterNumber, 0); + std::string smpName = ResourceName(psContext, RGROUP_SAMPLER, psSrcSamp->ui32RegisterNumber, 0); + bformata(glsl, "%s(%s, %s)", samplerType.c_str(), texName.c_str(), smpName.c_str()); + } + else + { + // Sampler name + if (!useCombinedTextureSamplers) + ResourceName(glsl, psContext, RGROUP_TEXTURE, psSrcTex->ui32RegisterNumber, ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE); + else + bcatcstr(glsl, TextureSamplerName(&psContext->psShader->sInfo, psSrcTex->ui32RegisterNumber, psSrcSamp->ui32RegisterNumber, ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE).c_str()); + } + bcatcstr(glsl, ", "); + + // Texture coordinates, either from previously constructed temp + // or straight from the psDestAddr operand + if ((ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) && + (eResDim != RESOURCE_DIMENSION_TEXTURECUBEARRAY && !(ui32Flags & TEXSMP_FLAG_GATHER))) + bformata(glsl, "txVec%d", uniqueNameCounter); + else + TranslateTexCoord(eResDim, psDestAddr); + + // If depth compare reference was not embedded to texcoord + // then insert it here as a separate param + if ((ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) && + (eResDim == RESOURCE_DIMENSION_TEXTURECUBEARRAY || (ui32Flags & TEXSMP_FLAG_GATHER))) + { + bcatcstr(glsl, ", "); + TranslateOperand(psSrcRef, TO_AUTO_BITCAST_TO_FLOAT); + } + + // Add LOD/grad parameters based on the flags + if (needsLodWorkaround) + { + bcatcstr(glsl, ", vec2(0.0, 0.0), vec2(0.0, 0.0)"); + } + else if (ui32Flags & TEXSMP_FLAG_LOD) + { + if (!needsLodWorkaroundES2) + { + bcatcstr(glsl, ", "); + TranslateOperand(psSrcLOD, 
TO_AUTO_BITCAST_TO_FLOAT); + if (psContext->psShader->ui32MajorVersion < 4) + { + bcatcstr(glsl, ".w"); + } + } + } + else if (ui32Flags & TEXSMP_FLAG_FIRSTLOD) + { + if (!needsLodWorkaroundES2) + bcatcstr(glsl, ", 0.0"); + } + else if (ui32Flags & TEXSMP_FLAG_GRAD) + { + bcatcstr(glsl, ", vec4("); + TranslateOperand(psSrcDx, TO_AUTO_BITCAST_TO_FLOAT); + bcatcstr(glsl, ")"); + bcatcstr(glsl, gradSwizzle); + bcatcstr(glsl, ", vec4("); + TranslateOperand(psSrcDy, TO_AUTO_BITCAST_TO_FLOAT); + bcatcstr(glsl, ")"); + bcatcstr(glsl, gradSwizzle); + } + + // Add offset param + if (psInst->bAddressOffset) + { + if (ui32NumOffsets == 1) + { + bformata(glsl, ", %d", + psInst->iUAddrOffset); + } + else if (ui32NumOffsets == 2) + { + bformata(glsl, ", ivec2(%d, %d)", + psInst->iUAddrOffset, + psInst->iVAddrOffset); + } + else if (ui32NumOffsets == 3) + { + bformata(glsl, ", ivec3(%d, %d, %d)", + psInst->iUAddrOffset, + psInst->iVAddrOffset, + psInst->iWAddrOffset); + } + } + // HLSL gather has a variant with separate offset operand + else if (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) + { + uint32_t mask = OPERAND_4_COMPONENT_MASK_X; + if (ui32NumOffsets > 1) + mask |= OPERAND_4_COMPONENT_MASK_Y; + if (ui32NumOffsets > 2) + mask |= OPERAND_4_COMPONENT_MASK_Z; + + bcatcstr(glsl, ","); + TranslateOperand(psSrcOff, TO_FLAG_INTEGER, mask); + } + + // Add bias if present + if (ui32Flags & TEXSMP_FLAG_BIAS) + { + bcatcstr(glsl, ", "); + TranslateOperand(psSrcBias, TO_AUTO_BITCAST_TO_FLOAT); + } + + // Add texture gather component selection if needed + if ((ui32Flags & TEXSMP_FLAG_GATHER) && psSrcSamp->GetNumSwizzleElements() > 0) + { + ASSERT(psSrcSamp->GetNumSwizzleElements() == 1); + if (psSrcSamp->aui32Swizzle[0] != OPERAND_4_COMPONENT_X) + { + if (!(ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE)) + { + bformata(glsl, ", %d", psSrcSamp->aui32Swizzle[0]); + } + else + { + // Component selection not supported with depth compare gather + } + } + } + + bcatcstr(glsl, ")"); + + if (!(ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) || (ui32Flags & TEXSMP_FLAG_GATHER)) + { + // iWriteMaskEnabled is forced off during DecodeOperand because swizzle on sampler uniforms + // does not make sense. But need to re-enable to correctly swizzle this particular instruction. + psSrcTex->iWriteMaskEnabled = 1; + TranslateOperandSwizzleWithMask(psContext, psSrcTex, psDest->GetAccessMask(), 0); + } + AddAssignPrologue(numParenthesis); +} + +const char* swizzleString[] = { ".x", ".y", ".z", ".w" }; + +// Handle cases where vector components are accessed with dynamic index ([] notation). +// A bit ugly hack because compiled HLSL uses byte offsets to access data in structs => we are converting +// the offset back to vector component index in runtime => calculating stuff back and forth. +// TODO: Would be better to eliminate the offset calculation ops and use indexes straight on. Could be tricky though... 
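+// For illustration (identifiers are placeholders): with a dynamic uint byte address and a vector that
+// sits at byte offset 16 inside its struct, the emitted access is roughly someVec[(byteAddr - 16u) >> 0x2u],
+// i.e. subtract the struct offset first, then shift right by two to turn the byte offset into a component index.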
+void ToGLSL::TranslateDynamicComponentSelection(const ShaderVarType* psVarType, const Operand* psByteAddr, uint32_t offset, uint32_t mask) +{ + bstring glsl = *psContext->currentGLSLString; + ASSERT(psVarType->Class == SVC_VECTOR); + + bcatcstr(glsl, "["); // Access vector component with [] notation + if (offset > 0) + bcatcstr(glsl, "("); + + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + { + // The var containing byte address to the requested element + TranslateOperand(psByteAddr, TO_FLAG_UNSIGNED_INTEGER, mask); + + if (offset > 0)// If the vector is part of a struct, there is an extra offset in our byte address + bformata(glsl, " - %du)", offset); // Subtract that first + + bcatcstr(glsl, " >> 0x2u"); // Convert byte offset to index: div by four + bcatcstr(glsl, "]"); + } + else + { + // The var containing byte address to the requested element + TranslateOperand(psByteAddr, TO_FLAG_INTEGER, mask); + + if (offset > 0)// If the vector is part of a struct, there is an extra offset in our byte address + bformata(glsl, " - %d)", offset); // Subtract that first + + bcatcstr(glsl, " >> 0x2"); // Convert byte offset to index: div by four + bcatcstr(glsl, "]"); + } +} + +void ToGLSL::TranslateShaderStorageStore(Instruction* psInst) +{ + bstring glsl = *psContext->currentGLSLString; + int component; + int srcComponent = 0; + + Operand* psDest = 0; + Operand* psDestAddr = 0; + Operand* psDestByteOff = 0; + Operand* psSrc = 0; + + switch (psInst->eOpcode) + { + case OPCODE_STORE_STRUCTURED: + psDest = &psInst->asOperands[0]; + psDestAddr = &psInst->asOperands[1]; + psDestByteOff = &psInst->asOperands[2]; + psSrc = &psInst->asOperands[3]; + break; + case OPCODE_STORE_RAW: + psDest = &psInst->asOperands[0]; + psDestByteOff = &psInst->asOperands[1]; + psSrc = &psInst->asOperands[2]; + break; + default: + ASSERT(0); + break; + } + + uint32_t dstOffFlag = TO_FLAG_UNSIGNED_INTEGER; + SHADER_VARIABLE_TYPE dstOffType = psDestByteOff->GetDataType(psContext); + if (!HaveUnsignedTypes(psContext->psShader->eTargetLanguage) || dstOffType == SVT_INT || dstOffType == SVT_INT16 || dstOffType == SVT_INT12) + dstOffFlag = TO_FLAG_INTEGER; + + for (component = 0; component < 4; component++) + { + ASSERT(psInst->asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); + if (psInst->asOperands[0].ui32CompMask & (1 << component)) + { + psContext->AddIndentation(); + + TranslateOperand(psDest, TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY); + + if (psDest->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) + bcatcstr(glsl, "_buf"); + + if (psDestAddr) + { + bcatcstr(glsl, "["); + TranslateOperand(psDestAddr, TO_FLAG_INTEGER | TO_FLAG_UNSIGNED_INTEGER); + bcatcstr(glsl, "].value"); + } + + bcatcstr(glsl, "[("); + TranslateOperand(psDestByteOff, dstOffFlag); + bcatcstr(glsl, " >> 2"); + if (dstOffFlag == TO_FLAG_UNSIGNED_INTEGER) + bcatcstr(glsl, "u"); + bcatcstr(glsl, ")"); + + if (component != 0) + { + bformata(glsl, " + %d", component); + if (dstOffFlag == TO_FLAG_UNSIGNED_INTEGER) + bcatcstr(glsl, "u"); + } + + bcatcstr(glsl, "]"); + + uint32_t srcFlag = TO_FLAG_UNSIGNED_INTEGER; + if (DeclareRWStructuredBufferTemplateTypeAsInteger(psContext, psDest) && + psDest->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) // group shared is uint + srcFlag = TO_FLAG_INTEGER; + + bcatcstr(glsl, " = "); + if (psSrc->GetNumSwizzleElements() > 1) + TranslateOperand(psSrc, srcFlag, 1 << (srcComponent++)); + else + TranslateOperand(psSrc, srcFlag, OPERAND_4_COMPONENT_MASK_X); + + bcatcstr(glsl, ";\n"); + } + } +} + +void 
ToGLSL::TranslateShaderStorageLoad(Instruction* psInst) +{ + bstring glsl = *psContext->currentGLSLString; + int component; + Operand* psDest = 0; + Operand* psSrcAddr = 0; + Operand* psSrcByteOff = 0; + Operand* psSrc = 0; + + switch (psInst->eOpcode) + { + case OPCODE_LD_STRUCTURED: + psDest = &psInst->asOperands[0]; + psSrcAddr = &psInst->asOperands[1]; + psSrcByteOff = &psInst->asOperands[2]; + psSrc = &psInst->asOperands[3]; + break; + case OPCODE_LD_RAW: + psDest = &psInst->asOperands[0]; + psSrcByteOff = &psInst->asOperands[1]; + psSrc = &psInst->asOperands[2]; + break; + default: + ASSERT(0); + break; + } + + uint32_t destCount = psDest->GetNumSwizzleElements(); + uint32_t destMask = psDest->GetAccessMask(); + + int numParenthesis = 0; + int firstItemAdded = 0; + SHADER_VARIABLE_TYPE destDataType = psDest->GetDataType(psContext); + uint32_t srcOffFlag = TO_FLAG_UNSIGNED_INTEGER; + SHADER_VARIABLE_TYPE srcOffType = psSrcByteOff->GetDataType(psContext); + if (!HaveUnsignedTypes(psContext->psShader->eTargetLanguage) || srcOffType == SVT_INT || srcOffType == SVT_INT16 || srcOffType == SVT_INT12) + srcOffFlag = TO_FLAG_INTEGER; + + psContext->AddIndentation(); + AddAssignToDest(psDest, destDataType, destCount, psInst->ui32PreciseMask, &numParenthesis); //TODO check this out? + if (destCount > 1 || destDataType == SVT_FLOAT16) + { + bformata(glsl, "%s(", GetConstructorForTypeGLSL(psContext, destDataType, destCount, false)); + numParenthesis++; + } + for (component = 0; component < 4; component++) + { + int addedBitcast = 0; + if (!(destMask & (1 << component))) + continue; + + if (firstItemAdded) + bcatcstr(glsl, ", "); + else + firstItemAdded = 1; + + // always uint array atm + if (destDataType == SVT_FLOAT) + { + if (HaveBitEncodingOps(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, "uintBitsToFloat("); + else + bcatcstr(glsl, "float("); + addedBitcast = 1; + } + else if (destDataType == SVT_INT || destDataType == SVT_INT16 || destDataType == SVT_INT12) + { + bcatcstr(glsl, "int("); + addedBitcast = 1; + } + + TranslateOperand(psSrc, TO_FLAG_NAME_ONLY); + + if (psSrc->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) + bcatcstr(glsl, "_buf"); + + if (psSrcAddr) + { + bcatcstr(glsl, "["); + TranslateOperand(psSrcAddr, TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_INTEGER); + bcatcstr(glsl, "].value"); + } + bcatcstr(glsl, "[("); + TranslateOperand(psSrcByteOff, srcOffFlag); + bcatcstr(glsl, " >> 2"); + if (srcOffFlag == TO_FLAG_UNSIGNED_INTEGER) + bcatcstr(glsl, "u"); + + bformata(glsl, ") + %d", psSrc->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE ? 
psSrc->aui32Swizzle[component] : component); + if (srcOffFlag == TO_FLAG_UNSIGNED_INTEGER) + bcatcstr(glsl, "u"); + + bcatcstr(glsl, "]"); + + if (addedBitcast) + bcatcstr(glsl, ")"); + } + AddAssignPrologue(numParenthesis); +} + +void ToGLSL::TranslateAtomicMemOp(Instruction* psInst) +{ + bstring glsl = *psContext->currentGLSLString; + int numParenthesis = 0; + uint32_t ui32DstDataTypeFlag = TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY; + uint32_t ui32DataTypeFlag = TO_FLAG_INTEGER; + const char* func = ""; + Operand* dest = 0; + Operand* previousValue = 0; + Operand* destAddr = 0; + Operand* src = 0; + Operand* compare = 0; + int texDim = 0; + bool isUint = true; + + switch (psInst->eOpcode) + { + case OPCODE_IMM_ATOMIC_IADD: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_IADD\n"); + } + func = "Add"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_IADD: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_IADD\n"); + } + func = "Add"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_AND: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_AND\n"); + } + func = "And"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_AND: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_AND\n"); + } + func = "And"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_OR: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_OR\n"); + } + func = "Or"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_OR: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_OR\n"); + } + func = "Or"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_XOR: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_XOR\n"); + } + func = "Xor"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_XOR: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_XOR\n"); + } + func = "Xor"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + + case OPCODE_IMM_ATOMIC_EXCH: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_EXCH\n"); + } + func = "Exchange"; + previousValue 
= &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_IMM_ATOMIC_CMP_EXCH: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_CMP_EXC\n"); + } + func = "CompSwap"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + compare = &psInst->asOperands[3]; + src = &psInst->asOperands[4]; + break; + } + case OPCODE_ATOMIC_CMP_STORE: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_CMP_STORE\n"); + } + func = "CompSwap"; + previousValue = 0; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + compare = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_IMM_ATOMIC_UMIN: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_UMIN\n"); + } + func = "Min"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_UMIN: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_UMIN\n"); + } + func = "Min"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_IMIN: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_IMIN\n"); + } + func = "Min"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_IMIN: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_IMIN\n"); + } + func = "Min"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_UMAX: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_UMAX\n"); + } + func = "Max"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_UMAX: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_UMAX\n"); + } + func = "Max"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_IMAX: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_IMAX\n"); + } + func = "Max"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_IMAX: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_IMAX\n"); + } + func = "Max"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = 
&psInst->asOperands[2]; + break; + } + default: + ASSERT(0); + break; + } + + psContext->AddIndentation(); + + if (dest->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) + { + const ResourceBinding* psBinding = 0; + psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, dest->ui32RegisterNumber, &psBinding); + + if (psBinding->eType == RTYPE_UAV_RWTYPED) + { + isUint = (psBinding->ui32ReturnType == RETURN_TYPE_UINT); + + // Find out if it's texture and of what dimension + switch (psBinding->eDimension) + { + case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: + case REFLECT_RESOURCE_DIMENSION_BUFFER: + texDim = 1; + break; + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: + case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: + texDim = 2; + break; + case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: + texDim = 3; + break; + default: + ASSERT(0); + break; + } + } + else if (psBinding->eType == RTYPE_UAV_RWSTRUCTURED) + { + if (DeclareRWStructuredBufferTemplateTypeAsInteger(psContext, dest)) + { + isUint = false; + ui32DstDataTypeFlag |= TO_FLAG_INTEGER; + } + } + } + + if (isUint && HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + ui32DataTypeFlag = TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_BITCAST_TO_UINT; + else + ui32DataTypeFlag = TO_FLAG_INTEGER | TO_AUTO_BITCAST_TO_INT; + + if (previousValue) + AddAssignToDest(previousValue, isUint ? SVT_UINT : SVT_INT, 1, psInst->ui32PreciseMask, &numParenthesis); + + if (texDim > 0) + bcatcstr(glsl, "imageAtomic"); + else + bcatcstr(glsl, "atomic"); + + bcatcstr(glsl, func); + bcatcstr(glsl, "("); + + TranslateOperand(dest, ui32DstDataTypeFlag); + + if (texDim > 0) + { + bcatcstr(glsl, ", "); + unsigned int compMask = OPERAND_4_COMPONENT_MASK_X; + if (texDim >= 2) + compMask |= OPERAND_4_COMPONENT_MASK_Y; + if (texDim == 3) + compMask |= OPERAND_4_COMPONENT_MASK_Z; + + TranslateOperand(destAddr, TO_FLAG_INTEGER, compMask); + } + else + { + if (dest->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) + bcatcstr(glsl, "_buf"); + + uint32_t destAddrFlag = TO_FLAG_UNSIGNED_INTEGER; + SHADER_VARIABLE_TYPE destAddrType = destAddr->GetDataType(psContext); + if (!HaveUnsignedTypes(psContext->psShader->eTargetLanguage) || destAddrType == SVT_INT || destAddrType == SVT_INT16 || destAddrType == SVT_INT12) + destAddrFlag = TO_FLAG_INTEGER; + + bcatcstr(glsl, "["); + TranslateOperand(destAddr, destAddrFlag, OPERAND_4_COMPONENT_MASK_X); + + // Structured buf if we have both x & y swizzles. 
Raw buf has only x -> no .value[] + if (destAddr->GetNumSwizzleElements(OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y) == 2) + { + bcatcstr(glsl, "]"); + + bcatcstr(glsl, ".value["); + TranslateOperand(destAddr, destAddrFlag, OPERAND_4_COMPONENT_MASK_Y); + } + + bcatcstr(glsl, " >> 2");//bytes to floats + if (destAddrFlag == TO_FLAG_UNSIGNED_INTEGER) + bcatcstr(glsl, "u"); + + bcatcstr(glsl, "]"); + } + + bcatcstr(glsl, ", "); + + if (compare) + { + TranslateOperand(compare, ui32DataTypeFlag); + bcatcstr(glsl, ", "); + } + + TranslateOperand(src, ui32DataTypeFlag); + bcatcstr(glsl, ")"); + if (previousValue) + { + AddAssignPrologue(numParenthesis); + } + else + bcatcstr(glsl, ";\n"); +} + +void ToGLSL::TranslateConditional( + Instruction* psInst, + bstring glsl) +{ + const char* statement = ""; + if (psInst->eOpcode == OPCODE_BREAKC) + { + statement = "break"; + } + else if (psInst->eOpcode == OPCODE_CONTINUEC) + { + statement = "continue"; + } + else if (psInst->eOpcode == OPCODE_RETC) // FIXME! Need to spew out shader epilogue + { + statement = "return"; + } + + SHADER_VARIABLE_TYPE argType = psInst->asOperands[0].GetDataType(psContext); + if (argType == SVT_BOOL) + { + bcatcstr(glsl, "if("); + if (psInst->eBooleanTestType != INSTRUCTION_TEST_NONZERO) + bcatcstr(glsl, "!"); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_BOOL); + if (psInst->eOpcode != OPCODE_IF) + { + bformata(glsl, "){%s;}\n", statement); + } + else + { + bcatcstr(glsl, "){\n"); + } + } + else + { + uint32_t oFlag = TO_FLAG_UNSIGNED_INTEGER; + bool isInt = false; + if (!HaveUnsignedTypes(psContext->psShader->eTargetLanguage) || argType == SVT_INT || argType == SVT_INT16 || argType == SVT_INT12) + { + isInt = true; + oFlag = TO_FLAG_INTEGER; + } + + bcatcstr(glsl, "if("); + TranslateOperand(&psInst->asOperands[0], oFlag); + + if (psInst->eBooleanTestType == INSTRUCTION_TEST_ZERO) + bcatcstr(glsl, " == "); + else + bcatcstr(glsl, " != "); + + bcatcstr(glsl, isInt ? "0)" : "uint(0))"); // Old ES3.0 Adrenos treat 0u as const int. + + if (psInst->eOpcode != OPCODE_IF) + { + bformata(glsl, " {%s;}\n", statement); + } + else + { + bcatcstr(glsl, " {\n"); + } + } +} + +void ToGLSL::HandleSwitchTransformation(Instruction* psInst, bstring glsl) +{ + SwitchConversion& current = m_SwitchStack.back(); + if (psInst->eOpcode != OPCODE_CASE && current.currentCaseOperands.size() > 0) + { + --psContext->indent; + psContext->AddIndentation(); + bcatcstr(glsl, current.isFirstCase ? "if(" : "} else if("); + current.isFirstCase = false; + for (size_t i = 0; i < current.currentCaseOperands.size(); ++i) + { + if (i > 0) + bcatcstr(glsl, " || "); + + bformata(glsl, "%s == %s", current.switchOperand->data, current.currentCaseOperands[i]->data); + bdestroy(current.currentCaseOperands[i]); + } + bcatcstr(glsl, ") {\n"); + ++psContext->indent; + current.currentCaseOperands.clear(); + } + + if (current.conditionalsInfo.size() > 0) + { + SwitchConversion::ConditionalInfo& conditional = current.conditionalsInfo.back(); + + if (conditional.breakEncountered) + { + // We first check for BREAK ENDIF sequence. + // If we see ELSE or CASE afterwards, we don't emit our own ELSE. 
+ if (psInst->eOpcode == OPCODE_ENDIF && !conditional.endifEncountered) + conditional.endifEncountered = true; + else + { + conditional.endifEncountered = false; + conditional.breakEncountered = false; + if (psInst->eOpcode == OPCODE_ELSE) + { + if (conditional.breakCount > 0) + --conditional.breakCount; + } + else if (psInst->eOpcode != OPCODE_CASE) + { + psContext->AddIndentation(); + bcatcstr(glsl, "else {\n"); + ++psContext->indent; + } + } + } + + if (psInst->eOpcode == OPCODE_CASE || psInst->eOpcode == OPCODE_ENDSWITCH || (psInst->eOpcode == OPCODE_ENDIF && !conditional.endifEncountered)) + { + for (int i = 0; i < conditional.breakCount; ++i) + { + --psContext->indent; + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + } + current.conditionalsInfo.pop_back(); + } + } +} + +void ToGLSL::TranslateInstruction(Instruction* psInst, bool isEmbedded /* = false */) +{ + bstring glsl = *psContext->currentGLSLString; + int numParenthesis = 0; + const bool isVulkan = ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0); + const bool avoidAtomicCounter = ((psContext->flags & HLSLCC_FLAG_AVOID_SHADER_ATOMIC_COUNTERS) != 0); + + if (!isEmbedded) + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + // Uncomment to print instruction IDs + //psContext->AddIndentation(); + //bformata(glsl, "//Instruction %d\n", psInst->id); + #if 0 + if (psInst->id == 73) + { + ASSERT(1); //Set breakpoint here to debug an instruction from its ID. + } + #endif + } + if (psInst->m_SkipTranslation) + return; + } + + if (!m_SwitchStack.empty()) + HandleSwitchTransformation(psInst, glsl); + + switch (psInst->eOpcode) + { + case OPCODE_FTOI: + case OPCODE_FTOU: + { + uint32_t dstCount = psInst->asOperands[0].GetNumSwizzleElements(); + uint32_t srcCount = psInst->asOperands[1].GetNumSwizzleElements(); + SHADER_VARIABLE_TYPE castType = psInst->eOpcode == OPCODE_FTOU ? SVT_UINT : SVT_INT; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_FTOU) + bcatcstr(glsl, "//FTOU\n"); + else + bcatcstr(glsl, "//FTOI\n"); + } + switch (psInst->asOperands[0].eMinPrecision) + { + case OPERAND_MIN_PRECISION_DEFAULT: + break; + case OPERAND_MIN_PRECISION_SINT_16: + castType = SVT_INT16; + ASSERT(psInst->eOpcode == OPCODE_FTOI); + break; + case OPERAND_MIN_PRECISION_UINT_16: + castType = SVT_UINT16; + ASSERT(psInst->eOpcode == OPCODE_FTOU); + break; + default: + ASSERT(0); // We'd be doing bitcasts into low/mediump ints, not good. + } + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[0], castType, srcCount, psInst->ui32PreciseMask, &numParenthesis); + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, castType, dstCount, false)); + bcatcstr(glsl, "("); // 1 + TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_FLOAT, psInst->asOperands[0].GetAccessMask()); + bcatcstr(glsl, ")"); // 1 + AddAssignPrologue(numParenthesis); + break; + } + + case OPCODE_MOV: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + if (!isEmbedded) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//MOV\n"); + } + } + if (!isEmbedded) + psContext->AddIndentation(); + + // UNITY SPECIFIC: you can check case 1158280 + // This looks like a hack because it is! There is a bug that is quite hard to reproduce. 
+ // When doing data analysis we assume that immediates are ints and hope it will be promoted later + // which is kinda fine unless there is an unfortunate combination happening: + // We operate on 4-component registers - we need different components to be treated as float/int + // but we should not use float operations (as this will mark register as float) + // instead "float" components should be used for MOV and friends to other registers + // and they, in turn, should be used for float ops + // In pseudocode it can look like this: + // var2.xy = var1.xy; var1.xy = var2.xy; // not marked as float explicitly + // bool foo = var1.z | <...> // marked as int + // Now we have immediate that will be treated as int but NOT promoted because we think we have all ints + // var1.w = 1 // var1 is marked int + // What is important is that this temporary is marked as int by us but DX compiler treats it + // as "normal" float (and rightfully so) [or rather - we speak about cases where it does treat it as float] + // It is also important that we speak about temps (otherwise we have explicit data type to use, so promotion works) + // + // At this point we have mov immediate to int temp (which should really be float temp) + { + Operand *pDst = &psInst->asOperands[0], *pSrc = &psInst->asOperands[1]; + if (pDst->GetDataType(psContext) == SVT_INT // dst marked as int + && pDst->eType == OPERAND_TYPE_TEMP // dst is temp + && pSrc->eType == OPERAND_TYPE_IMMEDIATE32 // src is immediate + && psContext->psShader->psIntTempSizes[pDst->ui32RegisterNumber] == 0 // no temp register allocated + ) + { + pDst->aeDataType[0] = pDst->aeDataType[1] = pDst->aeDataType[2] = pDst->aeDataType[3] = SVT_FLOAT; + } + } + + AddMOVBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1], psInst->ui32PreciseMask, isEmbedded); + break; + } + case OPCODE_ITOF://signed to float + case OPCODE_UTOF://unsigned to float + { + SHADER_VARIABLE_TYPE castType = SVT_FLOAT; + uint32_t dstCount = psInst->asOperands[0].GetNumSwizzleElements(); + uint32_t srcCount = psInst->asOperands[1].GetNumSwizzleElements(); + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_ITOF) + bcatcstr(glsl, "//ITOF\n"); + else + bcatcstr(glsl, "//UTOF\n"); + } + + switch (psInst->asOperands[0].eMinPrecision) + { + case OPERAND_MIN_PRECISION_DEFAULT: + break; + case OPERAND_MIN_PRECISION_FLOAT_2_8: + castType = SVT_FLOAT10; + break; + case OPERAND_MIN_PRECISION_FLOAT_16: + castType = SVT_FLOAT16; + break; + default: + ASSERT(0); // We'd be doing bitcasts into low/mediump ints, not good. + } + + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], castType, srcCount, psInst->ui32PreciseMask, &numParenthesis); + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, castType, dstCount, false)); + bcatcstr(glsl, "("); // 1 + TranslateOperand(&psInst->asOperands[1], psInst->eOpcode == OPCODE_UTOF ? 
TO_AUTO_BITCAST_TO_UINT : TO_AUTO_BITCAST_TO_INT, psInst->asOperands[0].GetAccessMask()); + bcatcstr(glsl, ")"); // 1 + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_MAD: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//MAD\n"); + } + CallTernaryOp("*", "+", psInst, 0, 1, 2, 3, TO_FLAG_NONE); + break; + } + case OPCODE_IMAD: + { + uint32_t ui32Flags = TO_FLAG_INTEGER; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMAD\n"); + } + if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) + { + ui32Flags = TO_FLAG_UNSIGNED_INTEGER; + } + + CallTernaryOp("*", "+", psInst, 0, 1, 2, 3, ui32Flags); + break; + } + case OPCODE_DADD: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DADD\n"); + } + CallBinaryOp("+", psInst, 0, 1, 2, SVT_DOUBLE); + break; + } + case OPCODE_IADD: + { + SHADER_VARIABLE_TYPE eType = SVT_INT; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + if (!isEmbedded) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IADD\n"); + } + } + //Is this a signed or unsigned add? + if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) + { + eType = SVT_UINT; + } + CallBinaryOp("+", psInst, 0, 1, 2, eType, isEmbedded); + break; + } + case OPCODE_ADD: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ADD\n"); + } + CallBinaryOp("+", psInst, 0, 1, 2, SVT_FLOAT); + break; + } + case OPCODE_OR: + { + /*Todo: vector version */ + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//OR\n"); + } + uint32_t dstSwizCount = psInst->asOperands[0].GetNumSwizzleElements(); + uint32_t destMask = psInst->asOperands[0].GetAccessMask(); + if (psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL) + { + if (dstSwizCount == 1) + { + uint32_t destMask = psInst->asOperands[0].GetAccessMask(); + + int needsParenthesis = 0; + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), psInst->ui32PreciseMask, &needsParenthesis); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, destMask); + bcatcstr(glsl, " || "); + TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, destMask); + AddAssignPrologue(needsParenthesis); + } + else + { + Operand* pDest = &psInst->asOperands[0]; + const SHADER_VARIABLE_TYPE eDestType = pDest->GetDataType(psContext); + const std::string tempName = "hlslcc_orTemp"; + + psContext->AddIndentation(); + bcatcstr(glsl, "{\n"); + ++psContext->indent; + psContext->AddIndentation(); + + int numComponents = (pDest->eType == OPERAND_TYPE_TEMP) ? 
+ psContext->psShader->GetTempComponentCount(eDestType, pDest->ui32RegisterNumber) : + pDest->iNumComponents; + const char* constructorStr = HLSLcc::GetConstructorForType(psContext, eDestType, numComponents, false); + bformata(glsl, "%s %s = ", constructorStr, tempName.c_str()); + TranslateOperand(pDest, TO_FLAG_NAME_ONLY); + bformata(glsl, ";\n"); + + const_cast(pDest)->specialName.assign(tempName); + + int srcElem = -1; + for (uint32_t destElem = 0; destElem < 4; ++destElem) + { + int numParenthesis = 0; + srcElem++; + if (pDest->eSelMode == OPERAND_4_COMPONENT_MASK_MODE && pDest->ui32CompMask != 0 && !(pDest->ui32CompMask & (1 << destElem))) + continue; + + psContext->AddIndentation(); + AddOpAssignToDestWithMask(pDest, eDestType, 1, psInst->ui32PreciseMask, &numParenthesis, 1 << destElem); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, 1 << srcElem); + bcatcstr(glsl, " || "); + TranslateOperand(&psInst->asOperands[2], SVTTypeToFlag(eDestType), 1 << srcElem); + AddAssignPrologue(numParenthesis); + } + + const_cast(pDest)->specialName.clear(); + + psContext->AddIndentation(); + TranslateOperand(glsl, pDest, TO_FLAG_NAME_ONLY); + bformata(glsl, " = %s;\n", tempName.c_str()); + + --psContext->indent; + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + } + } + else + CallBinaryOp("|", psInst, 0, 1, 2, SVT_UINT); + break; + } + case OPCODE_AND: + { + SHADER_VARIABLE_TYPE eA = psInst->asOperands[1].GetDataType(psContext); + SHADER_VARIABLE_TYPE eB = psInst->asOperands[2].GetDataType(psContext); + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//AND\n"); + } + uint32_t destMask = psInst->asOperands[0].GetAccessMask(); + const uint32_t dstSwizCount = psInst->asOperands[0].GetNumSwizzleElements(); + SHADER_VARIABLE_TYPE eDataType = psInst->asOperands[0].GetDataType(psContext); + uint32_t ui32Flags = SVTTypeToFlag(eDataType); + if (psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL) + { + if (dstSwizCount == 1) + { + int needsParenthesis = 0; + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), psInst->ui32PreciseMask, &needsParenthesis); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, destMask); + bcatcstr(glsl, " && "); + TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, destMask); + AddAssignPrologue(needsParenthesis); + } + else + { + // Do component-wise and, glsl doesn't support && on bvecs + for (uint32_t k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) == 0) + continue; + + int needsParenthesis = 0; + psContext->AddIndentation(); + // Override dest mask temporarily + psInst->asOperands[0].ui32CompMask = (1 << k); + ASSERT(psInst->asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); + AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, 1, psInst->ui32PreciseMask, &needsParenthesis); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, 1 << k); + bcatcstr(glsl, " && "); + TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, 1 << k); + AddAssignPrologue(needsParenthesis); + } + // Restore old mask + psInst->asOperands[0].ui32CompMask = destMask; + } + } + else if ((eA == SVT_BOOL || eB == SVT_BOOL) && !(eA == SVT_BOOL && eB == SVT_BOOL)) + { + int boolOp = eA == SVT_BOOL ? 1 : 2; + int otherOp = eA == SVT_BOOL ? 
2 : 1; + int needsParenthesis = 0; + uint32_t i; + psContext->AddIndentation(); + + if (dstSwizCount == 1) + { + AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, psInst->ui32PreciseMask, &needsParenthesis); + TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); + bcatcstr(glsl, " ? "); + TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, destMask); + bcatcstr(glsl, " : "); + + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, eDataType, dstSwizCount, false)); + bcatcstr(glsl, "("); + switch (eDataType) + { + case SVT_FLOAT: + case SVT_FLOAT10: + case SVT_FLOAT16: + case SVT_DOUBLE: + bcatcstr(glsl, "0.0"); + break; + default: + bcatcstr(glsl, "0"); + } + bcatcstr(glsl, ")"); + } + else if (eDataType == SVT_FLOAT) + { + // We cannot use mix(), because it propagates NaN from both endpoints, which + // is not correct if the AND was used to implement a branch that guards against NaN. + // Instead, do either a single ?: select if the bool is a scalar, or component-wise + // ?: selects if the bool is a vector. + if (psInst->asOperands[boolOp].IsSwizzleReplicated()) + { + // Bool is effectively a scalar, we can just do a single ?: + + // The swizzle is either xxxx, yyyy, zzzz, or wwww. In each case, + // the max component will give us the 1-based index. + int boolChannel = psInst->asOperands[boolOp].GetMaxComponent(); + + AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, psInst->ui32PreciseMask, &needsParenthesis); + TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, 1 << (boolChannel - 1)); + bcatcstr(glsl, " ? "); + TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, destMask); + bcatcstr(glsl, " : "); + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, eDataType, dstSwizCount, false)); + bcatcstr(glsl, "("); + for (i = 0; i < dstSwizCount; i++) + { + if (i > 0) + bcatcstr(glsl, ", "); + bcatcstr(glsl, "0.0"); + } + bcatcstr(glsl, ")"); + } + else + { + bool needsIndent = false; + + // Do component-wise select + for (uint32_t k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) == 0) + continue; + + int needsParenthesis = 0; + if (needsIndent) + psContext->AddIndentation(); + + // Override dest mask temporarily + psInst->asOperands[0].ui32CompMask = (1 << k); + ASSERT(psInst->asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); + AddAssignToDest(&psInst->asOperands[0], eDataType, 1, psInst->ui32PreciseMask, &needsParenthesis); + TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, 1 << k); + bcatcstr(glsl, " ? 
"); + TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, 1 << k); + bcatcstr(glsl, " : 0.0"); + AddAssignPrologue(needsParenthesis); + + needsIndent = true; + } + + // Restore old mask + psInst->asOperands[0].ui32CompMask = destMask; + } + } + else + { + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, dstSwizCount, psInst->ui32PreciseMask, &needsParenthesis); + const bool haveNativeBitwiseOps = HaveNativeBitwiseOps(psContext->psShader->eTargetLanguage); + if (!haveNativeBitwiseOps) + { + UseExtraFunctionDependency("op_and"); + bcatcstr(glsl, "op_and"); + } + bcatcstr(glsl, "("); + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, SVT_UINT, dstSwizCount, false)); + bcatcstr(glsl, "("); + TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, ") * 0xFFFFFFFFu"); + else + bcatcstr(glsl, ") * -1"); // GLSL ES 2 spec: high precision ints are guaranteed to have a range of at least (-2^16, 2^16) + + if (haveNativeBitwiseOps) + bcatcstr(glsl, ") & "); + else + bcatcstr(glsl, ", "); + + TranslateOperand(&psInst->asOperands[otherOp], TO_FLAG_UNSIGNED_INTEGER, destMask); + if (!haveNativeBitwiseOps) + bcatcstr(glsl, ")"); + } + + AddAssignPrologue(needsParenthesis); + } + else + { + CallBinaryOp("&", psInst, 0, 1, 2, SVT_UINT); + } + + break; + } + case OPCODE_GE: + { + /* + dest = vec4(greaterThanEqual(vec4(srcA), vec4(srcB)); + Caveat: The result is a boolean but HLSL asm returns 0xFFFFFFFF/0x0 instead. + */ + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//GE\n"); + } + AddComparison(psInst, CMP_GE, TO_FLAG_NONE); + break; + } + case OPCODE_MUL: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//MUL\n"); + } + CallBinaryOp("*", psInst, 0, 1, 2, SVT_FLOAT); + break; + } + case OPCODE_IMUL: + { + SHADER_VARIABLE_TYPE eType = SVT_INT; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMUL\n"); + } + if (psInst->asOperands[1].GetDataType(psContext) == SVT_UINT) + { + eType = SVT_UINT; + } + + ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_NULL); + + CallBinaryOp("*", psInst, 1, 2, 3, eType); + break; + } + case OPCODE_UDIV: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//UDIV\n"); + } + //destQuotient, destRemainder, src0, src1 + + // There are cases where destQuotient is the same variable as src0 or src1. If that happens, + // we need to compute "%" before the "/" in order to avoid src0 or src1 being overriden first. 
+ if ((psInst->asOperands[0].eType != psInst->asOperands[2].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[2].ui32RegisterNumber) + && (psInst->asOperands[0].eType != psInst->asOperands[3].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[3].ui32RegisterNumber)) + { + CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT); + CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT); + } + else + { + CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT); + CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT); + } + break; + } + case OPCODE_DIV: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DIV\n"); + } + CallBinaryOp("/", psInst, 0, 1, 2, SVT_FLOAT); + break; + } + case OPCODE_SINCOS: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SINCOS\n"); + } + // Need careful ordering if src == dest[0], as then the cos() will be reading from wrong value + if (psInst->asOperands[0].eType == psInst->asOperands[2].eType && + psInst->asOperands[0].ui32RegisterNumber == psInst->asOperands[2].ui32RegisterNumber) + { + // sin() result overwrites source, do cos() first. + // The case where both write the src shouldn't really happen anyway. + if (psInst->asOperands[1].eType != OPERAND_TYPE_NULL) + { + CallHelper1("cos", psInst, 1, 2, 1); + } + + if (psInst->asOperands[0].eType != OPERAND_TYPE_NULL) + { + CallHelper1( + "sin", psInst, 0, 2, 1); + } + } + else + { + if (psInst->asOperands[0].eType != OPERAND_TYPE_NULL) + { + CallHelper1("sin", psInst, 0, 2, 1); + } + + if (psInst->asOperands[1].eType != OPERAND_TYPE_NULL) + { + CallHelper1("cos", psInst, 1, 2, 1); + } + } + break; + } + + case OPCODE_DP2: + { + int numParenthesis = 0; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DP2\n"); + } + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, psInst->ui32PreciseMask, &numParenthesis); + bcatcstr(glsl, "dot("); + TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[2], TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bcatcstr(glsl, ")"); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_DP3: + { + int numParenthesis = 0; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DP3\n"); + } + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, psInst->ui32PreciseMask, &numParenthesis); + bcatcstr(glsl, "dot("); + TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[2], TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); + bcatcstr(glsl, ")"); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_DP4: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DP4\n"); + } + CallHelper2("dot", psInst, 0, 1, 2, 0); + break; + } + case OPCODE_INE: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//INE\n"); + } + AddComparison(psInst, CMP_NE, TO_FLAG_INTEGER); + break; + } + case OPCODE_NE: + { + if (psContext->flags & 
HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//NE\n"); + } + AddComparison(psInst, CMP_NE, TO_FLAG_NONE); + break; + } + case OPCODE_IGE: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IGE\n"); + } + AddComparison(psInst, CMP_GE, TO_FLAG_INTEGER); + break; + } + case OPCODE_ILT: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ILT\n"); + } + AddComparison(psInst, CMP_LT, TO_FLAG_INTEGER); + break; + } + case OPCODE_LT: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LT\n"); + } + AddComparison(psInst, CMP_LT, TO_FLAG_NONE); + break; + } + case OPCODE_IEQ: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IEQ\n"); + } + AddComparison(psInst, CMP_EQ, TO_FLAG_INTEGER); + break; + } + case OPCODE_ULT: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ULT\n"); + } + AddComparison(psInst, CMP_LT, TO_FLAG_UNSIGNED_INTEGER); + break; + } + case OPCODE_UGE: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//UGE\n"); + } + AddComparison(psInst, CMP_GE, TO_FLAG_UNSIGNED_INTEGER); + break; + } + case OPCODE_MOVC: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//MOVC\n"); + } + AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1], &psInst->asOperands[2], &psInst->asOperands[3], psInst->ui32PreciseMask); + break; + } + case OPCODE_SWAPC: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SWAPC\n"); + } + // TODO needs temps!! 
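+            // Editor's note (assumption about the TODO above, not from the upstream source): the two MOVCs
+            // below read src0/src1/src2 twice, so if dest0 aliases any of them the second select sees an
+            // already-overwritten value. A safe lowering would copy the sources into temporaries first, e.g.
+            //     tmp1 = src1; tmp2 = src2;
+            //     dest0 = bool(src0) ? tmp2 : tmp1;
+            //     dest1 = bool(src0) ? tmp1 : tmp2;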
+ AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[2], &psInst->asOperands[4], &psInst->asOperands[3], psInst->ui32PreciseMask); + AddMOVCBinaryOp(&psInst->asOperands[1], &psInst->asOperands[2], &psInst->asOperands[3], &psInst->asOperands[4], psInst->ui32PreciseMask); + break; + } + + case OPCODE_LOG: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LOG\n"); + } + CallHelper1("log2", psInst, 0, 1, 1); + break; + } + case OPCODE_RSQ: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//RSQ\n"); + } + CallHelper1("inversesqrt", psInst, 0, 1, 1); + break; + } + case OPCODE_EXP: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EXP\n"); + } + CallHelper1("exp2", psInst, 0, 1, 1); + break; + } + case OPCODE_SQRT: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SQRT\n"); + } + CallHelper1("sqrt", psInst, 0, 1, 1); + break; + } + case OPCODE_ROUND_PI: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_PI\n"); + } + CallHelper1("ceil", psInst, 0, 1, 1); + break; + } + case OPCODE_ROUND_NI: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_NI\n"); + } + CallHelper1("floor", psInst, 0, 1, 1); + break; + } + case OPCODE_ROUND_Z: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_Z\n"); + } + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + UseExtraFunctionDependency("trunc"); + + CallHelper1("trunc", psInst, 0, 1, 1); + break; + } + case OPCODE_ROUND_NE: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_NE\n"); + } + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + UseExtraFunctionDependency("roundEven"); + + CallHelper1("roundEven", psInst, 0, 1, 1); + break; + } + case OPCODE_FRC: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//FRC\n"); + } + CallHelper1("fract", psInst, 0, 1, 1); + break; + } + case OPCODE_IMAX: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMAX\n"); + } + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + CallHelper2("max", psInst, 0, 1, 2, 1); + else + CallHelper2Int("max", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_UMAX: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//UMAX\n"); + } + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + CallHelper2("max", psInst, 0, 1, 2, 1); + else + CallHelper2UInt("max", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_MAX: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//MAX\n"); + } + CallHelper2("max", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_IMIN: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMIN\n"); + } + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + 
CallHelper2("min", psInst, 0, 1, 2, 1); + else + CallHelper2Int("min", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_UMIN: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//UMIN\n"); + } + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + CallHelper2("min", psInst, 0, 1, 2, 1); + else + CallHelper2UInt("min", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_MIN: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//MIN\n"); + } + CallHelper2("min", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_GATHER4: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4\n"); + } + TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER); + break; + } + case OPCODE_GATHER4_PO_C: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4_PO_C\n"); + } + TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_PARAMOFFSET | TEXSMP_FLAG_DEPTHCOMPARE); + break; + } + case OPCODE_GATHER4_PO: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4_PO\n"); + } + TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_PARAMOFFSET); + break; + } + case OPCODE_GATHER4_C: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4_C\n"); + } + TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_DEPTHCOMPARE); + break; + } + case OPCODE_SAMPLE: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE\n"); + } + TranslateTextureSample(psInst, TEXSMP_FLAG_NONE); + break; + } + case OPCODE_SAMPLE_L: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_L\n"); + } + TranslateTextureSample(psInst, TEXSMP_FLAG_LOD); + break; + } + case OPCODE_SAMPLE_C: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_C\n"); + } + TranslateTextureSample(psInst, TEXSMP_FLAG_DEPTHCOMPARE); + break; + } + case OPCODE_SAMPLE_C_LZ: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_C_LZ\n"); + } + TranslateTextureSample(psInst, TEXSMP_FLAG_DEPTHCOMPARE | TEXSMP_FLAG_FIRSTLOD); + break; + } + case OPCODE_SAMPLE_D: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_D\n"); + } + TranslateTextureSample(psInst, TEXSMP_FLAG_GRAD); + break; + } + case OPCODE_SAMPLE_B: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_B\n"); + } + TranslateTextureSample(psInst, TEXSMP_FLAG_BIAS); + break; + } + case OPCODE_RET: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//RET\n"); + } + if (psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode) + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Post shader code ---\n"); + } + 
+ bconcat(glsl, psContext->psShader->asPhases[psContext->currentPhase].postShaderCode); + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//--- End post shader code ---\n"); + } + } + psContext->AddIndentation(); + bcatcstr(glsl, "return;\n"); + break; + } + case OPCODE_INTERFACE_CALL: + { + const char* name; + ShaderVar* psVar; + uint32_t varFound; + + uint32_t funcPointer; + uint32_t funcBodyIndex; + uint32_t ui32NumBodiesPerTable; + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//INTERFACE_CALL\n"); + } + + ASSERT(psInst->asOperands[0].eIndexRep[0] == OPERAND_INDEX_IMMEDIATE32); + + funcPointer = psInst->asOperands[0].aui32ArraySizes[0]; + funcBodyIndex = psInst->ui32FuncIndexWithinInterface; + + ui32NumBodiesPerTable = psContext->psShader->funcPointer[funcPointer].ui32NumBodiesPerTable; + + varFound = psContext->psShader->sInfo.GetInterfaceVarFromOffset(funcPointer, &psVar); + + ASSERT(varFound); + + name = &psVar->name[0]; + + psContext->AddIndentation(); + bcatcstr(glsl, name); + TranslateOperandIndexMAD(&psInst->asOperands[0], 1, ui32NumBodiesPerTable, funcBodyIndex); + //bformata(glsl, "[%d]", funcBodyIndex); + bcatcstr(glsl, "();\n"); + break; + } + case OPCODE_LABEL: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LABEL\n"); + } + --psContext->indent; + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); //Closing brace ends the previous function. + psContext->AddIndentation(); + + bcatcstr(glsl, "subroutine(SubroutineType)\n"); + bcatcstr(glsl, "void "); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, "(){\n"); + ++psContext->indent; + break; + } + case OPCODE_COUNTBITS: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//COUNTBITS\n"); + } + psContext->AddIndentation(); + + // in glsl bitCount decl is genIType bitCount(genIType), so it is important that input/output types agree + // enter assembly: when writing swizzle encoding we use 0 to say "source from x" + // now, say, we generate code o.xy = bitcount(i.xy) + // output gets component mask 1,1,0,0 (note that we use bit 1<).<..> will still collapse everything into + // bitCount(i.<..>) [well, tweaking swizzle, sure] + // what does that mean is that we can safely take output component count to determine "proper" type + // note that hlsl compiler already checked that things can work out, so it should be fine doing this magic + const Operand* dst = &psInst->asOperands[0]; + const int dstCompCount = dst->eSelMode == OPERAND_4_COMPONENT_MASK_MODE ? 
dst->ui32CompMask : OPERAND_4_COMPONENT_MASK_ALL; + + TranslateOperand(dst, TO_FLAG_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = bitCount("); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, dstCompCount); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_FIRSTBIT_HI: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//FIRSTBIT_HI\n"); + } + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = findMSB("); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_FIRSTBIT_LO: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//FIRSTBIT_LO\n"); + } + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = findLSB("); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_FIRSTBIT_SHI: //signed high + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//FIRSTBIT_SHI\n"); + } + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = findMSB("); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_BFREV: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//BFREV\n"); + } + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = bitfieldReverse("); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_BFI: + { + uint32_t destMask = psInst->asOperands[0].GetAccessMask(); + uint32_t numelements_width = psInst->asOperands[1].GetNumSwizzleElements(); + uint32_t numelements_offset = psInst->asOperands[2].GetNumSwizzleElements(); + uint32_t numelements_dest = psInst->asOperands[0].GetNumSwizzleElements(); + uint32_t numoverall_elements = std::min(std::min(numelements_width, numelements_offset), numelements_dest); + uint32_t i, j, k; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//BFI\n"); + } + if (psContext->psShader->eTargetLanguage == LANG_ES_300) + UseExtraFunctionDependency("int_bitfieldInsert"); + + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_INT, numoverall_elements, psInst->ui32PreciseMask, &numParenthesis); + + if (numoverall_elements == 1) + bformata(glsl, "int("); + else + bformata(glsl, "ivec%d(", numoverall_elements); + + k = 0; + for (i = 0; i < 4; ++i) + { + if ((destMask & (1 << i)) == 0) + continue; + + k++; + if (psContext->psShader->eTargetLanguage == LANG_ES_300) + bcatcstr(glsl, "int_bitfieldInsert("); + else + bcatcstr(glsl, "bitfieldInsert("); + + for (j = 4; j >= 1; --j) + { + TranslateOperand(&psInst->asOperands[j], TO_FLAG_INTEGER, 1 << i); + if (j != 1) + bcatcstr(glsl, ","); + } + + bcatcstr(glsl, ") "); + if (k != numoverall_elements) + bcatcstr(glsl, ", "); + } + bcatcstr(glsl, ")"); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_CUT: + { + if (psContext->flags & 
HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//CUT\n"); + } + psContext->AddIndentation(); + bcatcstr(glsl, "EndPrimitive();\n"); + break; + } + case OPCODE_EMIT: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EMIT\n"); + } + if (psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode) + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Post shader code ---\n"); + } + + bconcat(glsl, psContext->psShader->asPhases[psContext->currentPhase].postShaderCode); + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//--- End post shader code ---\n"); + } + } + + psContext->AddIndentation(); + bcatcstr(glsl, "EmitVertex();\n"); + break; + } + case OPCODE_EMITTHENCUT: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EMITTHENCUT\n"); + } + psContext->AddIndentation(); + bcatcstr(glsl, "EmitVertex();\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "EndPrimitive();\n"); + break; + } + + case OPCODE_CUT_STREAM: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//CUT_STREAM\n"); + } + psContext->AddIndentation(); + ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_STREAM); + if (psContext->psShader->eTargetLanguage < LANG_400 || psInst->asOperands[0].ui32RegisterNumber == 0) + { + // ES geom shaders only support one stream. + bcatcstr(glsl, "EndPrimitive();\n"); + } + else + { + bcatcstr(glsl, "EndStreamPrimitive("); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, ");\n"); + } + + break; + } + case OPCODE_EMIT_STREAM: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EMIT_STREAM\n"); + } + if (psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode) + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Post shader code ---\n"); + } + + bconcat(glsl, psContext->psShader->asPhases[psContext->currentPhase].postShaderCode); + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//--- End post shader code ---\n"); + } + } + + psContext->AddIndentation(); + + ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_STREAM); + if (psContext->psShader->eTargetLanguage < LANG_400 || psInst->asOperands[0].ui32RegisterNumber == 0) + { + // ES geom shaders only support one stream. + bcatcstr(glsl, "EmitVertex();\n"); + } + else + { + bcatcstr(glsl, "EmitStreamVertex("); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, ");\n"); + } + break; + } + case OPCODE_EMITTHENCUT_STREAM: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EMITTHENCUT\n"); + } + ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_STREAM); + if (psContext->psShader->eTargetLanguage < LANG_400 || psInst->asOperands[0].ui32RegisterNumber == 0) + { + // ES geom shaders only support one stream. 
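+                // Editor's note (assumption, not from the upstream source): EmitStreamVertex() and
+                // EndStreamPrimitive() only exist for multi-stream geometry shaders (GLSL 4.00+), and for
+                // stream 0 the plain EmitVertex()/EndPrimitive() pair emitted below is defined to be equivalent.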
+ bcatcstr(glsl, "EmitVertex();\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "EndPrimitive();\n"); + } + else + { + bcatcstr(glsl, "EmitStreamVertex("); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, ");\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "EndStreamPrimitive("); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, ");\n"); + } + break; + } + case OPCODE_REP: + { + if (!m_SwitchStack.empty()) + ++m_SwitchStack.back().isInLoop; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//REP\n"); + } + //Need to handle nesting. + //Max of 4 for rep - 'Flow Control Limitations' http://msdn.microsoft.com/en-us/library/windows/desktop/bb219848(v=vs.85).aspx + + psContext->AddIndentation(); + bcatcstr(glsl, "RepCounter = "); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_X); + bcatcstr(glsl, ";\n"); + + psContext->AddIndentation(); + bcatcstr(glsl, "while(RepCounter!=0){\n"); + ++psContext->indent; + break; + } + case OPCODE_ENDREP: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ENDREP\n"); + } + psContext->AddIndentation(); + bcatcstr(glsl, "RepCounter--;\n"); + + --psContext->indent; + + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + if (!m_SwitchStack.empty()) + --m_SwitchStack.back().isInLoop; + break; + } + case OPCODE_LOOP: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LOOP\n"); + } + if (!m_SwitchStack.empty()) + ++m_SwitchStack.back().isInLoop; + psContext->AddIndentation(); + + if (psInst->ui32NumOperands == 2) + { + //DX9 version + ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_SPECIAL_LOOPCOUNTER); + bcatcstr(glsl, "for("); + bcatcstr(glsl, "LoopCounter = "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); + bcatcstr(glsl, ".y, ZeroBasedCounter = 0;"); + bcatcstr(glsl, "ZeroBasedCounter < "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); + bcatcstr(glsl, ".x;"); + + bcatcstr(glsl, "LoopCounter += "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); + bcatcstr(glsl, ".z, ZeroBasedCounter++){\n"); + ++psContext->indent; + } + else if (psInst->m_LoopInductors[1] != 0) + { + // Can emit as for + uint32_t typeFlags = TO_FLAG_INTEGER; + bcatcstr(glsl, "for("); + if (psInst->m_LoopInductors[0] != 0) + { + if (psInst->m_InductorRegister != 0) + { + // Do declaration here as well + switch (psInst->m_LoopInductors[0]->asOperands[0].GetDataType(psContext)) + { + case SVT_INT: + bcatcstr(glsl, "int "); + break; + case SVT_UINT: + bcatcstr(glsl, "uint "); + typeFlags = TO_FLAG_UNSIGNED_INTEGER; + break; + default: + ASSERT(0); + break; + } + } + TranslateInstruction(psInst->m_LoopInductors[0], true); + } + bcatcstr(glsl, " ; "); + bool negateCondition = psInst->m_LoopInductors[1]->eBooleanTestType + != psInst->m_LoopInductors[2]->eBooleanTestType; + bool negateOrder = false; + + // Yet Another NVidia OSX shader compiler bug workaround (really nvidia, get your s#!t together): + // For reasons unfathomable to us, this breaks SSAO effect on OSX (case 756028) + // Broken: for(int ti_loop_1 = int(int(0xFFFFFFFCu)) ; 4 >= ti_loop_1 ; ti_loop_1++) + // Works: for (int ti_loop_1 = int(int(0xFFFFFFFCu)); ti_loop_1 <= 4; ti_loop_1++) + // + // So, check if the first argument is an immediate value, and if so, switch the 
order or the operands + // (and adjust condition) + if (psInst->m_LoopInductors[1]->asOperands[1].eType == OPERAND_TYPE_IMMEDIATE32) + negateOrder = true; + + const char *cmpOp = ""; + switch (psInst->m_LoopInductors[1]->eOpcode) + { + case OPCODE_IGE: + if (negateOrder) + cmpOp = negateCondition ? ">" : "<="; + else + cmpOp = negateCondition ? "<" : ">="; + break; + case OPCODE_ILT: + if (negateOrder) + cmpOp = negateCondition ? "<=" : ">"; + else + cmpOp = negateCondition ? ">=" : "<"; + break; + case OPCODE_IEQ: + // No need to change the comparison if negateOrder is true + cmpOp = negateCondition ? "!=" : "=="; + if (psInst->m_LoopInductors[1]->asOperands[0].GetDataType(psContext) == SVT_UINT) + typeFlags = TO_FLAG_UNSIGNED_INTEGER; + break; + case OPCODE_INE: + // No need to change the comparison if negateOrder is true + cmpOp = negateCondition ? "==" : "!="; + if (psInst->m_LoopInductors[1]->asOperands[0].GetDataType(psContext) == SVT_UINT) + typeFlags = TO_FLAG_UNSIGNED_INTEGER; + break; + case OPCODE_UGE: + if (negateOrder) + cmpOp = negateCondition ? ">" : "<="; + else + cmpOp = negateCondition ? "<" : ">="; + typeFlags = TO_FLAG_UNSIGNED_INTEGER; + break; + case OPCODE_ULT: + if (negateOrder) + cmpOp = negateCondition ? "<=" : ">"; + else + cmpOp = negateCondition ? ">=" : "<"; + typeFlags = TO_FLAG_UNSIGNED_INTEGER; + break; + + default: + ASSERT(0); + } + TranslateOperand(&psInst->m_LoopInductors[1]->asOperands[negateOrder ? 2 : 1], typeFlags); + bcatcstr(glsl, cmpOp); + TranslateOperand(&psInst->m_LoopInductors[1]->asOperands[negateOrder ? 1 : 2], typeFlags); + + bcatcstr(glsl, " ; "); + // One more shortcut: translate IADD tX, tX, 1 to tX++ + if (HLSLcc::IsAddOneInstruction(psInst->m_LoopInductors[3])) + { + TranslateOperand(&psInst->m_LoopInductors[3]->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, "++"); + } + else + TranslateInstruction(psInst->m_LoopInductors[3], true); + + bcatcstr(glsl, ")\n"); + psContext->AddIndentation(); + bcatcstr(glsl, "{\n"); + ++psContext->indent; + } + else + { + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + { + bstring name; + name = bformat(HLSLCC_TEMP_PREFIX "i_while_true_%d", m_NumDeclaredWhileTrueLoops++); + + // Workaround limitation with WebGL 1.0 GLSL, as we're expecting something to break the loop in any case + // Fragment shaders on some devices don't like too large integer constants (Adreno 3xx, for example) + int hardcoded_iteration_limit = (psContext->psShader->eShaderType == PIXEL_SHADER) ? 
0x7FFF : 0x7FFFFFFF; + + bformata(glsl, "for(int %s = 0 ; %s < 0x%X ; %s++){\n", name->data, name->data, hardcoded_iteration_limit, name->data); + bdestroy(name); + } + else + { + bcatcstr(glsl, "while(true){\n"); + } + ++psContext->indent; + } + break; + } + case OPCODE_ENDLOOP: + { + --psContext->indent; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ENDLOOP\n"); + } + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + if (!m_SwitchStack.empty()) + --m_SwitchStack.back().isInLoop; + break; + } + case OPCODE_BREAK: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//BREAK\n"); + } + if (m_SwitchStack.empty() || m_SwitchStack.back().isInLoop != 0) + { + psContext->AddIndentation(); + bcatcstr(glsl, "break;\n"); + } + else + { + std::vector& conditionalsInfo = m_SwitchStack.back().conditionalsInfo; + if (conditionalsInfo.size() > 0) + { + conditionalsInfo.back().breakEncountered = true; + ++conditionalsInfo.back().breakCount; + } + } + break; + } + case OPCODE_BREAKC: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//BREAKC\n"); + } + psContext->AddIndentation(); + + if (m_SwitchStack.empty() || m_SwitchStack.back().isInLoop != 0) + { + TranslateConditional(psInst, glsl); + } + else + { + // This way we won't emit a "break" when we're transforming a "switch" into if/else for ES2 + OPCODE_TYPE opcode = psInst->eOpcode; + psInst->eOpcode = OPCODE_IF; + TranslateConditional(psInst, glsl); + psInst->eOpcode = opcode; + std::vector& conditionalsInfo = m_SwitchStack.back().conditionalsInfo; + conditionalsInfo.push_back(SwitchConversion::ConditionalInfo(1, true, true)); + } + break; + } + case OPCODE_CONTINUEC: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//CONTINUEC\n"); + } + psContext->AddIndentation(); + + TranslateConditional(psInst, glsl); + break; + } + case OPCODE_IF: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IF\n"); + } + psContext->AddIndentation(); + + TranslateConditional(psInst, glsl); + ++psContext->indent; + + if (!m_SwitchStack.empty() && m_SwitchStack.back().isInLoop == 0) + { + std::vector& conditionalsInfo = m_SwitchStack.back().conditionalsInfo; + conditionalsInfo.push_back(SwitchConversion::ConditionalInfo(0)); + } + + break; + } + case OPCODE_RETC: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//RETC\n"); + } + psContext->AddIndentation(); + + TranslateConditional(psInst, glsl); + break; + } + case OPCODE_ELSE: + { + --psContext->indent; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ELSE\n"); + } + psContext->AddIndentation(); + bcatcstr(glsl, "} else {\n"); + psContext->indent++; + + if (!m_SwitchStack.empty() && m_SwitchStack.back().isInLoop == 0) + { + std::vector& conditionalsInfo = m_SwitchStack.back().conditionalsInfo; + conditionalsInfo.push_back(SwitchConversion::ConditionalInfo(0)); + } + break; + } + case OPCODE_ENDSWITCH: + { + const bool endsSwitch = m_SwitchStack.empty(); + --psContext->indent; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, 
"//ENDSWITCH\n"); + } + if (endsSwitch) + --psContext->indent; + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + if (!endsSwitch) + m_SwitchStack.pop_back(); + break; + } + case OPCODE_ENDIF: + { + --psContext->indent; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ENDIF\n"); + } + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + + if (!m_SwitchStack.empty() && m_SwitchStack.back().isInLoop == 0) + { + std::vector& conditionalsInfo = m_SwitchStack.back().conditionalsInfo; + conditionalsInfo.pop_back(); + } + break; + } + case OPCODE_CONTINUE: + { + psContext->AddIndentation(); + bcatcstr(glsl, "continue;\n"); + break; + } + case OPCODE_DEFAULT: + { + --psContext->indent; + psContext->AddIndentation(); + if (m_SwitchStack.empty()) + bcatcstr(glsl, "default:\n"); + else + bcatcstr(glsl, "} else {\n"); + ++psContext->indent; + break; + } + case OPCODE_NOP: + { + break; + } + case OPCODE_SYNC: + { + const uint32_t ui32SyncFlags = psInst->ui32SyncFlags; + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SYNC\n"); + } + + if (ui32SyncFlags & SYNC_THREAD_GROUP_SHARED_MEMORY) + { + psContext->AddIndentation(); + bcatcstr(glsl, "memoryBarrierShared();\n"); + } + if (ui32SyncFlags & (SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP | SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL)) + { + psContext->AddIndentation(); + bcatcstr(glsl, "memoryBarrier();\n"); + } + if (ui32SyncFlags & SYNC_THREADS_IN_GROUP) + { + psContext->AddIndentation(); + bcatcstr(glsl, "barrier();\n"); + } + break; + } + case OPCODE_SWITCH: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SWITCH\n"); + } + if (psContext->psShader->eTargetLanguage != LANG_ES_100) + { + psContext->AddIndentation(); + bcatcstr(glsl, "switch("); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); + bcatcstr(glsl, "){\n"); + + psContext->indent += 2; + } + else + { + // GLSL ES2 doesn't support switch, need to convert to if/else if/else + SwitchConversion conversion; + TranslateOperand(conversion.switchOperand, &psInst->asOperands[0], TO_FLAG_INTEGER); + m_SwitchStack.push_back(conversion); + ++psContext->indent; + } + break; + } + case OPCODE_CASE: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//case\n"); + } + if (m_SwitchStack.empty()) + { + --psContext->indent; + psContext->AddIndentation(); + + bcatcstr(glsl, "case "); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); + bcatcstr(glsl, ":\n"); + + ++psContext->indent; + } + else + { + bstring operand = bfromcstr(""); + TranslateOperand(operand, &psInst->asOperands[0], TO_FLAG_INTEGER); + m_SwitchStack.back().currentCaseOperands.push_back(operand); + } + break; + } + case OPCODE_EQ: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EQ\n"); + } + AddComparison(psInst, CMP_EQ, TO_FLAG_NONE); + break; + } + case OPCODE_USHR: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//USHR\n"); + } + CallBinaryOp(">>", psInst, 0, 1, 2, SVT_UINT); + break; + } + case OPCODE_ISHL: + { + SHADER_VARIABLE_TYPE eType = SVT_INT; + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, 
"//ISHL\n"); + } + + if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) + { + eType = SVT_UINT; + } + + CallBinaryOp("<<", psInst, 0, 1, 2, eType); + break; + } + case OPCODE_ISHR: + { + SHADER_VARIABLE_TYPE eType = SVT_INT; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ISHR\n"); + } + + if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) + { + eType = SVT_UINT; + } + + CallBinaryOp(">>", psInst, 0, 1, 2, eType); + break; + } + case OPCODE_LD: + case OPCODE_LD_MS: + { + const ResourceBinding* psBinding = 0; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_LD) + bcatcstr(glsl, "//LD\n"); + else + bcatcstr(glsl, "//LD_MS\n"); + } + + psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, &psBinding); + + TranslateTexelFetch(psInst, psBinding, glsl); + break; + } + case OPCODE_DISCARD: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DISCARD\n"); + } + psContext->AddIndentation(); + if (psContext->psShader->ui32MajorVersion <= 3) + { + bcatcstr(glsl, "if(any(lessThan(("); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_NONE); + + if (psContext->psShader->ui32MajorVersion == 1) + { + /* SM1.X only kills based on the rgb channels */ + bcatcstr(glsl, ").xyz, vec3(0)))){discard;}\n"); + } + else + { + bcatcstr(glsl, "), vec4(0)))){discard;}\n"); + } + } + else if (psInst->eBooleanTestType == INSTRUCTION_TEST_ZERO) + { + const bool isBool = psInst->asOperands[0].GetDataType(psContext, SVT_INT) == SVT_BOOL; + const bool forceNoBoolUpscale = psContext->psShader->eTargetLanguage >= LANG_ES_FIRST && psContext->psShader->eTargetLanguage <= LANG_ES_LAST; + const bool useDirectTest = isBool && forceNoBoolUpscale; + bcatcstr(glsl, "if("); + bcatcstr(glsl, useDirectTest ? "!" : "("); + TranslateOperand(&psInst->asOperands[0], useDirectTest ? TO_FLAG_BOOL : TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_ALL, forceNoBoolUpscale); + if (!useDirectTest) + bcatcstr(glsl, ")==0"); + bcatcstr(glsl, "){discard;}\n"); + } + else + { + ASSERT(psInst->eBooleanTestType == INSTRUCTION_TEST_NONZERO); + const bool isBool = psInst->asOperands[0].GetDataType(psContext, SVT_INT) == SVT_BOOL; + const bool forceNoBoolUpscale = psContext->psShader->eTargetLanguage >= LANG_ES_FIRST && psContext->psShader->eTargetLanguage <= LANG_ES_LAST; + const bool useDirectTest = isBool && forceNoBoolUpscale; + bcatcstr(glsl, "if("); + if (!useDirectTest) + bcatcstr(glsl, "("); + TranslateOperand(&psInst->asOperands[0], useDirectTest ? TO_FLAG_BOOL : TO_FLAG_INTEGER, OPERAND_4_COMPONENT_MASK_ALL, forceNoBoolUpscale); + if (!useDirectTest) + bcatcstr(glsl, ")!=0"); + bcatcstr(glsl, "){discard;}\n"); + } + break; + } + case OPCODE_LOD: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LOD\n"); + } + //LOD computes the following vector (ClampedLOD, NonClampedLOD, 0, 0) + + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 4, psInst->ui32PreciseMask, &numParenthesis); + + //If the core language does not have query-lod feature, + //then the extension is used. The name of the function + //changed between extension and core. 
+ if (HaveQueryLod(psContext->psShader->eTargetLanguage)) + { + bcatcstr(glsl, "textureQueryLod("); + } + else + { + bcatcstr(glsl, "textureQueryLOD("); + } + + TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ","); + TranslateTexCoord( + psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber], + &psInst->asOperands[1]); + bcatcstr(glsl, ")"); + + //The swizzle on srcResource allows the returned values to be swizzled arbitrarily before they are written to the destination. + + // iWriteMaskEnabled is forced off during DecodeOperand because swizzle on sampler uniforms + // does not make sense. But need to re-enable to correctly swizzle this particular instruction. + psInst->asOperands[2].iWriteMaskEnabled = 1; + TranslateOperandSwizzleWithMask(psContext, &psInst->asOperands[2], psInst->asOperands[0].GetAccessMask(), 0); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_EVAL_CENTROID: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EVAL_CENTROID\n"); + } + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, " = interpolateAtCentroid("); + //interpolateAtCentroid accepts in-qualified variables. + //As long as bytecode only writes vX registers in declarations + //we should be able to use the declared name directly. + TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_EVAL_SAMPLE_INDEX: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EVAL_SAMPLE_INDEX\n"); + } + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, " = interpolateAtSample("); + //interpolateAtSample accepts in-qualified variables. + //As long as bytecode only writes vX registers in declarations + //we should be able to use the declared name directly. + TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[2], TO_FLAG_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_EVAL_SNAPPED: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EVAL_SNAPPED\n"); + } + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, " = interpolateAtOffset("); + //interpolateAtOffset accepts in-qualified variables. + //As long as bytecode only writes vX registers in declarations + //we should be able to use the declared name directly. 
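+            // Editor's note (illustrative, not from the upstream source): the emitted call ends up roughly as
+            //     dest = interpolateAtOffset(someInput, offsetOperand.xy);
+            // the ".xy" appended below forces a two-component offset, since interpolateAtOffset() expects a
+            // vec2; the operand names in this sketch are placeholders.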
+ TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[2], TO_FLAG_INTEGER); + bcatcstr(glsl, ".xy);\n"); + break; + } + case OPCODE_LD_STRUCTURED: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LD_STRUCTURED\n"); + } + TranslateShaderStorageLoad(psInst); + break; + } + case OPCODE_LD_UAV_TYPED: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LD_UAV_TYPED\n"); + } + Operand* psDest = &psInst->asOperands[0]; + Operand* psSrc = &psInst->asOperands[2]; + Operand* psSrcAddr = &psInst->asOperands[1]; + + int srcCount = psSrc->GetNumSwizzleElements(); + int numParenthesis = 0; + uint32_t compMask = 0; + + switch (psInst->eResDim) + { + case RESOURCE_DIMENSION_TEXTURE3D: + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + compMask |= (1 << 2); + case RESOURCE_DIMENSION_TEXTURECUBE: + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + case RESOURCE_DIMENSION_TEXTURE2D: + case RESOURCE_DIMENSION_TEXTURE2DMS: + compMask |= (1 << 1); + case RESOURCE_DIMENSION_TEXTURE1D: + case RESOURCE_DIMENSION_BUFFER: + compMask |= 1; + break; + default: + ASSERT(0); + break; + } + + SHADER_VARIABLE_TYPE srcDataType = SVT_FLOAT; + const ResourceBinding* psBinding = 0; + psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, psSrc->ui32RegisterNumber, &psBinding); + switch (psBinding->ui32ReturnType) + { + case RETURN_TYPE_FLOAT: + srcDataType = SVT_FLOAT; + break; + case RETURN_TYPE_SINT: + srcDataType = SVT_INT; + break; + case RETURN_TYPE_UINT: + srcDataType = SVT_UINT; + break; + case RETURN_TYPE_SNORM: + case RETURN_TYPE_UNORM: + srcDataType = SVT_FLOAT; + break; + default: + ASSERT(0); + // Suppress uninitialised variable warning + srcDataType = SVT_VOID; + break; + } + + psContext->AddIndentation(); + AddAssignToDest(psDest, srcDataType, srcCount, psInst->ui32PreciseMask, &numParenthesis); + bcatcstr(glsl, "imageLoad("); + TranslateOperand(psSrc, TO_FLAG_NAME_ONLY); + bcatcstr(glsl, ", "); + TranslateOperand(psSrcAddr, TO_FLAG_INTEGER, compMask); + bcatcstr(glsl, ")"); + TranslateOperandSwizzleWithMask(psContext, psSrc, psDest->ui32CompMask, 0); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_STORE_RAW: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//STORE_RAW\n"); + } + TranslateShaderStorageStore(psInst); + break; + } + case OPCODE_STORE_STRUCTURED: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//STORE_STRUCTURED\n"); + } + TranslateShaderStorageStore(psInst); + break; + } + + case OPCODE_STORE_UAV_TYPED: + { + const ResourceBinding* psRes; + int foundResource; + uint32_t flags = TO_FLAG_INTEGER; + uint32_t opMask = OPERAND_4_COMPONENT_MASK_ALL; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//STORE_UAV_TYPED\n"); + } + psContext->AddIndentation(); + + foundResource = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, + psInst->asOperands[0].ui32RegisterNumber, + &psRes); + + ASSERT(foundResource); + + bcatcstr(glsl, "imageStore("); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_NAME_ONLY); + bcatcstr(glsl, ", "); + + switch 
(psRes->eDimension) + { + case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: + case REFLECT_RESOURCE_DIMENSION_BUFFER: + opMask = OPERAND_4_COMPONENT_MASK_X; + break; + case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: + case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; + flags |= TO_AUTO_EXPAND_TO_VEC2; + break; + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; + flags |= TO_AUTO_EXPAND_TO_VEC3; + break; + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: + flags |= TO_AUTO_EXPAND_TO_VEC4; + break; + default: + ASSERT(0); + break; + } + + TranslateOperand(&psInst->asOperands[1], flags, opMask); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[2], ResourceReturnTypeToFlag(psRes->ui32ReturnType)); + bformata(glsl, ");\n"); + + break; + } + case OPCODE_LD_RAW: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LD_RAW\n"); + } + + TranslateShaderStorageLoad(psInst); + break; + } + + case OPCODE_ATOMIC_AND: + case OPCODE_ATOMIC_OR: + case OPCODE_ATOMIC_XOR: + case OPCODE_ATOMIC_CMP_STORE: + case OPCODE_ATOMIC_IADD: + case OPCODE_ATOMIC_IMAX: + case OPCODE_ATOMIC_IMIN: + case OPCODE_ATOMIC_UMAX: + case OPCODE_ATOMIC_UMIN: + case OPCODE_IMM_ATOMIC_IADD: + case OPCODE_IMM_ATOMIC_AND: + case OPCODE_IMM_ATOMIC_OR: + case OPCODE_IMM_ATOMIC_XOR: + case OPCODE_IMM_ATOMIC_EXCH: + case OPCODE_IMM_ATOMIC_CMP_EXCH: + case OPCODE_IMM_ATOMIC_IMAX: + case OPCODE_IMM_ATOMIC_IMIN: + case OPCODE_IMM_ATOMIC_UMAX: + case OPCODE_IMM_ATOMIC_UMIN: + { + TranslateAtomicMemOp(psInst); + break; + } + case OPCODE_UBFE: + case OPCODE_IBFE: + { + int numParenthesis = 0; + int i; + uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); + SHADER_VARIABLE_TYPE dataType = psInst->eOpcode == OPCODE_UBFE ? SVT_UINT : SVT_INT; + uint32_t flags = psInst->eOpcode == OPCODE_UBFE ? 
TO_AUTO_BITCAST_TO_UINT : TO_AUTO_BITCAST_TO_INT; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_UBFE) + bcatcstr(glsl, "//OPCODE_UBFE\n"); + else + bcatcstr(glsl, "//OPCODE_IBFE\n"); + } + // Need to open this up, GLSL bitfieldextract uses same offset and width for all components + for (i = 0; i < 4; i++) + { + if ((writeMask & (1 << i)) == 0) + continue; + psContext->AddIndentation(); + psInst->asOperands[0].ui32CompMask = (1 << i); + psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; + AddAssignToDest(&psInst->asOperands[0], dataType, 1, psInst->ui32PreciseMask, &numParenthesis); + + bcatcstr(glsl, "bitfieldExtract("); + TranslateOperand(&psInst->asOperands[3], flags, (1 << i)); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[2], TO_AUTO_BITCAST_TO_INT, (1 << i)); + bcatcstr(glsl, ", "); + TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_INT, (1 << i)); + bcatcstr(glsl, ")"); + AddAssignPrologue(numParenthesis); + } + break; + } + case OPCODE_RCP: + { + const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); + const uint32_t srcElemCount = psInst->asOperands[1].GetNumSwizzleElements(); + int numParenthesis = 0; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//RCP\n"); + } + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, srcElemCount, psInst->ui32PreciseMask, &numParenthesis); + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, SVT_FLOAT, destElemCount, false)); + bcatcstr(glsl, "(1.0) / "); + bcatcstr(glsl, GetConstructorForTypeGLSL(psContext, SVT_FLOAT, destElemCount, false)); + bcatcstr(glsl, "("); + numParenthesis++; + TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE, psInst->asOperands[0].GetAccessMask()); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_F32TOF16: + { + uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//F32TOF16\n"); + } + + for (int i = 0; i < 4; i++) + { + if ((writeMask & (1 << i)) == 0) + continue; + psContext->AddIndentation(); + psInst->asOperands[0].ui32CompMask = (1 << i); + psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, psInst->ui32PreciseMask, &numParenthesis); + + bcatcstr(glsl, "packHalf2x16(vec2("); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE, (1 << i)); + bcatcstr(glsl, ", 0.0))"); + AddAssignPrologue(numParenthesis); + } + break; + } + case OPCODE_F16TOF32: + { + uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//F16TOF32\n"); + } + + for (int i = 0; i < 4; i++) + { + if ((writeMask & (1 << i)) == 0) + continue; + psContext->AddIndentation(); + psInst->asOperands[0].ui32CompMask = (1 << i); + psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, psInst->ui32PreciseMask, &numParenthesis); + + bcatcstr(glsl, "unpackHalf2x16("); + TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_UINT, (1 << i)); + bcatcstr(glsl, ").x"); + AddAssignPrologue(numParenthesis); + } + break; + } + case OPCODE_INEG: + { + int numParenthesis = 0; + if (psContext->flags & 
HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//INEG\n"); + } + //dest = 0 - src0 + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), psInst->ui32PreciseMask, &numParenthesis); + + bcatcstr(glsl, "0 - "); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_DERIV_RTX_COARSE: + case OPCODE_DERIV_RTX_FINE: + case OPCODE_DERIV_RTX: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DERIV_RTX\n"); + } + CallHelper1("dFdx", psInst, 0, 1, 1); + break; + } + case OPCODE_DERIV_RTY_COARSE: + case OPCODE_DERIV_RTY_FINE: + case OPCODE_DERIV_RTY: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DERIV_RTY\n"); + } + CallHelper1("dFdy", psInst, 0, 1, 1); + break; + } + case OPCODE_LRP: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LRP\n"); + } + CallHelper3("mix", psInst, 0, 2, 3, 1, 1); + break; + } + case OPCODE_DP2ADD: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DP2ADD\n"); + } + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, " = dot(vec2("); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); + bcatcstr(glsl, "), vec2("); + TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ")) + "); + TranslateOperand(&psInst->asOperands[3], TO_FLAG_NONE); + bcatcstr(glsl, ";\n"); + break; + } + case OPCODE_POW: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//POW\n"); + } + psContext->AddIndentation(); + TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, " = pow(abs("); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); + bcatcstr(glsl, "), "); + TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ");\n"); + break; + } + + case OPCODE_IMM_ATOMIC_ALLOC: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_ALLOC\n"); + } + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, psInst->ui32PreciseMask, &numParenthesis); + if (isVulkan || avoidAtomicCounter) + bcatcstr(glsl, "atomicAdd("); + else + bcatcstr(glsl, "atomicCounterIncrement("); + ResourceName(glsl, psContext, RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber, 0); + bformata(glsl, "_counter"); + if (isVulkan || avoidAtomicCounter) + bcatcstr(glsl, ", 1u)"); + else + bcatcstr(glsl, ")"); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_IMM_ATOMIC_CONSUME: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_CONSUME\n"); + } + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, psInst->ui32PreciseMask, &numParenthesis); + if (isVulkan || avoidAtomicCounter) + bcatcstr(glsl, "(atomicAdd("); + else + bcatcstr(glsl, "atomicCounterDecrement("); + ResourceName(glsl, psContext, RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber, 0); + 
bformata(glsl, "_counter"); + if (isVulkan || avoidAtomicCounter) + bcatcstr(glsl, ", 0xffffffffu) + 0xffffffffu)"); + else + bcatcstr(glsl, ")"); + AddAssignPrologue(numParenthesis); + break; + } + + case OPCODE_NOT: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//NOT\n"); + } + // Adreno 3xx fails on ~a with "Internal compiler error: unexpected operator", use op_not instead + if (!HaveNativeBitwiseOps(psContext->psShader->eTargetLanguage) || psContext->psShader->eTargetLanguage == LANG_ES_300) + { + UseExtraFunctionDependency("op_not"); + + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), psInst->ui32PreciseMask, &numParenthesis); + bcatcstr(glsl, "op_not("); + numParenthesis++; + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); + AddAssignPrologue(numParenthesis); + } + else + { + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), psInst->ui32PreciseMask, &numParenthesis); + + bcatcstr(glsl, "~("); + numParenthesis++; + TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); + AddAssignPrologue(numParenthesis); + } + break; + } + case OPCODE_XOR: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//XOR\n"); + } + CallBinaryOp("^", psInst, 0, 1, 2, SVT_UINT); + break; + } + case OPCODE_RESINFO: + { + uint32_t destElem; + uint32_t mask = psInst->asOperands[0].GetAccessMask(); + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//RESINFO\n"); + } + + for (destElem = 0; destElem < 4; ++destElem) + { + if (1 << destElem & mask) + GetResInfoData(psInst, psInst->asOperands[2].aui32Swizzle[destElem], destElem); + } + + break; + } + case OPCODE_BUFINFO: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//BUFINFO\n"); + } + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_INT, 1, psInst->ui32PreciseMask, &numParenthesis); + TranslateOperand(&psInst->asOperands[1], TO_FLAG_NAME_ONLY); + bcatcstr(glsl, "_buf.length()"); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_SAMPLE_INFO: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_INFO\n"); + } + const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType; + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_FLOAT ? 
SVT_FLOAT : SVT_UINT, 1, psInst->ui32PreciseMask, &numParenthesis); + bcatcstr(glsl, "textureSamples("); + std::string texName = ResourceName(psContext, RGROUP_TEXTURE, psInst->asOperands[1].ui32RegisterNumber, 0); + if (psContext->IsVulkan()) + { + std::string vulkanSamplerName = GetVulkanDummySamplerName(); + + const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber]; + std::string smpType = GetSamplerType(psContext, eResDim, psInst->asOperands[2].ui32RegisterNumber); + std::ostringstream oss; + oss << smpType; + oss << "(" << texName << ", " << vulkanSamplerName << ")"; + texName = oss.str(); + } + bcatcstr(glsl, texName.c_str()); + bcatcstr(glsl, ")"); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_DMAX: + case OPCODE_DMIN: + case OPCODE_DMUL: + case OPCODE_DEQ: + case OPCODE_DGE: + case OPCODE_DLT: + case OPCODE_DNE: + case OPCODE_DMOV: + case OPCODE_DMOVC: + case OPCODE_DTOF: + case OPCODE_FTOD: + case OPCODE_DDIV: + case OPCODE_DFMA: + case OPCODE_DRCP: + case OPCODE_MSAD: + case OPCODE_DTOI: + case OPCODE_DTOU: + case OPCODE_ITOD: + case OPCODE_UTOD: + default: + { + ASSERT(0); + break; + } + } + + if (psInst->bSaturate) //Saturate is only for floating point data (float opcodes or MOV) + { + int dstCount = psInst->asOperands[0].GetNumSwizzleElements(); + + const bool workaroundAdrenoBugs = psContext->psShader->eTargetLanguage == LANG_ES_300; + + if (workaroundAdrenoBugs) + bcatcstr(glsl, "#ifdef UNITY_ADRENO_ES3\n"); + + for (int i = workaroundAdrenoBugs ? 0 : 1; i < 2; ++i) + { + const bool generateWorkaround = (i == 0); + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, dstCount, psInst->ui32PreciseMask, &numParenthesis); + bcatcstr(glsl, generateWorkaround ? "min(max(" : "clamp("); + TranslateOperand(&psInst->asOperands[0], TO_AUTO_BITCAST_TO_FLOAT); + bcatcstr(glsl, generateWorkaround ? 
", 0.0), 1.0)" : ", 0.0, 1.0)"); + AddAssignPrologue(numParenthesis); + + if (generateWorkaround) + bcatcstr(glsl, "#else\n"); + } + + if (workaroundAdrenoBugs) + bcatcstr(glsl, "#endif\n"); + } +} diff --git a/third_party/HLSLcc/src/toGLSLOperand.cpp b/third_party/HLSLcc/src/toGLSLOperand.cpp new file mode 100644 index 0000000..55b8db5 --- /dev/null +++ b/third_party/HLSLcc/src/toGLSLOperand.cpp @@ -0,0 +1,1892 @@ +#include "internal_includes/toGLSLOperand.h" +#include "internal_includes/HLSLccToolkit.h" +#include "internal_includes/HLSLCrossCompilerContext.h" +#include "internal_includes/languages.h" +#include "bstrlib.h" +#include "hlslcc.h" +#include "internal_includes/debug.h" +#include "internal_includes/Shader.h" +#include "internal_includes/toGLSL.h" +#include "internal_includes/languages.h" +#include + +#include + +#include +#include + +using namespace HLSLcc; + +#ifndef fpcheck +#ifdef _MSC_VER +#define fpcheck(x) (_isnan(x) || !_finite(x)) +#else +#define fpcheck(x) (std::isnan(x) || std::isinf(x)) +#endif +#endif // #ifndef fpcheck + +// In case we need to fake dynamic indexing +static const char *squareBrackets[2][2] = { { "DynamicIndex(", ")" }, { "[", "]" } }; + +// Returns nonzero if types are just different precisions of the same underlying type +static bool AreTypesCompatible(SHADER_VARIABLE_TYPE a, uint32_t ui32TOFlag) +{ + SHADER_VARIABLE_TYPE b = TypeFlagsToSVTType(ui32TOFlag); + + if (a == b) + return true; + + // Special case for array indices: both uint and int are fine + if ((ui32TOFlag & TO_FLAG_INTEGER) && (ui32TOFlag & TO_FLAG_UNSIGNED_INTEGER) && + (a == SVT_INT || a == SVT_INT16 || a == SVT_UINT || a == SVT_UINT16)) + return true; + + if ((a == SVT_FLOAT || a == SVT_FLOAT16 || a == SVT_FLOAT10) && + (b == SVT_FLOAT || b == SVT_FLOAT16 || b == SVT_FLOAT10)) + return true; + + if ((a == SVT_INT || a == SVT_INT16 || a == SVT_INT12) && + (b == SVT_INT || b == SVT_INT16 || a == SVT_INT12)) + return true; + + if ((a == SVT_UINT || a == SVT_UINT16) && + (b == SVT_UINT || b == SVT_UINT16)) + return true; + + return false; +} + +void TranslateOperandSwizzle(HLSLCrossCompilerContext* psContext, const Operand* psOperand, int iRebase) +{ + TranslateOperandSwizzleWithMask(psContext, psOperand, OPERAND_4_COMPONENT_MASK_ALL, iRebase); +} + +void TranslateOperandSwizzleWithMask(HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase) +{ + TranslateOperandSwizzleWithMask(*psContext->currentGLSLString, psContext, psOperand, ui32ComponentMask, iRebase); +} + +void TranslateOperandSwizzleWithMask(bstring glsl, HLSLCrossCompilerContext* psContext, const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase) +{ + uint32_t accessMask = ui32ComponentMask & psOperand->GetAccessMask(); + if (psOperand->eType == OPERAND_TYPE_INPUT) + { + int regSpace = psOperand->GetRegisterSpace(psContext); + // Skip swizzle for scalar inputs, but only if we haven't redirected them + if (regSpace == 0) + { + if ((psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && + (psContext->psShader->abScalarInput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) + { + return; + } + } + else + { + if ((psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && + (psContext->psShader->abScalarInput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) + { + return; + } + } + } + if (psOperand->eType == OPERAND_TYPE_OUTPUT) + { + 
int regSpace = psOperand->GetRegisterSpace(psContext); + // Skip swizzle for scalar outputs, but only if we haven't redirected them + if (regSpace == 0) + { + if ((psContext->psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && + (psContext->psShader->abScalarOutput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) + { + return; + } + } + else + { + if ((psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && + (psContext->psShader->abScalarOutput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) + { + return; + } + } + } + + if (psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER) + { + /*ConstantBuffer* psCBuf = NULL; + ShaderVar* psVar = NULL; + int32_t index = -1; + GetConstantBufferFromBindingPoint(psOperand->aui32ArraySizes[0], &psContext->psShader->sInfo, &psCBuf); + + //Access the Nth vec4 (N=psOperand->aui32ArraySizes[1]) + //then apply the sizzle. + + GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], psOperand->aui32Swizzle, psCBuf, &psVar, &index); + + bformata(glsl, ".%s", psVar->Name); + if(index != -1) + { + bformata(glsl, "[%d]", index); + }*/ + + //return; + } + + if (psOperand->iWriteMaskEnabled && + psOperand->iNumComponents != 1) + { + //Component Mask + if (psOperand->eSelMode == OPERAND_4_COMPONENT_MASK_MODE) + { + uint32_t mask; + if (psOperand->ui32CompMask != 0) + mask = psOperand->ui32CompMask & ui32ComponentMask; + else + mask = ui32ComponentMask; + + if (mask != 0 && mask != OPERAND_4_COMPONENT_MASK_ALL) + { + bcatcstr(glsl, "."); + if (mask & OPERAND_4_COMPONENT_MASK_X) + { + ASSERT(iRebase == 0); + bcatcstr(glsl, "x"); + } + if (mask & OPERAND_4_COMPONENT_MASK_Y) + { + ASSERT(iRebase <= 1); + bformata(glsl, "%c", "xy"[1 - iRebase]); + } + if (mask & OPERAND_4_COMPONENT_MASK_Z) + { + ASSERT(iRebase <= 2); + bformata(glsl, "%c", "xyz"[2 - iRebase]); + } + if (mask & OPERAND_4_COMPONENT_MASK_W) + { + ASSERT(iRebase <= 3); + bformata(glsl, "%c", "xyzw"[3 - iRebase]); + } + } + } + else + //Component Swizzle + if (psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + if (ui32ComponentMask != OPERAND_4_COMPONENT_MASK_ALL || + !(psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_X && + psOperand->aui32Swizzle[1] == OPERAND_4_COMPONENT_Y && + psOperand->aui32Swizzle[2] == OPERAND_4_COMPONENT_Z && + psOperand->aui32Swizzle[3] == OPERAND_4_COMPONENT_W + ) + ) + { + uint32_t i; + + bcatcstr(glsl, "."); + + for (i = 0; i < 4; ++i) + { + if (!(ui32ComponentMask & (OPERAND_4_COMPONENT_MASK_X << i))) + continue; + + if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_X) + { + ASSERT(iRebase == 0); + bcatcstr(glsl, "x"); + } + else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_Y) + { + ASSERT(iRebase <= 1); + bformata(glsl, "%c", "xy"[1 - iRebase]); + } + else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_Z) + { + ASSERT(iRebase <= 2); + bformata(glsl, "%c", "xyz"[2 - iRebase]); + } + else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_W) + { + ASSERT(iRebase <= 3); + bformata(glsl, "%c", "xyzw"[3 - iRebase]); + } + } + } + } + else if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) // ui32ComponentMask is ignored in this case + { + bcatcstr(glsl, "."); + + if (psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_X) + { + ASSERT(iRebase == 0); + bcatcstr(glsl, "x"); + } + else if (psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_Y) + { + ASSERT(iRebase <= 1); + bformata(glsl, "%c", "xy"[1 - iRebase]); + 
} + else if (psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_Z) + { + ASSERT(iRebase <= 2); + bformata(glsl, "%c", "xyz"[2 - iRebase]); + } + else if (psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_W) + { + ASSERT(iRebase <= 3); + bformata(glsl, "%c", "xyzw"[3 - iRebase]); + } + } + + //Component Select 1 + } +} + +void ToGLSL::TranslateOperandIndex(const Operand* psOperand, int index) +{ + int i = index; + + bstring glsl = *psContext->currentGLSLString; + + ASSERT(index < psOperand->iIndexDims); + + switch (psOperand->eIndexRep[i]) + { + case OPERAND_INDEX_IMMEDIATE32: + { + bformata(glsl, "[%d]", psOperand->aui32ArraySizes[i]); + break; + } + case OPERAND_INDEX_RELATIVE: + { + bcatcstr(glsl, "["); + TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_INTEGER); + bcatcstr(glsl, "]"); + break; + } + case OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: + { + bcatcstr(glsl, "["); //Indexes must be integral. + TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_INTEGER); + bformata(glsl, " + %d]", psOperand->aui32ArraySizes[i]); + break; + } + default: + { + break; + } + } +} + +void ToGLSL::TranslateOperandIndexMAD(const Operand* psOperand, int index, uint32_t multiply, uint32_t add) +{ + int i = index; + int isGeoShader = psContext->psShader->eShaderType == GEOMETRY_SHADER ? 1 : 0; + + bstring glsl = *psContext->currentGLSLString; + + ASSERT(index < psOperand->iIndexDims); + + switch (psOperand->eIndexRep[i]) + { + case OPERAND_INDEX_IMMEDIATE32: + { + if (i > 0 || isGeoShader) + { + bformata(glsl, "[%d*%d+%d]", psOperand->aui32ArraySizes[i], multiply, add); + } + else + { + bformata(glsl, "%d*%d+%d", psOperand->aui32ArraySizes[i], multiply, add); + } + break; + } + case OPERAND_INDEX_RELATIVE: + { + bcatcstr(glsl, "[int("); //Indexes must be integral. + TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_NONE); + bformata(glsl, ")*%d+%d]", multiply, add); + break; + } + case OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: + { + bcatcstr(glsl, "[(int("); //Indexes must be integral. + TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_NONE); + bformata(glsl, ") + %d)*%d+%d]", psOperand->aui32ArraySizes[i], multiply, add); + break; + } + default: + { + break; + } + } +} + +static std::string GetBitcastOp(HLSLCrossCompilerContext *psContext, SHADER_VARIABLE_TYPE from, SHADER_VARIABLE_TYPE to, uint32_t numComponents, bool &needsBitcastOp) +{ + if (psContext->psShader->eTargetLanguage == LANG_METAL) + { + needsBitcastOp = false; + std::ostringstream oss; + oss << "as_type<"; + oss << GetConstructorForTypeMetal(to, numComponents); + oss << ">"; + return oss.str(); + } + else + { + needsBitcastOp = true; + if ((to == SVT_FLOAT || to == SVT_FLOAT16 || to == SVT_FLOAT10) && from == SVT_INT) + return "intBitsToFloat"; + else if ((to == SVT_FLOAT || to == SVT_FLOAT16 || to == SVT_FLOAT10) && from == SVT_UINT) + return "uintBitsToFloat"; + else if (to == SVT_INT && (from == SVT_FLOAT || from == SVT_FLOAT16 || from == SVT_FLOAT10)) + return "floatBitsToInt"; + else if (to == SVT_UINT && (from == SVT_FLOAT || from == SVT_FLOAT16 || from == SVT_FLOAT10)) + return "floatBitsToUint"; + } + + ASSERT(0); + return "ERROR missing components in GetBitcastOp()"; +} + +// Helper function to print out a single 32-bit immediate value in desired format +static void printImmediate32(HLSLCrossCompilerContext *psContext, bstring glsl, uint32_t value, SHADER_VARIABLE_TYPE eType) +{ + int needsParenthesis = 0; + + // Print floats as bit patterns. 
+ if ((eType == SVT_FLOAT || eType == SVT_FLOAT16 || eType == SVT_FLOAT10) && psContext->psShader->ui32MajorVersion > 3 && HaveBitEncodingOps(psContext->psShader->eTargetLanguage) && fpcheck(*((float *)(&value)))) + { + if (psContext->psShader->eTargetLanguage == LANG_METAL) + bcatcstr(glsl, "as_type("); + else + bcatcstr(glsl, "intBitsToFloat("); + eType = SVT_INT; + needsParenthesis = 1; + } + + switch (eType) + { + default: + ASSERT(0); + case SVT_INT: + case SVT_INT16: + case SVT_INT12: + if (value > 0x3ffffffe) + { + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bformata(glsl, "int(0x%Xu)", value); + else + bformata(glsl, "%d", value); + } + else + bformata(glsl, "%d", value); + break; + case SVT_UINT: + case SVT_UINT16: + // Adreno bug workaround (happens only on pre-lollipop Nexus 4's): '0u' is treated as int. + if (value == 0 && psContext->psShader->eTargetLanguage == LANG_ES_300) + bcatcstr(glsl, "uint(0u)"); + else + bformata(glsl, "%uu", value); + break; + case SVT_FLOAT: + case SVT_FLOAT10: + case SVT_FLOAT16: + HLSLcc::PrintFloat(glsl, *((float *)(&value))); + break; + case SVT_BOOL: + if (value == 0) + bcatcstr(glsl, "false"); + else + bcatcstr(glsl, "true"); + } + if (needsParenthesis) + bcatcstr(glsl, ")"); +} + +void ToGLSL::TranslateVariableNameWithMask(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase, bool forceNoConversion) +{ + TranslateVariableNameWithMask(*psContext->currentGLSLString, psOperand, ui32TOFlag, pui32IgnoreSwizzle, ui32CompMask, piRebase, forceNoConversion); +} + +void ToGLSL::DeclareDynamicIndexWrapper(const struct ShaderVarType* psType) +{ + DeclareDynamicIndexWrapper(psType->name.c_str(), psType->Class, psType->Type, psType->Rows, psType->Columns, psType->Elements); +} + +void ToGLSL::DeclareDynamicIndexWrapper(const char* psName, SHADER_VARIABLE_CLASS eClass, SHADER_VARIABLE_TYPE eType, uint32_t ui32Rows, uint32_t ui32Columns, uint32_t ui32Elements) +{ + bstring glsl = psContext->beforeMain; + + const char* suffix = "DynamicIndex"; + const uint32_t maxElemCount = 256; + uint32_t elemCount = ui32Elements; + + if (m_FunctionDefinitions.find(psName) != m_FunctionDefinitions.end()) + return; + + // Add a simple define that one can search and replace on devices that support dynamic indexing the usual way + if (m_FunctionDefinitions.find(suffix) == m_FunctionDefinitions.end()) + { + m_FunctionDefinitions.insert(std::make_pair(suffix, "#define UNITY_DYNAMIC_INDEX_ES2 0\n")); + m_FunctionDefinitionsOrder.push_back(suffix); + } + + bcatcstr(glsl, "\n"); + + char name[256]; + if ((eClass == SVC_MATRIX_COLUMNS || eClass == SVC_MATRIX_ROWS) && psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) + sprintf(name, HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING "%s", ui32Rows, ui32Columns, psName); + else + memcpy(name, psName, strlen(psName) + 1); + + if (eClass == SVC_STRUCT) + { + bformata(glsl, "%s_Type %s%s", psName, psName, suffix); + } + else if (eClass == SVC_MATRIX_COLUMNS || eClass == SVC_MATRIX_ROWS) + { + if (psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) + { + // Translate matrices into vec4 arrays + bformata(glsl, "%s " HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING "%s%s", HLSLcc::GetConstructorForType(psContext, eType, 4), ui32Rows, ui32Columns, psName, suffix); + elemCount = (eClass == SVC_MATRIX_COLUMNS ? 
ui32Columns : ui32Rows); + if (ui32Elements > 1) + { + elemCount *= ui32Elements; + } + } + else + { + bformata(glsl, "%s %s%s", HLSLcc::GetMatrixTypeName(psContext, eType, ui32Columns, ui32Rows).c_str(), psName, suffix); + } + } + else if (eClass == SVC_VECTOR && ui32Columns > 1) + { + bformata(glsl, "%s %s%s", HLSLcc::GetConstructorForType(psContext, eType, ui32Columns), psName, suffix); + } + else if ((eClass == SVC_SCALAR) || (eClass == SVC_VECTOR && ui32Columns == 1)) + { + bformata(glsl, "%s %s%s", HLSLcc::GetConstructorForType(psContext, eType, 1), psName, suffix); + } + bformata(glsl, "(int i){\n"); + bcatcstr(glsl, "#if UNITY_DYNAMIC_INDEX_ES2\n"); + bformata(glsl, " return %s[i];\n", name); + bcatcstr(glsl, "#else\n"); + bformata(glsl, "#define d_ar %s\n", name); + bformata(glsl, " if (i <= 0) return d_ar[0];"); + + // Let's draw a line somewhere with this workaround + for (int i = 1; i < std::min(elemCount, maxElemCount); i++) + { + bformata(glsl, " else if (i == %d) return d_ar[%d];", i, i); + } + bformata(glsl, "\n return d_ar[0];\n"); + bformata(glsl, "#undef d_ar\n"); + bcatcstr(glsl, "#endif\n"); + bformata(glsl, "}\n\n"); + m_FunctionDefinitions.insert(std::make_pair(psName, "")); + m_FunctionDefinitionsOrder.push_back(psName); +} + +void ToGLSL::TranslateVariableNameWithMask(bstring glsl, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase, bool forceNoConversion) +{ + int numParenthesis = 0; + int hasCtor = 0; + int needsBoolUpscale = 0; // If nonzero, bools need * 0xffffffff in them + SHADER_VARIABLE_TYPE requestedType = TypeFlagsToSVTType(ui32TOFlag); + SHADER_VARIABLE_TYPE eType = psOperand->GetDataType(psContext, requestedType); + int numComponents = psOperand->GetNumSwizzleElements(ui32CompMask); + int requestedComponents = 0; + int scalarWithSwizzle = 0; + + *pui32IgnoreSwizzle = 0; + + if (psOperand->eType == OPERAND_TYPE_TEMP) + { + // Check for scalar + if (psContext->psShader->GetTempComponentCount(eType, psOperand->ui32RegisterNumber) == 1 && psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + scalarWithSwizzle = 1; // Going to need a constructor + } + } + + if (psOperand->eType == OPERAND_TYPE_INPUT) + { + // Check for scalar + // You would think checking would be easy but there is a caveat: + // checking abScalarInput might report as scalar, while in reality that was redirected and now is vector so swizzle must be preserved + // as an example consider we have input: + // float2 x; float y; + // and later on we do + // tex2D(xxx, fixed2(x.x, y)); + // in that case we will generate redirect but which ui32RegisterNumber will be used for it is not strictly "specified" + // so we may end up with treating it as scalar (even though it is vector now) + const int redirectInput = psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber]; + const bool wasRedirected = redirectInput == 0xFF || redirectInput == 0xFE; + + const int scalarInput = psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber]; + if (!wasRedirected && (scalarInput & psOperand->GetAccessMask()) && (psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)) + { + scalarWithSwizzle = 1; + *pui32IgnoreSwizzle = 1; + } + } + + if (psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER && psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE && psOperand->IsSwizzleReplicated()) + { + // Needs scalar check as well + const ConstantBuffer* 
psCBuf = NULL; + const ShaderVarType* psVarType = NULL; + int32_t rebase = 0; + bool isArray; + psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf); + ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], psOperand->aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags); + if (psVarType->Columns == 1) + { + scalarWithSwizzle = 1; // Needs a constructor + *pui32IgnoreSwizzle = 1; + } + } + + if (piRebase) + *piRebase = 0; + + if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC2) + requestedComponents = 2; + else if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC3) + requestedComponents = 3; + else if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC4) + requestedComponents = 4; + + requestedComponents = std::max(requestedComponents, numComponents); + + bool needsBitcastOp = false; + + if (!(ui32TOFlag & (TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY | TO_FLAG_DECLARATION_NAME))) + { + if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32 || psOperand->eType == OPERAND_TYPE_IMMEDIATE64) + { + // Mark the operand type to match whatever we're asking for in the flags. + ((Operand *)psOperand)->aeDataType[0] = requestedType; + ((Operand *)psOperand)->aeDataType[1] = requestedType; + ((Operand *)psOperand)->aeDataType[2] = requestedType; + ((Operand *)psOperand)->aeDataType[3] = requestedType; + } + + if (AreTypesCompatible(eType, ui32TOFlag) == 0) + { + if (CanDoDirectCast(psContext, eType, requestedType) || !HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + { + hasCtor = 1; + if (eType == SVT_BOOL && !forceNoConversion) + { + needsBoolUpscale = 1; + // make sure to wrap the whole thing in parens so the upscale + // multiply only applies to the bool + bcatcstr(glsl, "("); + numParenthesis++; + } + + // case 1154828: In case of OPERAND_TYPE_INPUT_PRIMITIVEID we end up here with requestedComponents == 0, GetConstructorForType below would return empty string and we miss the cast to uint + if (requestedComponents < 1) + requestedComponents = 1; + + bformata(glsl, "%s(", GetConstructorForType(psContext, requestedType, requestedComponents, false)); + numParenthesis++; + } + else + { + // Direct cast not possible, need to do bitcast. + if (IsESLanguage(psContext->psShader->eTargetLanguage) && (requestedType == SVT_UINT)) + { + // without explicit cast Adreno may treat the return type of floatBitsToUint as signed int (case 1256567) + bformata(glsl, "%s(", GetConstructorForType(psContext, requestedType, requestedComponents, false)); + numParenthesis++; + } + bformata(glsl, "%s(", GetBitcastOp(psContext, eType, requestedType, requestedComponents, /*out*/ needsBitcastOp).c_str()); + numParenthesis++; + } + } + + // Add ctor if needed (upscaling). 
Type conversion is already handled above, so here we must + // use the original type to not make type conflicts in bitcasts + if (((numComponents < requestedComponents) || (scalarWithSwizzle != 0)) && (hasCtor == 0)) + { +// ASSERT(numComponents == 1); + bformata(glsl, "%s(", GetConstructorForType(psContext, eType, requestedComponents, false)); + numParenthesis++; + hasCtor = 1; + } + } + + + switch (psOperand->eType) + { + case OPERAND_TYPE_IMMEDIATE32: + { + if (psOperand->iNumComponents == 1) + { + printImmediate32(psContext, glsl, *((unsigned int*)(&psOperand->afImmediates[0])), requestedType); + } + else + { + int i; + int firstItemAdded = 0; + if (hasCtor == 0) + { + bformata(glsl, "%s(", GetConstructorForType(psContext, requestedType, requestedComponents, false)); + numParenthesis++; + hasCtor = 1; + } + for (i = 0; i < 4; i++) + { + uint32_t uval; + if (!(ui32CompMask & (1 << i))) + continue; + + if (firstItemAdded) + bcatcstr(glsl, ", "); + uval = *((uint32_t*)(&psOperand->afImmediates[i >= psOperand->iNumComponents ? psOperand->iNumComponents - 1 : i])); + printImmediate32(psContext, glsl, uval, requestedType); + firstItemAdded = 1; + } + bcatcstr(glsl, ")"); + *pui32IgnoreSwizzle = 1; + numParenthesis--; + } + break; + } + case OPERAND_TYPE_IMMEDIATE64: + { + if (psOperand->iNumComponents == 1) + { + bformata(glsl, "%.17g", + psOperand->adImmediates[0]); + } + else + { + bformata(glsl, "dvec4(%.17g, %.17g, %.17g, %.17g)", + psOperand->adImmediates[0], + psOperand->adImmediates[1], + psOperand->adImmediates[2], + psOperand->adImmediates[3]); + if (psOperand->iNumComponents != 4) + { + AddSwizzleUsingElementCount(glsl, psOperand->iNumComponents); + } + } + break; + } + case OPERAND_TYPE_INPUT: + { + int regSpace = psOperand->GetRegisterSpace(psContext); + switch (psOperand->iIndexDims) + { + case INDEX_2D: + { + const ShaderInfo::InOutSignature *psSig = NULL; + psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, &psSig); + + if ((psSig->eSystemValueType == NAME_POSITION && psSig->ui32SemanticIndex == 0) || + (psSig->semanticName == "POS" && psSig->ui32SemanticIndex == 0) || + (psSig->semanticName == "SV_POSITION" && psSig->ui32SemanticIndex == 0) || + (psSig->semanticName == "POSITION" && psSig->ui32SemanticIndex == 0)) + { + bcatcstr(glsl, "gl_in"); + TranslateOperandIndex(psOperand, 0);//Vertex index + bcatcstr(glsl, ".gl_Position"); + } + else + { + std::string name = psContext->GetDeclaredInputName(psOperand, piRebase, 0, pui32IgnoreSwizzle); + + bformata(glsl, "%s", name.c_str()); + TranslateOperandIndex(psOperand, 0);//Vertex index + } + break; + } + default: + { + if (psOperand->eIndexRep[0] == OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE) + { + bformata(glsl, "phase%d_Input%d_%d[", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + TranslateOperand(psOperand->m_SubOperands[0].get(), TO_FLAG_INTEGER); + bcatcstr(glsl, "]"); + } + else + { + if (psContext->psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0) + { + const uint32_t parentIndex = psContext->psShader->aIndexedInputParents[regSpace][psOperand->ui32RegisterNumber]; + bformata(glsl, "phase%d_Input%d_%d[%d]", psContext->currentPhase, regSpace, parentIndex, + psOperand->ui32RegisterNumber - parentIndex); + } + else + { + std::string name = psContext->GetDeclaredInputName(psOperand, piRebase, 0, pui32IgnoreSwizzle); + + // Rewrite the variable name if we're using framebuffer fetch + if 
(psContext->psShader->extensions->EXT_shader_framebuffer_fetch && + psContext->psShader->eShaderType == PIXEL_SHADER && + psContext->flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH) + { + // With ES2, leave separate variable names for input + if (!WriteToFragData(psContext->psShader->eTargetLanguage) && + name.size() == 13 && !strncmp(name.c_str(), "vs_SV_Target", 12)) + bcatcstr(glsl, name.substr(3).c_str()); + else + bcatcstr(glsl, name.c_str()); + } + else + { + bcatcstr(glsl, name.c_str()); + } + } + } + break; + } + } + break; + } + case OPERAND_TYPE_OUTPUT: + { + /*if(psContext->psShader->eShaderType == HULL_SHADER && psOperand->eIndexRep[0] == OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE) + { + int stream = 0; + const char* name = GetDeclaredOutputName(psContext, HULL_SHADER, psOperand, &stream); + bcatcstr(glsl, name); + } + else*/ + { + int stream = 0; + std::string name = psContext->GetDeclaredOutputName(psOperand, &stream, pui32IgnoreSwizzle, piRebase, 0); + + // If we are writing out to built in type then we need to redirect tot he built in arrays + // this is safe to do as HLSL enforces 1:1 mapping, so output maps to gl_InvocationID by default + if (name == "gl_Position" && psContext->psShader->eShaderType == HULL_SHADER) + { + bcatcstr(glsl, "gl_out[gl_InvocationID]."); + } + + bcatcstr(glsl, name.c_str()); + + if (psOperand->m_SubOperands[0].get()) + { + bcatcstr(glsl, "["); + TranslateOperand(psOperand->m_SubOperands[0].get(), TO_AUTO_BITCAST_TO_INT); + bcatcstr(glsl, "]"); + } + } + break; + } + case OPERAND_TYPE_OUTPUT_DEPTH: + if (psContext->psShader->eTargetLanguage == LANG_ES_100 && !psContext->EnableExtension("GL_EXT_frag_depth")) + { + bcatcstr(psContext->extensions, "#define gl_FragDepth gl_FragDepthEXT\n"); + } + // fall through + case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: + case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: + { + bcatcstr(glsl, "gl_FragDepth"); + break; + } + case OPERAND_TYPE_TEMP: + { + SHADER_VARIABLE_TYPE eTempType = psOperand->GetDataType(psContext); + + if (psOperand->eSpecialName == NAME_UNDEFINED && psOperand->specialName.length()) + { + bcatcstr(glsl, psOperand->specialName.c_str()); + break; + } + + bcatcstr(glsl, HLSLCC_TEMP_PREFIX); + ASSERT(psOperand->ui32RegisterNumber < 0x10000); // Sanity check after temp splitting. 
+ switch (eTempType) + { + case SVT_FLOAT: + ASSERT(psContext->psShader->psFloatTempSizes[psOperand->ui32RegisterNumber] != 0); + if (psContext->psShader->psFloatTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_FLOAT16: + ASSERT(psContext->psShader->psFloat16TempSizes[psOperand->ui32RegisterNumber] != 0); + bcatcstr(glsl, "16_"); + if (psContext->psShader->psFloat16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_FLOAT10: + ASSERT(psContext->psShader->psFloat10TempSizes[psOperand->ui32RegisterNumber] != 0); + bcatcstr(glsl, "10_"); + if (psContext->psShader->psFloat10TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_INT: + ASSERT(psContext->psShader->psIntTempSizes[psOperand->ui32RegisterNumber] != 0); + bcatcstr(glsl, "i"); + if (psContext->psShader->psIntTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_INT16: + ASSERT(psContext->psShader->psInt16TempSizes[psOperand->ui32RegisterNumber] != 0); + bcatcstr(glsl, "i16_"); + if (psContext->psShader->psInt16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_INT12: + ASSERT(psContext->psShader->psInt12TempSizes[psOperand->ui32RegisterNumber] != 0); + bcatcstr(glsl, "i12_"); + if (psContext->psShader->psInt12TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_UINT: + ASSERT(psContext->psShader->psUIntTempSizes[psOperand->ui32RegisterNumber] != 0); + bcatcstr(glsl, "u"); + if (psContext->psShader->psUIntTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_UINT16: + ASSERT(psContext->psShader->psUInt16TempSizes[psOperand->ui32RegisterNumber] != 0); + bcatcstr(glsl, "u16_"); + if (psContext->psShader->psUInt16TempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_DOUBLE: + ASSERT(psContext->psShader->psDoubleTempSizes[psOperand->ui32RegisterNumber] != 0); + bcatcstr(glsl, "d"); + if (psContext->psShader->psDoubleTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + case SVT_BOOL: + ASSERT(psContext->psShader->psBoolTempSizes[psOperand->ui32RegisterNumber] != 0); + bcatcstr(glsl, "b"); + if (psContext->psShader->psBoolTempSizes[psOperand->ui32RegisterNumber] == 1 && pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + break; + default: + ASSERT(0 && "Should never get here!"); + } + // m_ForLoopInductorName overrides the register number, if available + if (psOperand->m_ForLoopInductorName != 0) + { + bformata(glsl, "_loop_%d", psOperand->m_ForLoopInductorName); + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + } + else + bformata(glsl, "%d", psOperand->ui32RegisterNumber); + break; + } + case OPERAND_TYPE_SPECIAL_IMMCONSTINT: + { + bformata(glsl, "IntImmConst%d", psOperand->ui32RegisterNumber); + break; + } + case OPERAND_TYPE_SPECIAL_IMMCONST: + { + ASSERT(0 && "DX9 shaders no longer supported!"); + break; + } + case OPERAND_TYPE_SPECIAL_OUTBASECOLOUR: + { + bcatcstr(glsl, "BaseColour"); + break; + } + case OPERAND_TYPE_SPECIAL_OUTOFFSETCOLOUR: + { + bcatcstr(glsl, "OffsetColour"); + break; + } + case OPERAND_TYPE_SPECIAL_POSITION: + { + bcatcstr(glsl, "gl_Position"); + break; + } + 
case OPERAND_TYPE_SPECIAL_FOG: + { + bcatcstr(glsl, "Fog"); + break; + } + case OPERAND_TYPE_SPECIAL_POINTSIZE: + { + bcatcstr(glsl, "gl_PointSize"); + break; + } + case OPERAND_TYPE_SPECIAL_ADDRESS: + { + bcatcstr(glsl, "Address"); + break; + } + case OPERAND_TYPE_SPECIAL_LOOPCOUNTER: + { + bcatcstr(glsl, "LoopCounter"); + pui32IgnoreSwizzle[0] = 1; + break; + } + case OPERAND_TYPE_SPECIAL_TEXCOORD: + { + bformata(glsl, "TexCoord%d", psOperand->ui32RegisterNumber); + break; + } + case OPERAND_TYPE_CONSTANT_BUFFER: + { + const char* StageName = "VS"; + const ConstantBuffer* psCBuf = NULL; + const ShaderVarType* psVarType = NULL; + int32_t index = -1; + std::vector<uint32_t> arrayIndices; + bool isArray = false; + bool isSubpassMS = false; + psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf); + + switch (psContext->psShader->eShaderType) + { + case PIXEL_SHADER: + { + StageName = "PS"; + break; + } + case HULL_SHADER: + { + StageName = "HS"; + break; + } + case DOMAIN_SHADER: + { + StageName = "DS"; + break; + } + case GEOMETRY_SHADER: + { + StageName = "GS"; + break; + } + case COMPUTE_SHADER: + { + StageName = "CS"; + break; + } + default: + { + break; + } + } + + if (psCBuf && psCBuf->name == "OVR_multiview") + { + pui32IgnoreSwizzle[0] = 1; + bformata(glsl, "gl_ViewID_OVR"); + break; + } + + + if (ui32TOFlag & TO_FLAG_DECLARATION_NAME) + { + pui32IgnoreSwizzle[0] = 1; + } + + // FIXME: With ES 3.0 the buffer name is often not prepended to variable names + if (((psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT) != HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT) && + ((psContext->flags & HLSLCC_FLAG_DISABLE_GLOBALS_STRUCT) != HLSLCC_FLAG_DISABLE_GLOBALS_STRUCT)) + { + if (psCBuf) + { + //$Globals. + if (psCBuf->name[0] == '$') + { + bformata(glsl, "Globals%s", StageName); + } + else + { + bformata(glsl, "%s%s", psCBuf->name.c_str(), StageName); + } + if ((ui32TOFlag & TO_FLAG_DECLARATION_NAME) != TO_FLAG_DECLARATION_NAME) + { + bcatcstr(glsl, "."); + } + } + else + { + //bformata(glsl, "cb%d", psOperand->aui32ArraySizes[0]); + } + } + + if ((ui32TOFlag & TO_FLAG_DECLARATION_NAME) != TO_FLAG_DECLARATION_NAME) + { + //Work out the variable name. Don't apply swizzle to that variable yet. 
+ int32_t rebase = 0; + + ASSERT(psCBuf != NULL); + + uint32_t componentsNeeded = 1; + uint32_t minSwiz = 3; + uint32_t maxSwiz = 0; + if (psOperand->eSelMode != OPERAND_4_COMPONENT_SELECT_1_MODE) + { + int i; + for (i = 0; i < 4; i++) + { + if ((ui32CompMask & (1 << i)) == 0) + continue; + minSwiz = std::min(minSwiz, psOperand->aui32Swizzle[i]); + maxSwiz = std::max(maxSwiz, psOperand->aui32Swizzle[i]); + } + componentsNeeded = maxSwiz - minSwiz + 1; + } + else + { + minSwiz = maxSwiz = 1; + } + + // When we have a component mask that doesn't have .x set (this basically only happens when we manually open operands into components) + // We have to pull down the swizzle array to match the first bit that's actually set + uint32_t tmpSwizzle[4] = { 0 }; + int firstBitSet = 0; + if (ui32CompMask == 0) + ui32CompMask = 0xf; + while ((ui32CompMask & (1 << firstBitSet)) == 0) + firstBitSet++; + std::copy(&psOperand->aui32Swizzle[firstBitSet], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]); + + ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &psVarType, &isArray, &arrayIndices, &rebase, psContext->flags); + + // Get a possible dynamic array index + bstring dynamicIndex = bfromcstr(""); + bool needsIndexCalcRevert = false; + bool isAoS = ((!isArray && arrayIndices.size() > 0) || (isArray && arrayIndices.size() > 1)); + + Operand *psDynIndexOp = psOperand->GetDynamicIndexOperand(psContext, psVarType, isAoS, &needsIndexCalcRevert); + + if (psDynIndexOp != NULL) + { + SHADER_VARIABLE_TYPE eType = psDynIndexOp->GetDataType(psContext); + uint32_t opFlags = TO_FLAG_INTEGER; + + if (eType != SVT_INT && eType != SVT_UINT) + opFlags = TO_AUTO_BITCAST_TO_INT; + + TranslateOperand(dynamicIndex, psDynIndexOp, opFlags, 0x1); // We only care about the first component + } + + char *tmp = bstr2cstr(dynamicIndex, '\0'); + std::string dynamicIndexStr = tmp; + bcstrfree(tmp); + bdestroy(dynamicIndex); + + if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE || ((componentsNeeded + minSwiz) <= psVarType->Columns)) + { + // Simple case: just access one component + std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(psVarType, arrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES); + + if ((psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME) && psCBuf) + { + std::string instanceName = UniformBufferInstanceName(psContext, psCBuf->name); + bformata(glsl, "%s.", instanceName.c_str()); + } + + // Special hack for MSAA subpass inputs: the index is actually the sample index, so do special handling later. 
+ if (strncmp(fullName.c_str(), "subpassLoad", 11) == 0 && fullName[fullName.length() - 1] == ',') + isSubpassMS = true; + + if (((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0) && ((psVarType->Class == SVC_MATRIX_ROWS) || (psVarType->Class == SVC_MATRIX_COLUMNS))) + { + // We'll need to add the prefix only to the last section of the name + size_t commaPos = fullName.find_last_of('.'); + char prefix[256]; + sprintf(prefix, HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING, psVarType->Rows, psVarType->Columns); + if (commaPos == std::string::npos) + fullName.insert(0, prefix); + else + fullName.insert(commaPos + 1, prefix); + + bformata(glsl, "%s", fullName.c_str()); + } + else + bformata(glsl, "%s", fullName.c_str()); + } + else + { + // Non-simple case: build vec4 and apply mask + + std::string instanceNamePrefix; + if ((psContext->flags & HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME) && psCBuf) + { + std::string instanceName = UniformBufferInstanceName(psContext, psCBuf->name); + instanceNamePrefix = instanceName + "."; + } + + uint32_t i; + std::vector tmpArrayIndices; + bool tmpIsArray; + int32_t tmpRebase; + int firstItemAdded = 0; + + bformata(glsl, "%s(", GetConstructorForType(psContext, psVarType->Type, GetNumberBitsSet(ui32CompMask), false)); + for (i = 0; i < 4; i++) + { + const ShaderVarType *tmpVarType = NULL; + if ((ui32CompMask & (1 << i)) == 0) + continue; + tmpRebase = 0; + if (firstItemAdded != 0) + bcatcstr(glsl, ", "); + else + firstItemAdded = 1; + + memset(tmpSwizzle, 0, sizeof(uint32_t) * 4); + std::copy(&psOperand->aui32Swizzle[i], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]); + + ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &tmpVarType, &tmpIsArray, &tmpArrayIndices, &tmpRebase, psContext->flags); + std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(tmpVarType, tmpArrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES); + + // Special hack for MSAA subpass inputs: the index is actually the sample index, so do special handling later. + if (strncmp(fullName.c_str(), "subpassLoad", 11) == 0 && fullName[fullName.length() - 1] == ',') + isSubpassMS = true; + + if (tmpVarType->Class == SVC_SCALAR) + { + bformata(glsl, "%s%s", instanceNamePrefix.c_str(), fullName.c_str()); + } + else + { + uint32_t swizzle; + tmpRebase /= 4; // 0 => 0, 4 => 1, 8 => 2, 12 /= 3 + swizzle = psOperand->aui32Swizzle[i] - tmpRebase; + + bformata(glsl, "%s%s", instanceNamePrefix.c_str(), fullName.c_str()); + bformata(glsl, ".%c", "xyzw"[swizzle]); + } + } + bcatcstr(glsl, ")"); + // Clear rebase, we've already done it. + rebase = 0; + // Also swizzle. + *pui32IgnoreSwizzle = 1; + } + + if (isArray) + { + index = arrayIndices.back(); + + // Dynamic index is atm supported only at the root array level. Add here only if there is no such parent. + bool hasDynamicIndex = !dynamicIndexStr.empty() && (arrayIndices.size() <= 1); + bool hasImmediateIndex = (index != -1) && !(hasDynamicIndex && index == 0); + + if (hasDynamicIndex || hasImmediateIndex) + { + std::ostringstream fullIndexOss; + if (hasDynamicIndex && hasImmediateIndex) + fullIndexOss << "(" << dynamicIndexStr << " + " << index << ")"; + else if (hasDynamicIndex) + fullIndexOss << dynamicIndexStr; + else // hasImmediateStr + fullIndexOss << index; + + int squareBracketType = hasDynamicIndex ? 
HaveDynamicIndexing(psContext, psOperand) : 1; + + if (!squareBracketType) + DeclareDynamicIndexWrapper(psVarType); + + if (((psVarType->Class == SVC_MATRIX_COLUMNS) || (psVarType->Class == SVC_MATRIX_ROWS)) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0)) + { + // Special handling for old matrix arrays + bformata(glsl, "%s%s / 4%s", squareBrackets[squareBracketType][0], fullIndexOss.str().c_str(), squareBrackets[squareBracketType][1]); + bformata(glsl, "%s%s %% 4%s", squareBrackets[squareBracketType][0], fullIndexOss.str().c_str(), squareBrackets[squareBracketType][1]); + } + else // This path is atm the default + { + if (isSubpassMS) + bformata(glsl, "%s%s%s", " ", fullIndexOss.str().c_str(), ")"); + else + bformata(glsl, "%s%s%s", squareBrackets[squareBracketType][0], fullIndexOss.str().c_str(), squareBrackets[squareBracketType][1]); + } + } + } + + if (psVarType && psVarType->Class == SVC_VECTOR && !*pui32IgnoreSwizzle) + { + switch (rebase) + { + case 4: + { + if (psVarType->Columns == 2) + { + //.x(GLSL) is .y(HLSL). .y(GLSL) is .z(HLSL) + bcatcstr(glsl, ".xxyx"); + } + else if (psVarType->Columns == 3) + { + //.x(GLSL) is .y(HLSL). .y(GLSL) is .z(HLSL) .z(GLSL) is .w(HLSL) + bcatcstr(glsl, ".xxyz"); + } + break; + } + case 8: + { + if (psVarType->Columns == 2) + { + //.x(GLSL) is .z(HLSL). .y(GLSL) is .w(HLSL) + bcatcstr(glsl, ".xxxy"); + } + break; + } + case 0: + default: + { + //No rebase, but extend to vec4 if needed + uint32_t maxComp = psOperand->GetMaxComponent(); + if (psVarType->Columns == 2 && maxComp > 2) + { + bcatcstr(glsl, ".xyxx"); + } + else if (psVarType->Columns == 3 && maxComp > 3) + { + bcatcstr(glsl, ".xyzx"); + } + break; + } + } + } + + if (psVarType && psVarType->Class == SVC_SCALAR) + { + *pui32IgnoreSwizzle = 1; + } + } + break; + } + case OPERAND_TYPE_RESOURCE: + { + ResourceName(glsl, psContext, RGROUP_TEXTURE, psOperand->ui32RegisterNumber, 0); + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_SAMPLER: + { + bformata(glsl, "Sampler%d", psOperand->ui32RegisterNumber); + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_FUNCTION_BODY: + { + const uint32_t ui32FuncBody = psOperand->ui32RegisterNumber; + const uint32_t ui32FuncTable = psContext->psShader->aui32FuncBodyToFuncTable[ui32FuncBody]; + //const uint32_t ui32FuncPointer = psContext->psShader->aui32FuncTableToFuncPointer[ui32FuncTable]; + const uint32_t ui32ClassType = psContext->psShader->sInfo.aui32TableIDToTypeID[ui32FuncTable]; + const char* ClassTypeName = &psContext->psShader->sInfo.psClassTypes[ui32ClassType].name[0]; + const uint32_t ui32UniqueClassFuncIndex = psContext->psShader->ui32NextClassFuncName[ui32ClassType]++; + + bformata(glsl, "%s_Func%d", ClassTypeName, ui32UniqueClassFuncIndex); + break; + } + case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID: + case OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID: + { + bcatcstr(glsl, "phaseInstanceID"); // Not a real builtin, but passed as a function parameter. 
+ *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: + { + if (psContext->IsVulkan() || psContext->IsSwitch()) + { + bformata(glsl, "ImmCB_%d", psContext->currentPhase); + TranslateOperandIndex(psOperand, 0); + } + else + { + int squareBracketType = HaveDynamicIndexing(psContext, psOperand); + + bformata(glsl, "ImmCB_%d_%d_%d", psContext->currentPhase, psOperand->ui32RegisterNumber, psOperand->m_Rebase); + if (psOperand->m_SubOperands[0].get()) + { + bformata(glsl, "%s", squareBrackets[squareBracketType][0]); //Indexes must be integral. Offset is already taken care of above. + TranslateOperand(psOperand->m_SubOperands[0].get(), TO_FLAG_INTEGER); + bformata(glsl, "%s", squareBrackets[squareBracketType][1]); + } + if (psOperand->m_Size == 1) + *pui32IgnoreSwizzle = 1; + } + break; + } + case OPERAND_TYPE_INPUT_DOMAIN_POINT: + { + bcatcstr(glsl, "gl_TessCoord"); + break; + } + case OPERAND_TYPE_INPUT_CONTROL_POINT: + { + const ShaderInfo::InOutSignature *psSig = NULL; + psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, &psSig); + + if ((psSig->eSystemValueType == NAME_POSITION && psSig->ui32SemanticIndex == 0) || + (psSig->semanticName == "POS" && psSig->ui32SemanticIndex == 0) || + (psSig->semanticName == "SV_POSITION" && psSig->ui32SemanticIndex == 0)) + { + bcatcstr(glsl, "gl_in"); + TranslateOperandIndex(psOperand, 0);//Vertex index + bcatcstr(glsl, ".gl_Position"); + } + else + { + std::string name = psContext->GetDeclaredInputName(psOperand, piRebase, 0, pui32IgnoreSwizzle); + + bformata(glsl, "%s", name.c_str()); + TranslateOperandIndex(psOperand, 0);//Vertex index + + // Check for scalar + if ((psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) + *pui32IgnoreSwizzle = 1; + } + break; + } + case OPERAND_TYPE_NULL: + { + // Null register, used to discard results of operations + if (psContext->psShader->eTargetLanguage == LANG_ES_100) + { + // On ES2 we can pass this as an argument to a function, e.g. fake integer operations that we do. See case 1124159. + bcatcstr(glsl, "null"); + bool alreadyDeclared = false; + std::string toDeclare = "vec4 null;"; + for (size_t i = 0; i < m_AdditionalDefinitions.size(); ++i) + { + if (toDeclare == m_AdditionalDefinitions[i]) + { + alreadyDeclared = true; + break; + } + } + + if (!alreadyDeclared) + m_AdditionalDefinitions.push_back(toDeclare); + } + else + bcatcstr(glsl, "//null"); + break; + } + case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID: + { + bcatcstr(glsl, "gl_InvocationID"); + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: + { + bcatcstr(glsl, "gl_SampleMask[0]"); + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_INPUT_COVERAGE_MASK: + { + bcatcstr(glsl, "gl_SampleMaskIn[0]"); + //Skip swizzle on scalar types. 
+ *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_INPUT_THREAD_ID://SV_DispatchThreadID + { + bcatcstr(glsl, "gl_GlobalInvocationID"); + break; + } + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP://SV_GroupThreadID + { + bcatcstr(glsl, "gl_LocalInvocationID"); + break; + } + case OPERAND_TYPE_INPUT_THREAD_GROUP_ID://SV_GroupID + { + bcatcstr(glsl, "gl_WorkGroupID"); + break; + } + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED://SV_GroupIndex + { + if (requestedComponents > 1 && !hasCtor) + { + bcatcstr(glsl, GetConstructorForType(psContext, eType, requestedComponents, false)); + bcatcstr(glsl, "("); + numParenthesis++; + hasCtor = 1; + } + + for (uint32_t i = 0; i < requestedComponents; i++) + { + bcatcstr(glsl, "gl_LocalInvocationIndex"); + if (i < requestedComponents - 1) + bcatcstr(glsl, ", "); + } + *pui32IgnoreSwizzle = 1; // No swizzle meaningful for scalar. + break; + } + case OPERAND_TYPE_UNORDERED_ACCESS_VIEW: + { + ResourceName(glsl, psContext, RGROUP_UAV, psOperand->ui32RegisterNumber, 0); + break; + } + case OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY: + { + bformata(glsl, "TGSM%d", psOperand->ui32RegisterNumber); + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_INPUT_PRIMITIVEID: + { + if (psContext->psShader->eShaderType == GEOMETRY_SHADER) + bcatcstr(glsl, "gl_PrimitiveIDIn"); // LOL OpenGL + else + bcatcstr(glsl, "gl_PrimitiveID"); + + break; + } + case OPERAND_TYPE_INDEXABLE_TEMP: + { + bformata(glsl, "TempArray%d", psOperand->aui32ArraySizes[0]); + bcatcstr(glsl, "["); + if (psOperand->aui32ArraySizes[1] != 0 || !psOperand->m_SubOperands[1].get()) + bformata(glsl, "%d", psOperand->aui32ArraySizes[1]); + + if (psOperand->m_SubOperands[1].get()) + { + if (psOperand->aui32ArraySizes[1] != 0) + bcatcstr(glsl, "+"); + TranslateOperand(psOperand->m_SubOperands[1].get(), TO_FLAG_INTEGER); + } + bcatcstr(glsl, "]"); + break; + } + case OPERAND_TYPE_STREAM: + { + bformata(glsl, "%d", psOperand->ui32RegisterNumber); + break; + } + case OPERAND_TYPE_INPUT_GS_INSTANCE_ID: + { + // In HLSL the instance id is uint, so cast here. 
+ bcatcstr(glsl, "uint(gl_InvocationID)"); + break; + } + case OPERAND_TYPE_THIS_POINTER: + { + /* + The "this" register is a register that provides up to 4 pieces of information: + X: Which CB holds the instance data + Y: Base element offset of the instance data within the instance CB + Z: Base sampler index + W: Base Texture index + + Can be different for each function call + */ + break; + } + case OPERAND_TYPE_INPUT_PATCH_CONSTANT: + { + const ShaderInfo::InOutSignature* psIn; + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn); + *piRebase = psIn->iRebase; + switch (psIn->eSystemValueType) + { + case NAME_POSITION: + bcatcstr(glsl, "gl_Position"); + break; + case NAME_RENDER_TARGET_ARRAY_INDEX: + bcatcstr(glsl, "gl_Layer"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_CLIP_DISTANCE: + bcatcstr(glsl, "gl_ClipDistance"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_CULL_DISTANCE: + bcatcstr(glsl, "gl_CullDistance"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_VIEWPORT_ARRAY_INDEX: + bcatcstr(glsl, "gl_ViewportIndex"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_VERTEX_ID: + if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) + bcatcstr(glsl, "gl_VertexIndex"); + else + bcatcstr(glsl, "gl_VertexID"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_INSTANCE_ID: + if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0) + bcatcstr(glsl, "gl_InstanceIndex"); + else + bcatcstr(glsl, "gl_InstanceID"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_IS_FRONT_FACE: + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, "(gl_FrontFacing ? 0xffffffffu : uint(0))"); // Old ES3.0 Adrenos treat 0u as const int + else + bcatcstr(glsl, "(gl_FrontFacing ? 1 : 0)"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_PRIMITIVE_ID: + bcatcstr(glsl, "gl_PrimitiveID"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_LINE_DENSITY_TESSFACTOR: + if (psContext->psShader->aIndexedOutput[1][psOperand->ui32RegisterNumber]) + bcatcstr(glsl, "gl_TessLevelOuter"); + else + bcatcstr(glsl, "gl_TessLevelOuter[0]"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_LINE_DETAIL_TESSFACTOR: + bcatcstr(glsl, "gl_TessLevelOuter[1]"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: + bcatcstr(glsl, "gl_TessLevelOuter[2]"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: + bcatcstr(glsl, "gl_TessLevelOuter[3]"); + *pui32IgnoreSwizzle = 1; + break; + + case NAME_FINAL_TRI_INSIDE_TESSFACTOR: + case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: + if (psContext->psShader->aIndexedOutput[1][psOperand->ui32RegisterNumber]) + bcatcstr(glsl, "gl_TessLevelInner"); + else + bcatcstr(glsl, "gl_TessLevelInner[0]"); + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: + bcatcstr(glsl, "gl_TessLevelInner[1]"); + *pui32IgnoreSwizzle = 1; + break; + default: + bformata(glsl, "%spatch%s%d", psContext->psShader->eShaderType == HULL_SHADER ? 
psContext->outputPrefix : psContext->inputPrefix, psIn->semanticName.c_str(), psIn->ui32SemanticIndex); + // Disable swizzles if this is a scalar + if (psContext->psShader->eShaderType == HULL_SHADER) + { + if ((psContext->psShader->abScalarOutput[1][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) + *pui32IgnoreSwizzle = 1; + } + else + { + if ((psContext->psShader->abScalarInput[1][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) + *pui32IgnoreSwizzle = 1; + } + + break; + } + + + break; + } + default: + { + ASSERT(0); + break; + } + } + + if (hasCtor && (*pui32IgnoreSwizzle == 0)) + { + TranslateOperandSwizzleWithMask(glsl, psContext, psOperand, ui32CompMask, piRebase ? *piRebase : 0); + *pui32IgnoreSwizzle = 1; + } + + if (needsBitcastOp && (*pui32IgnoreSwizzle == 0)) + { + // some glsl compilers (Switch's GLSLc) emit warnings "u_xlat.w uninitialized" if generated code looks like: "floatBitsToUint(u_xlat).xz". Instead, generate: "floatBitsToUint(u_xlat.xz)" + TranslateOperandSwizzleWithMask(glsl, psContext, psOperand, ui32CompMask, piRebase ? *piRebase : 0); + *pui32IgnoreSwizzle = 1; + } + + if (needsBoolUpscale) + { + if (requestedType == SVT_UINT || requestedType == SVT_UINT16 || requestedType == SVT_UINT8) + bcatcstr(glsl, ") * 0xffffffffu"); + else + { + if (HaveUnsignedTypes(psContext->psShader->eTargetLanguage)) + bcatcstr(glsl, ") * int(0xffffffffu)"); + else + bcatcstr(glsl, ") * -1"); // GLSL ES 2 spec: high precision ints are guaranteed to have a range of (-2^16, 2^16) + } + + numParenthesis--; + bcatcstr(glsl, ")"); + numParenthesis--; + } + + while (numParenthesis != 0) + { + bcatcstr(glsl, ")"); + numParenthesis--; + } +} + +void ToGLSL::TranslateOperand(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t ui32ComponentMask, bool forceNoConversion) +{ + TranslateOperand(*psContext->currentGLSLString, psOperand, ui32TOFlag, ui32ComponentMask, forceNoConversion); +} + +void ToGLSL::TranslateOperand(bstring glsl, const Operand* psOperand, uint32_t ui32TOFlag, uint32_t ui32ComponentMask, bool forceNoConversion) +{ + uint32_t ui32IgnoreSwizzle = 0; + int iRebase = 0; + + // in single-component mode there is no need to use mask + if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) + ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL; + + if (psContext->psShader->ui32MajorVersion <= 3) + { + ui32TOFlag &= ~(TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_BITCAST_TO_INT | TO_AUTO_BITCAST_TO_UINT); + } + + if (!HaveUnsignedTypes(psContext->psShader->eTargetLanguage) && (ui32TOFlag & TO_FLAG_UNSIGNED_INTEGER)) + { + ui32TOFlag &= ~TO_FLAG_UNSIGNED_INTEGER; + ui32TOFlag |= TO_FLAG_INTEGER; + } + + if (ui32TOFlag & TO_FLAG_NAME_ONLY) + { + TranslateVariableNameWithMask(glsl, psOperand, ui32TOFlag, &ui32IgnoreSwizzle, OPERAND_4_COMPONENT_MASK_ALL, &iRebase, forceNoConversion); + return; + } + + switch (psOperand->eModifier) + { + case OPERAND_MODIFIER_NONE: + { + break; + } + case OPERAND_MODIFIER_NEG: + { + bcatcstr(glsl, "(-"); + break; + } + case OPERAND_MODIFIER_ABS: + { + bcatcstr(glsl, "abs("); + break; + } + case OPERAND_MODIFIER_ABSNEG: + { + bcatcstr(glsl, "-abs("); + break; + } + } + + TranslateVariableNameWithMask(glsl, psOperand, ui32TOFlag, &ui32IgnoreSwizzle, ui32ComponentMask, &iRebase, forceNoConversion); + + if (psContext->psShader->eShaderType == HULL_SHADER && psOperand->eType == OPERAND_TYPE_OUTPUT && + psOperand->ui32RegisterNumber != 0 && psOperand->iArrayElements != 0 && psOperand->eIndexRep[0] != 
OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE + && psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE) + { + bcatcstr(glsl, "[gl_InvocationID]"); + } + + if (!ui32IgnoreSwizzle) + { + TranslateOperandSwizzleWithMask(glsl, psContext, psOperand, ui32ComponentMask, iRebase); + } + + switch (psOperand->eModifier) + { + case OPERAND_MODIFIER_NONE: + { + break; + } + case OPERAND_MODIFIER_NEG: + { + bcatcstr(glsl, ")"); + break; + } + case OPERAND_MODIFIER_ABS: + { + bcatcstr(glsl, ")"); + break; + } + case OPERAND_MODIFIER_ABSNEG: + { + bcatcstr(glsl, ")"); + break; + } + } +} + +std::string ResourceName(HLSLCrossCompilerContext* psContext, ResourceGroup group, const uint32_t ui32RegisterNumber, const int bZCompare) +{ + std::ostringstream oss; + const ResourceBinding* psBinding = 0; + int found; + + found = psContext->psShader->sInfo.GetResourceFromBindingPoint(group, ui32RegisterNumber, &psBinding); + + if (bZCompare) + { + oss << "hlslcc_zcmp"; + } + + if (found) + { + int i = 0; + std::string name = psBinding->name; + uint32_t ui32ArrayOffset = ui32RegisterNumber - psBinding->ui32BindPoint; + + while (i < name.length()) + { + //array syntax [X] becomes _0_ + //Otherwise declarations could end up as: + //uniform sampler2D SomeTextures[0]; + //uniform sampler2D SomeTextures[1]; + if (name[i] == '[' || name[i] == ']') + name[i] = '_'; + + ++i; + } + + if (ui32ArrayOffset) + { + oss << name << ui32ArrayOffset; + } + else + { + oss << name; + } + if (psContext->IsVulkan() && group == RGROUP_UAV) + oss << "_origX" << ui32RegisterNumber << "X"; + } + else + { + oss << "UnknownResource" << ui32RegisterNumber; + } + std::string res = oss.str(); + // Prefix sampler names with 'sampler' unless it already starts with it + if (group == RGROUP_SAMPLER) + { + if (strncmp(res.c_str(), "sampler", 7) != 0) + res.insert(0, "sampler"); + } + + return res; +} + +void ResourceName(bstring targetStr, HLSLCrossCompilerContext* psContext, ResourceGroup group, const uint32_t ui32RegisterNumber, const int bZCompare) +{ + bstring glsl = (targetStr == NULL) ? 
*psContext->currentGLSLString : targetStr; + std::string res = ResourceName(psContext, group, ui32RegisterNumber, bZCompare); + bcatcstr(glsl, res.c_str()); +} + +std::string TextureSamplerName(ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare) +{ + std::ostringstream oss; + const ResourceBinding* psTextureBinding = 0; + const ResourceBinding* psSamplerBinding = 0; + int foundTexture, foundSampler; + uint32_t i = 0; + uint32_t ui32ArrayOffset; + + foundTexture = psShaderInfo->GetResourceFromBindingPoint(RGROUP_TEXTURE, ui32TextureRegisterNumber, &psTextureBinding); + foundSampler = psShaderInfo->GetResourceFromBindingPoint(RGROUP_SAMPLER, ui32SamplerRegisterNumber, &psSamplerBinding); + + if (!foundTexture || !foundSampler) + { + oss << "UnknownResource" << ui32TextureRegisterNumber << "_" << ui32SamplerRegisterNumber; + return oss.str(); + } + + ui32ArrayOffset = ui32TextureRegisterNumber - psTextureBinding->ui32BindPoint; + + std::string texName = psTextureBinding->name; + + while (i < texName.length()) + { + //array syntax [X] becomes _0_ + //Otherwise declarations could end up as: + //uniform sampler2D SomeTextures[0]; + //uniform sampler2D SomeTextures[1]; + if (texName[i] == '[' || texName[i] == ']') + { + texName[i] = '_'; + } + + ++i; + } + + + if (bZCompare) + { + oss << "hlslcc_zcmp"; + } + + + if (ui32ArrayOffset) + { + oss << texName << ui32ArrayOffset << "TEX_with_SMP" << psSamplerBinding->name; + } + else + { + oss << texName << "TEX_with_SMP" << psSamplerBinding->name; + } + + return oss.str(); +} + +void ConcatTextureSamplerName(bstring str, ShaderInfo* psShaderInfo, const uint32_t ui32TextureRegisterNumber, const uint32_t ui32SamplerRegisterNumber, const int bZCompare) +{ + std::string texturesamplername = TextureSamplerName(psShaderInfo, ui32TextureRegisterNumber, ui32SamplerRegisterNumber, bZCompare); + bcatcstr(str, texturesamplername.c_str()); +} + +// Take an uniform buffer name and generate an instance name. 
+std::string UniformBufferInstanceName(HLSLCrossCompilerContext* psContext, const std::string& name)
+{
+ if (name == "$Globals")
+ {
+ char prefix = 'A';
+ // Need to tweak Globals struct name to prevent clashes between shader stages
+ switch (psContext->psShader->eShaderType)
+ {
+ default:
+ ASSERT(0);
+ break;
+ case COMPUTE_SHADER:
+ prefix = 'C';
+ break;
+ case VERTEX_SHADER:
+ prefix = 'V';
+ break;
+ case PIXEL_SHADER:
+ prefix = 'P';
+ break;
+ case GEOMETRY_SHADER:
+ prefix = 'G';
+ break;
+ case HULL_SHADER:
+ prefix = 'H';
+ break;
+ case DOMAIN_SHADER:
+ prefix = 'D';
+ break;
+ }
+
+ return std::string("_") + prefix + name.substr(1);
+ }
+ else
+ return std::string("_") + name;
+}
diff --git a/third_party/HLSLcc/src/toMetal.cpp b/third_party/HLSLcc/src/toMetal.cpp
new file mode 100644
index 0000000..d66f55e
--- /dev/null
+++ b/third_party/HLSLcc/src/toMetal.cpp
@@ -0,0 +1,988 @@
+#include "internal_includes/toMetal.h"
+#include "internal_includes/HLSLCrossCompilerContext.h"
+#include "internal_includes/Shader.h"
+#include "internal_includes/debug.h"
+
+#include "internal_includes/Declaration.h"
+#include "internal_includes/toGLSL.h"
+#include "internal_includes/LoopTransform.h"
+#include "internal_includes/HLSLccToolkit.h"
+#include <algorithm>
+
+static void PrintStructDeclaration(HLSLCrossCompilerContext *psContext, bstring glsl, std::string &sname, StructDefinitions &defs)
+{
+ StructDefinition &d = defs[sname];
+ if (d.m_IsPrinted)
+ return;
+ d.m_IsPrinted = true;
+
+
+ std::for_each(d.m_Dependencies.begin(), d.m_Dependencies.end(), [&psContext, &glsl, &defs](std::string &depName)
+ {
+ PrintStructDeclaration(psContext, glsl, depName, defs);
+ });
+
+ bformata(glsl, "struct %s\n{\n", sname.c_str());
+ psContext->indent++;
+ std::for_each(d.m_Members.begin(), d.m_Members.end(), [&psContext, &glsl](const MemberDefinitions::value_type &mem)
+ {
+ psContext->AddIndentation();
+ bcatcstr(glsl, mem.second.c_str());
+ bcatcstr(glsl, ";\n");
+ });
+
+ psContext->indent--;
+ bcatcstr(glsl, "};\n\n");
+}
+
+void ToMetal::PrintStructDeclarations(StructDefinitions &defs, const char *name)
+{
+ bstring glsl = *psContext->currentGLSLString;
+ StructDefinition &args = defs[name];
+ std::for_each(args.m_Dependencies.begin(), args.m_Dependencies.end(), [this, glsl, &defs](std::string &sname)
+ {
+ PrintStructDeclaration(psContext, glsl, sname, defs);
+ });
+}
+
+static const char * GetPhaseFuncName(SHADER_PHASE_TYPE eType)
+{
+ switch (eType)
+ {
+ default:
+ case MAIN_PHASE: return "";
+ case HS_GLOBAL_DECL_PHASE: return "hs_global_decls";
+ case HS_FORK_PHASE: return "fork_phase";
+ case HS_CTRL_POINT_PHASE: return "control_point_phase";
+ case HS_JOIN_PHASE: return "join_phase";
+ }
+}
+
+static void DoHullShaderPassthrough(HLSLCrossCompilerContext *psContext)
+{
+ uint32_t i;
+ bstring glsl = *psContext->currentGLSLString;
+
+ for (i = 0; i < psContext->psShader->sInfo.psInputSignatures.size(); i++)
+ {
+ const ShaderInfo::InOutSignature *psSig = &psContext->psShader->sInfo.psInputSignatures[i];
+
+ psContext->AddIndentation();
+ if ((psSig->eSystemValueType == NAME_POSITION || psSig->semanticName == "POS") && psSig->ui32SemanticIndex == 0)
+ bformata(glsl, "%s%s = %scp[controlPointID].%s;\n", psContext->outputPrefix, "mtl_Position", psContext->inputPrefix, "mtl_Position");
+ else
+ bformata(glsl, "%s%s%d = %scp[controlPointID].%s%d;\n", psContext->outputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex, psContext->inputPrefix, psSig->semanticName.c_str(), psSig->ui32SemanticIndex);
+ } +} + +bool ToMetal::Translate() +{ + bstring glsl; + uint32_t i; + Shader* psShader = psContext->psShader; + uint32_t ui32Phase; + + psContext->psTranslator = this; + + SetIOPrefixes(); + psShader->ExpandSWAPCs(); + psShader->ForcePositionToHighp(); + psShader->AnalyzeIOOverlap(); + if ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0) + psShader->SetMaxSemanticIndex(); + psShader->FindUnusedGlobals(psContext->flags); + + psContext->indent = 0; + + glsl = bfromcstralloc(1024 * 10, ""); + bstring bodyglsl = bfromcstralloc(1024 * 10, ""); + + psContext->glsl = glsl; + for (i = 0; i < psShader->asPhases.size(); ++i) + { + psShader->asPhases[i].postShaderCode = bfromcstralloc(1024 * 5, ""); + psShader->asPhases[i].earlyMain = bfromcstralloc(1024 * 5, ""); + } + + psContext->currentGLSLString = &glsl; + psShader->eTargetLanguage = LANG_METAL; + psShader->extensions = NULL; + psContext->currentPhase = MAIN_PHASE; + + psContext->ClearDependencyData(); + + const SHADER_PHASE_TYPE ePhaseFuncCallOrder[3] = { HS_CTRL_POINT_PHASE, HS_FORK_PHASE, HS_JOIN_PHASE }; + uint32_t ui32PhaseCallIndex; + int hasControlPointPhase = 0; + + const int maxThreadsPerThreadGroup = 32; + int numPatchesInThreadGroup = 0; + bool hasControlPoint = false; + bool hasPatchConstant = false; + std::string tessVertexFunctionArguments; + + if ((psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0) + { + if (psContext->psDependencies) + { + m_StructDefinitions[""].m_Members = psContext->psDependencies->m_SharedFunctionMembers; + m_TextureSlots = psContext->psDependencies->m_SharedTextureSlots; + m_SamplerSlots = psContext->psDependencies->m_SharedSamplerSlots; + m_BufferSlots = psContext->psDependencies->m_SharedBufferSlots; + hasControlPoint = psContext->psDependencies->hasControlPoint; + hasPatchConstant = psContext->psDependencies->hasPatchConstant; + } + } + + ClampPartialPrecisions(); + + for (ui32Phase = 0; ui32Phase < psShader->asPhases.size(); ui32Phase++) + { + ShaderPhase &phase = psShader->asPhases[ui32Phase]; + phase.UnvectorizeImmMoves(); + psContext->DoDataTypeAnalysis(&phase); + phase.ResolveUAVProperties(psShader->sInfo); + ReserveUAVBindingSlots(&phase); // TODO: unify slot allocation code between gl/metal/vulkan + HLSLcc::DoLoopTransform(psContext, phase); + } + + psShader->PruneTempRegisters(); + + //Special case. Can have multiple phases. 
+ if (psShader->eShaderType == HULL_SHADER)
+ {
+ psShader->ConsolidateHullTempVars();
+
+ // Find out if we have a passthrough hull shader
+ for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++)
+ {
+ if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE)
+ hasControlPointPhase = 1;
+ }
+ }
+
+ // Hull and Domain shaders get merged into vertex shader output
+ if (!(psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER))
+ {
+ if (psContext->flags & HLSLCC_FLAG_DISABLE_FASTMATH)
+ bcatcstr(glsl, "#define UNITY_DISABLE_FASTMATH\n");
+ bcatcstr(glsl, "#include <metal_stdlib>\n#include <metal_texture>\nusing namespace metal;\n");
+ bcatcstr(glsl, "\n#if !(__HAVE_FMA__)\n#define fma(a,b,c) ((a) * (b) + (c))\n#endif\n\n");
+ }
+
+ if (psShader->eShaderType == HULL_SHADER)
+ {
+ psContext->indent++;
+
+ // Phase 1 is always the global decls phase, no instructions
+ for (i = 0; i < psShader->asPhases[1].psDecl.size(); ++i)
+ {
+ TranslateDeclaration(&psShader->asPhases[1].psDecl[i]);
+ }
+
+ if (hasControlPointPhase == 0)
+ {
+ DeclareHullShaderPassthrough();
+ }
+
+ for (ui32PhaseCallIndex = 0; ui32PhaseCallIndex < 3; ui32PhaseCallIndex++)
+ {
+ for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++)
+ {
+ ShaderPhase *psPhase = &psShader->asPhases[ui32Phase];
+ if (psPhase->ePhase != ePhaseFuncCallOrder[ui32PhaseCallIndex])
+ continue;
+ psContext->currentPhase = ui32Phase;
+
+ if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS)
+ {
+ // bformata(glsl, "//%s declarations\n", GetPhaseFuncName(psPhase->ePhase));
+ }
+ for (i = 0; i < psPhase->psDecl.size(); ++i)
+ {
+ TranslateDeclaration(&psPhase->psDecl[i]);
+ }
+ }
+ }
+
+ psContext->indent--;
+
+ numPatchesInThreadGroup = maxThreadsPerThreadGroup / std::max(psShader->sInfo.ui32TessInputControlPointCount, psShader->sInfo.ui32TessOutputControlPointCount);
+ }
+ else
+ {
+ psContext->indent++;
+
+ for (i = 0; i < psShader->asPhases[0].psDecl.size(); ++i)
+ TranslateDeclaration(&psShader->asPhases[0].psDecl[i]);
+
+ psContext->indent--;
+
+ // Output default implementations for framebuffer index remap if needed
+ if (m_NeedFBOutputRemapDecl)
+ bcatcstr(glsl, "#ifndef XLT_REMAP_O\n\t#define XLT_REMAP_O {0, 1, 2, 3, 4, 5, 6, 7}\n#endif\nconstexpr constant uint xlt_remap_o[] = XLT_REMAP_O;\n");
+ if (m_NeedFBInputRemapDecl)
+ bcatcstr(glsl, "#ifndef XLT_REMAP_I\n\t#define XLT_REMAP_I {0, 1, 2, 3, 4, 5, 6, 7}\n#endif\nconstexpr constant uint xlt_remap_i[] = XLT_REMAP_I;\n");
+
+ DeclareClipPlanes(&psShader->asPhases[0].psDecl[0], psShader->asPhases[0].psDecl.size());
+ GenerateTexturesReflection(&psContext->m_Reflection);
+ }
+
+ if (psShader->eShaderType == HULL_SHADER)
+ {
+ psContext->currentPhase = MAIN_PHASE;
+
+ if (m_StructDefinitions["Mtl_ControlPoint"].m_Members.size() > 0)
+ {
+ hasControlPoint = true;
+
+ m_StructDefinitions["Mtl_ControlPoint"].m_Dependencies.push_back("Mtl_ControlPoint");
+ m_StructDefinitions["Mtl_ControlPointIn"].m_Dependencies.push_back("Mtl_ControlPointIn");
+ PrintStructDeclarations(m_StructDefinitions, "Mtl_ControlPoint");
+ PrintStructDeclarations(m_StructDefinitions, "Mtl_ControlPointIn");
+ }
+
+ if (m_StructDefinitions["Mtl_PatchConstant"].m_Members.size() > 0)
+ {
+ hasPatchConstant = true;
+
+ m_StructDefinitions["Mtl_PatchConstant"].m_Dependencies.push_back("Mtl_PatchConstant");
+ m_StructDefinitions["Mtl_PatchConstantIn"].m_Dependencies.push_back("Mtl_PatchConstantIn");
+ PrintStructDeclarations(m_StructDefinitions, "Mtl_PatchConstant");
+ PrintStructDeclarations(m_StructDefinitions, "Mtl_PatchConstantIn");
+ }
+
+ m_StructDefinitions["Mtl_KernelPatchInfo"].m_Members.push_back(std::make_pair("numPatches", "uint numPatches"));
+ m_StructDefinitions["Mtl_KernelPatchInfo"].m_Members.push_back(std::make_pair("numControlPointsPerPatch", "ushort numControlPointsPerPatch"));
+
+ if (m_StructDefinitions["Mtl_KernelPatchInfo"].m_Members.size() > 0)
+ {
+ m_StructDefinitions["Mtl_KernelPatchInfo"].m_Dependencies.push_back("Mtl_KernelPatchInfo");
+ PrintStructDeclarations(m_StructDefinitions, "Mtl_KernelPatchInfo");
+ }
+
+ if (m_StructDefinitions[GetInputStructName()].m_Members.size() > 0)
+ {
+ m_StructDefinitions[GetInputStructName()].m_Dependencies.push_back(GetInputStructName());
+ if (psContext->psDependencies)
+ psContext->psDependencies->m_SharedDependencies.push_back(GetInputStructName());
+
+ // Hack, we're reusing Mtl_VertexOut as an hull shader input array, so no need to declare original contents
+ m_StructDefinitions[GetInputStructName()].m_Members.clear();
+
+ bstring vertexOut = bfromcstr("");
+ bformata(vertexOut, "Mtl_VertexOut cp[%d]", psShader->sInfo.ui32TessOutputControlPointCount);
+ m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair("cp", (const char *)vertexOut->data));
+ bdestroy(vertexOut);
+ }
+
+ if (psContext->psDependencies)
+ {
+ for (auto i = psContext->psDependencies->m_SharedFunctionMembers.begin(), in = psContext->psDependencies->m_SharedFunctionMembers.end(); i != in;)
+ {
+ tessVertexFunctionArguments += i->first.c_str();
+ ++i;
+
+ // we want to avoid trailing comma
+ if (i != in)
+ tessVertexFunctionArguments += ", ";
+ }
+ }
+ }
+
+ if (psShader->eShaderType == DOMAIN_SHADER)
+ {
+ // For preserving data layout, reuse Mtl_ControlPoint/Mtl_PatchConstant from hull shader
+ if (hasControlPoint)
+ m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair("cp", "patch_control_point<Mtl_ControlPointIn> cp"));
+ if (hasPatchConstant)
+ m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair("patch", "Mtl_PatchConstantIn patch"));
+ }
+
+ if ((psShader->eShaderType == VERTEX_SHADER || psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0)
+ {
+ if (psContext->psDependencies)
+ {
+ psContext->psDependencies->m_SharedFunctionMembers = m_StructDefinitions[""].m_Members;
+ psContext->psDependencies->m_SharedTextureSlots = m_TextureSlots;
+ psContext->psDependencies->m_SharedTextureSlots.SaveTotalShaderStageAllocationsCount();
+ psContext->psDependencies->m_SharedSamplerSlots = m_SamplerSlots;
+ psContext->psDependencies->m_SharedSamplerSlots.SaveTotalShaderStageAllocationsCount();
+ psContext->psDependencies->m_SharedBufferSlots = m_BufferSlots;
+ psContext->psDependencies->m_SharedBufferSlots.SaveTotalShaderStageAllocationsCount();
+ }
+ }
+
+ if (m_StructDefinitions[GetInputStructName()].m_Members.size() > 0)
+ {
+ if (psShader->eShaderType == HULL_SHADER)
+ {
+ if (psContext->psDependencies)
+ {
+ // if we go for fully procedural geometry we might end up without Mtl_VertexIn
+ for (std::vector<std::string>::const_iterator itr = psContext->psDependencies->m_SharedDependencies.begin(); itr != psContext->psDependencies->m_SharedDependencies.end(); itr++)
+ {
+ if (*itr == "Mtl_VertexIn")
+ {
+ m_StructDefinitions[""].m_Members.push_back(std::make_pair("vertexInput", "Mtl_VertexIn vertexInput [[ stage_in ]]"));
+ if (tessVertexFunctionArguments.length())
+ tessVertexFunctionArguments += ", ";
+ tessVertexFunctionArguments += "vertexInput"; + break; + } + } + } + + m_StructDefinitions[""].m_Members.push_back(std::make_pair("tID", "uint2 tID [[ thread_position_in_grid ]]")); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("groupID", "ushort2 groupID [[ threadgroup_position_in_grid ]]")); + + bstring buffer = bfromcstr(""); + uint32_t slot = 0; + + if (hasControlPoint) + { + slot = m_BufferSlots.GetBindingSlot(0xffff - 1, BindingSlotAllocator::ConstantBuffer); + bformata(buffer, "device Mtl_ControlPoint *controlPoints [[ buffer(%d) ]]", slot); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("controlPoints", (const char *)buffer->data)); + btrunc(buffer, 0); + } + + if (hasPatchConstant) + { + slot = m_BufferSlots.GetBindingSlot(0xffff - 2, BindingSlotAllocator::ConstantBuffer); + bformata(buffer, "device Mtl_PatchConstant *patchConstants [[ buffer(%d) ]]", slot); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("patchConstants", (const char *)buffer->data)); + btrunc(buffer, 0); + } + + slot = m_BufferSlots.GetBindingSlot(0xffff - 3, BindingSlotAllocator::ConstantBuffer); + bformata(buffer, "device %s *tessFactors [[ buffer(%d) ]]", psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_QUAD ? "MTLQuadTessellationFactorsHalf" : "MTLTriangleTessellationFactorsHalf", slot); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("tessFactors", (const char *)buffer->data)); + btrunc(buffer, 0); + + slot = m_BufferSlots.GetBindingSlot(0xffff - 4, BindingSlotAllocator::ConstantBuffer); + bformata(buffer, "constant Mtl_KernelPatchInfo &patchInfo [[ buffer(%d) ]]", slot); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("patchInfo", (const char *)buffer->data)); + btrunc(buffer, 0); + + bdestroy(buffer); + } + else if (psShader->eShaderType == VERTEX_SHADER && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0) + { + m_StructDefinitions[""].m_Members.push_back(std::make_pair("input", GetInputStructName() + " input")); + } + else + { + m_StructDefinitions[""].m_Members.push_back(std::make_pair("input", GetInputStructName() + " input [[ stage_in ]]")); + } + + m_StructDefinitions[""].m_Dependencies.push_back(GetInputStructName()); + if (psContext->psDependencies) + psContext->psDependencies->m_SharedDependencies.push_back(GetInputStructName()); + } + + if ((psShader->eShaderType == VERTEX_SHADER || psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0) + { + // m_StructDefinitions is inherited between tessellation shader stages but some builtins need exceptions + std::for_each(m_StructDefinitions[""].m_Members.begin(), m_StructDefinitions[""].m_Members.end(), [&psShader](MemberDefinitions::value_type &mem) + { + if (mem.first == "mtl_InstanceID") + { + if (psShader->eShaderType == VERTEX_SHADER) + mem.second.assign("uint mtl_InstanceID"); + else if (psShader->eShaderType == HULL_SHADER) + mem.second.assign("// mtl_InstanceID passed through groupID"); + } + else if (mem.first == "mtl_BaseInstance") + { + if (psShader->eShaderType == VERTEX_SHADER) + mem.second.assign("uint mtl_BaseInstance"); + else if (psShader->eShaderType == HULL_SHADER) + mem.second.assign("// mtl_BaseInstance ignored"); + } + else if (mem.first == "mtl_VertexID") + { + if (psShader->eShaderType == VERTEX_SHADER) + mem.second.assign("uint mtl_VertexID"); + else if (psShader->eShaderType == HULL_SHADER) + mem.second.assign("// mtl_VertexID generated in compute kernel"); + else if 
(psShader->eShaderType == DOMAIN_SHADER) + mem.second.assign("// mtl_VertexID unused"); + } + else if (mem.first == "mtl_BaseVertex") + { + if (psShader->eShaderType == VERTEX_SHADER) + mem.second.assign("uint mtl_BaseVertex"); + else if (psShader->eShaderType == HULL_SHADER) + mem.second.assign("// mtl_BaseVertex generated in compute kernel"); + else if (psShader->eShaderType == DOMAIN_SHADER) + mem.second.assign("// mtl_BaseVertex unused"); + } + }); + } + + if (psShader->eShaderType != COMPUTE_SHADER) + { + if (m_StructDefinitions[GetOutputStructName()].m_Members.size() > 0) + { + m_StructDefinitions[""].m_Dependencies.push_back(GetOutputStructName()); + if (psContext->psDependencies) + psContext->psDependencies->m_SharedDependencies.push_back(GetOutputStructName()); + } + } + + PrintStructDeclarations(m_StructDefinitions); + + psContext->currentGLSLString = &bodyglsl; + + bool popPragmaDiagnostic = false; + if (psShader->eShaderType == HULL_SHADER || psShader->eShaderType == DOMAIN_SHADER) + { + popPragmaDiagnostic = true; + + bcatcstr(bodyglsl, "#pragma clang diagnostic push\n"); + bcatcstr(bodyglsl, "#pragma clang diagnostic ignored \"-Wunused-parameter\"\n"); + } + + switch (psShader->eShaderType) + { + case VERTEX_SHADER: + if ((psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) == 0) + bcatcstr(bodyglsl, "vertex Mtl_VertexOut xlatMtlMain(\n"); + else + bcatcstr(bodyglsl, "static Mtl_VertexOut vertexFunction(\n"); + break; + case PIXEL_SHADER: + if (psShader->sInfo.bEarlyFragmentTests) + bcatcstr(bodyglsl, "[[early_fragment_tests]]\n"); + if (m_StructDefinitions[GetOutputStructName()].m_Members.size() > 0) + bcatcstr(bodyglsl, "fragment Mtl_FragmentOut xlatMtlMain(\n"); + else + bcatcstr(bodyglsl, "fragment void xlatMtlMain(\n"); + break; + case COMPUTE_SHADER: + bcatcstr(bodyglsl, "kernel void computeMain(\n"); + break; + case HULL_SHADER: + bcatcstr(bodyglsl, "kernel void patchKernel(\n"); + break; + case DOMAIN_SHADER: + { + const char *patchType = psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_QUAD ? 
"quad" : "triangle"; + uint32_t patchCount = psShader->sInfo.ui32TessOutputControlPointCount; + bformata(bodyglsl, "[[patch(%s, %d)]] vertex Mtl_VertexOutPostTess xlatMtlMain(\n", patchType, patchCount); + break; + } + default: + // Not supported + ASSERT(0); + return false; + } + + psContext->indent++; + for (auto itr = m_StructDefinitions[""].m_Members.begin();;) + { + if (itr == m_StructDefinitions[""].m_Members.end()) + break; + + psContext->AddIndentation(); + bcatcstr(bodyglsl, itr->second.c_str()); + + itr++; + if (itr != m_StructDefinitions[""].m_Members.end()) + bcatcstr(bodyglsl, ",\n"); + } + + // Figure and declare counters and their binds (we also postponed buffer reflection until now) + for (auto it = m_BufferReflections.begin(); it != m_BufferReflections.end(); ++it) + { + uint32_t bind = it->second.bind; + if (it->second.hasCounter) + { + const uint32_t counterBind = m_BufferSlots.PeekFirstFreeSlot(); + m_BufferSlots.ReserveBindingSlot(counterBind, BindingSlotAllocator::UAV); + + bformata(bodyglsl, ",\n\t\tdevice atomic_uint* %s_counter [[ buffer(%d) ]]", it->first.c_str(), counterBind); + + // Offset with 1 so we can capture counters that are bound to slot 0 (if, say, user decides to start buffers at register 1 or higher) + bind |= ((counterBind + 1) << 16); + } + psContext->m_Reflection.OnBufferBinding(it->first, bind, it->second.isUAV); + } + + bcatcstr(bodyglsl, ")\n{\n"); + + if (popPragmaDiagnostic) + bcatcstr(bodyglsl, "#pragma clang diagnostic pop\n"); + + if (psShader->eShaderType != COMPUTE_SHADER) + { + if (psShader->eShaderType == VERTEX_SHADER) + { + // Fix HLSL compatibility with DrawProceduralIndirect, SV_InstanceID always starts at 0 but with Metal, a base instance was not subtracted for equal behavior + // Base semantics available everywhere starting with iOS9 (except hardware limitation exists with the original Apple A7/A8 GPUs, causing UNITY_SUPPORT_INDIRECT_BUFFERS=0) + std::for_each(m_StructDefinitions[""].m_Members.begin(), m_StructDefinitions[""].m_Members.end(), [&](MemberDefinitions::value_type &mem) + { + if (mem.first == "mtl_InstanceID") + { + bcatcstr(bodyglsl, "#if !UNITY_SUPPORT_INDIRECT_BUFFERS\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "mtl_BaseInstance = 0;\n"); + bcatcstr(bodyglsl, "#endif\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "mtl_InstanceID = mtl_InstanceID - mtl_BaseInstance;\n"); + } + else if (mem.first == "mtl_VertexID") + { + bcatcstr(bodyglsl, "#if !UNITY_SUPPORT_INDIRECT_BUFFERS\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "mtl_BaseVertex = 0;\n"); + bcatcstr(bodyglsl, "#endif\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "mtl_VertexID = mtl_VertexID - mtl_BaseVertex;\n"); + } + }); + } + + if (m_StructDefinitions[GetOutputStructName().c_str()].m_Members.size() > 0) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, GetOutputStructName().c_str()); + bcatcstr(bodyglsl, " output;\n"); + } + } + + if (psShader->eShaderType == HULL_SHADER) + { + if (hasPatchConstant) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "Mtl_PatchConstant patch;\n"); + } + + psContext->AddIndentation(); + bformata(bodyglsl, "const uint numPatchesInThreadGroup = %d;\n", numPatchesInThreadGroup); // Hardcoded because of threadgroup array below + psContext->AddIndentation(); + bcatcstr(bodyglsl, "const uint patchID = (tID.x / patchInfo.numControlPointsPerPatch);\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "const bool patchValid = (patchID < patchInfo.numPatches);\n"); + + 
psContext->AddIndentation(); + bcatcstr(bodyglsl, "const uint mtl_BaseInstance = 0;\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "const uint mtl_InstanceID = groupID.y - mtl_BaseInstance;\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "const uint internalPatchID = mtl_InstanceID * patchInfo.numPatches + patchID;\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "const uint patchIDInThreadGroup = (patchID % numPatchesInThreadGroup);\n"); + + psContext->AddIndentation(); + bcatcstr(bodyglsl, "const uint controlPointID = (tID.x % patchInfo.numControlPointsPerPatch);\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "const uint mtl_BaseVertex = 0;\n"); + psContext->AddIndentation(); + bcatcstr(bodyglsl, "const uint mtl_VertexID = ((mtl_InstanceID * (patchInfo.numControlPointsPerPatch * patchInfo.numPatches)) + tID.x) - mtl_BaseVertex;\n"); + + psContext->AddIndentation(); + bformata(bodyglsl, "threadgroup %s inputGroup[numPatchesInThreadGroup];\n", GetInputStructName().c_str()); + psContext->AddIndentation(); + bformata(bodyglsl, "threadgroup %s &input = inputGroup[patchIDInThreadGroup];\n", GetInputStructName().c_str()); + + psContext->AddIndentation(); + std::string tessFactorBufferType = psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_QUAD ? "MTLQuadTessellationFactorsHalf" : "MTLTriangleTessellationFactorsHalf"; + bformata(bodyglsl, "%s tessFactor;\n", tessFactorBufferType.c_str()); + } + + // There are cases when there are no control point phases and we have to do passthrough + if (psShader->eShaderType == HULL_SHADER && hasControlPointPhase == 0) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "if (patchValid) {\n"); + psContext->indent++; + + // Passthrough control point phase, run the rest only once per patch + psContext->AddIndentation(); + bformata(bodyglsl, "input.cp[controlPointID] = vertexFunction(%s);\n", tessVertexFunctionArguments.c_str()); + + DoHullShaderPassthrough(psContext); + + psContext->indent--; + psContext->AddIndentation(); + bcatcstr(bodyglsl, "}\n"); + + psContext->AddIndentation(); + bcatcstr(bodyglsl, "threadgroup_barrier(mem_flags::mem_threadgroup);\n"); + + psContext->AddIndentation(); + bcatcstr(bodyglsl, "if (!patchValid) {\n"); + psContext->indent++; + psContext->AddIndentation(); + bcatcstr(bodyglsl, "return;\n"); + psContext->indent--; + psContext->AddIndentation(); + bcatcstr(bodyglsl, "}\n"); + } + + if (psShader->eShaderType == HULL_SHADER) + { + for (ui32PhaseCallIndex = 0; ui32PhaseCallIndex < 3; ui32PhaseCallIndex++) + { + for (ui32Phase = 2; ui32Phase < psShader->asPhases.size(); ui32Phase++) + { + uint32_t i; + ShaderPhase *psPhase = &psShader->asPhases[ui32Phase]; + if (psPhase->ePhase != ePhaseFuncCallOrder[ui32PhaseCallIndex]) + continue; + psContext->currentPhase = ui32Phase; + + if (psPhase->earlyMain->slen > 1) + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "//--- Start Early Main ---\n"); + } + + bconcat(bodyglsl, psPhase->earlyMain); + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "//--- End Early Main ---\n"); + } + } + + psContext->AddIndentation(); + bformata(bodyglsl, "// %s%d\n", GetPhaseFuncName(psShader->asPhases[ui32Phase].ePhase), ui32Phase); + if (psPhase->ui32InstanceCount > 1) + { + psContext->AddIndentation(); + bformata(bodyglsl, "for (int phaseInstanceID = 0; phaseInstanceID < %d; phaseInstanceID++) {\n", 
psPhase->ui32InstanceCount); + psContext->indent++; + } + else + { + if (psContext->currentPhase == HS_CTRL_POINT_PHASE && hasControlPointPhase == 1) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "if (patchValid) {\n"); + psContext->indent++; + + psContext->AddIndentation(); + bformata(bodyglsl, "input.cp[controlPointID] = vertexFunction(%s);\n", tessVertexFunctionArguments.c_str()); + } + else + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "{\n"); + psContext->indent++; + } + } + + if (psPhase->psInst.size() > 0) + { + //The minus one here is remove the return statement at end of phases. + //We don't want to translate that, we'll just end the function body. + ASSERT(psPhase->psInst[psPhase->psInst.size() - 1].eOpcode == OPCODE_RET); + for (i = 0; i < psPhase->psInst.size() - 1; ++i) + { + TranslateInstruction(&psPhase->psInst[i]); + } + } + + psContext->indent--; + psContext->AddIndentation(); + bformata(bodyglsl, "}\n"); + + if (psPhase->hasPostShaderCode) + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "//--- Post shader code ---\n"); + } + + bconcat(bodyglsl, psPhase->postShaderCode); + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "//--- End post shader code ---\n"); + } + } + + if (psShader->asPhases[ui32Phase].ePhase == HS_CTRL_POINT_PHASE) + { + // We're done printing control point phase, run the rest only once per patch + psContext->AddIndentation(); + bcatcstr(bodyglsl, "threadgroup_barrier(mem_flags::mem_threadgroup);\n"); + + psContext->AddIndentation(); + bcatcstr(bodyglsl, "if (!patchValid) {\n"); + psContext->indent++; + psContext->AddIndentation(); + bcatcstr(bodyglsl, "return;\n"); + psContext->indent--; + psContext->AddIndentation(); + bcatcstr(bodyglsl, "}\n"); + } + } + } + + if (hasControlPoint) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "controlPoints[mtl_VertexID] = output;\n"); + } + + psContext->AddIndentation(); + bcatcstr(bodyglsl, "tessFactors[internalPatchID] = tessFactor;\n"); + + if (hasPatchConstant) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "patchConstants[internalPatchID] = patch;\n"); + } + + if (psContext->psDependencies) + { + //Save partitioning and primitive type for use by domain shader. 
+ psContext->psDependencies->eTessOutPrim = psShader->sInfo.eTessOutPrim; + psContext->psDependencies->eTessPartitioning = psShader->sInfo.eTessPartitioning; + psContext->psDependencies->numPatchesInThreadGroup = numPatchesInThreadGroup; + psContext->psDependencies->hasControlPoint = hasControlPoint; + psContext->psDependencies->hasPatchConstant = hasPatchConstant; + } + } + else + { + if (psContext->psShader->asPhases[0].earlyMain->slen > 1) + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "//--- Start Early Main ---\n"); + } + + bconcat(bodyglsl, psContext->psShader->asPhases[0].earlyMain); + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(bodyglsl, "//--- End Early Main ---\n"); + } + } + + for (i = 0; i < psShader->asPhases[0].psInst.size(); ++i) + { + TranslateInstruction(&psShader->asPhases[0].psInst[i]); + } + } + + psContext->indent--; + + bcatcstr(bodyglsl, "}\n"); + + psContext->currentGLSLString = &glsl; + + if (psShader->eShaderType == HULL_SHADER && psContext->psDependencies) + { + psContext->m_Reflection.OnTessellationKernelInfo(psContext->psDependencies->m_SharedBufferSlots.SaveTotalShaderStageAllocationsCount()); + } + + if (psShader->eShaderType == DOMAIN_SHADER && psContext->psDependencies) + { + int mtlTessellationPartitionMode = -1; + int mtlWinding = -1; + + switch (psContext->psDependencies->eTessPartitioning) + { + case TESSELLATOR_PARTITIONING_INTEGER: + mtlTessellationPartitionMode = 1; // MTLTessellationPartitionModeInteger + break; + case TESSELLATOR_PARTITIONING_POW2: + mtlTessellationPartitionMode = 0; // MTLTessellationPartitionModePow2 + break; + case TESSELLATOR_PARTITIONING_FRACTIONAL_ODD: + mtlTessellationPartitionMode = 2; // MTLTessellationPartitionModeFractionalOdd + break; + case TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN: + mtlTessellationPartitionMode = 3; // MTLTessellationPartitionModeFractionalEven + break; + case TESSELLATOR_PARTITIONING_UNDEFINED: + default: + ASSERT(0); + break; + } + + switch (psContext->psDependencies->eTessOutPrim) + { + case TESSELLATOR_OUTPUT_TRIANGLE_CW: + mtlWinding = 0; // MTLWindingClockwise + break; + case TESSELLATOR_OUTPUT_TRIANGLE_CCW: + mtlWinding = 1; // MTLWindingCounterClockwise + break; + case TESSELLATOR_OUTPUT_POINT: + psContext->m_Reflection.OnDiagnostics("Metal Tessellation: outputtopology(\"point\") not supported.", 0, true); + break; + case TESSELLATOR_OUTPUT_LINE: + psContext->m_Reflection.OnDiagnostics("Metal Tessellation: outputtopology(\"line\") not supported.", 0, true); + break; + case TESSELLATOR_OUTPUT_UNDEFINED: + default: + ASSERT(0); + break; + } + + psContext->m_Reflection.OnTessellationInfo(mtlTessellationPartitionMode, mtlWinding, (uint32_t)psContext->psDependencies->fMaxTessFactor, psContext->psDependencies->numPatchesInThreadGroup); + } + + bcatcstr(glsl, m_ExtraGlobalDefinitions.c_str()); + + // Print out extra functions we generated + std::for_each(m_FunctionDefinitions.begin(), m_FunctionDefinitions.end(), [&glsl](const FunctionDefinitions::value_type &p) + { + bcatcstr(glsl, p.second.c_str()); + bcatcstr(glsl, "\n"); + }); + + // And then the actual function body + bconcat(glsl, bodyglsl); + bdestroy(bodyglsl); + + return true; +} + +void ToMetal::DeclareExtraFunction(const std::string &name, const std::string &body) +{ + if (m_FunctionDefinitions.find(name) != m_FunctionDefinitions.end()) + return; + 
m_FunctionDefinitions.insert(std::make_pair(name, body)); +} + +std::string ToMetal::GetOutputStructName() const +{ + switch (psContext->psShader->eShaderType) + { + case VERTEX_SHADER: + return "Mtl_VertexOut"; + case PIXEL_SHADER: + return "Mtl_FragmentOut"; + case HULL_SHADER: + if (psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_FORK_PHASE || + psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_JOIN_PHASE) + return "Mtl_PatchConstant"; + return "Mtl_ControlPoint"; + case DOMAIN_SHADER: + return "Mtl_VertexOutPostTess"; + default: + ASSERT(0); + return ""; + } +} + +std::string ToMetal::GetInputStructName() const +{ + switch (psContext->psShader->eShaderType) + { + case VERTEX_SHADER: + return "Mtl_VertexIn"; + case PIXEL_SHADER: + return "Mtl_FragmentIn"; + case COMPUTE_SHADER: + return "Mtl_KernelIn"; + case HULL_SHADER: + return "Mtl_HullIn"; + case DOMAIN_SHADER: + return "Mtl_VertexInPostTess"; + default: + ASSERT(0); + return ""; + } +} + +std::string ToMetal::GetCBName(const std::string& cbName) const +{ + std::string output = cbName; + if (cbName[0] == '$') + { + // "$Globals" should have different names in different shaders so that CbKey can discretely identify a CB. + switch (psContext->psShader->eShaderType) + { + case VERTEX_SHADER: + case HULL_SHADER: + case DOMAIN_SHADER: + output[0] = 'V'; + break; + case PIXEL_SHADER: + output[0] = 'F'; + break; + case COMPUTE_SHADER: + output = cbName.substr(1); + break; + default: + ASSERT(0); + break; + } + } + return output; +} + +void ToMetal::SetIOPrefixes() +{ + switch (psContext->psShader->eShaderType) + { + case VERTEX_SHADER: + case HULL_SHADER: + case DOMAIN_SHADER: + psContext->inputPrefix = "input."; + psContext->outputPrefix = "output."; + break; + + case PIXEL_SHADER: + psContext->inputPrefix = "input."; + psContext->outputPrefix = "output."; + break; + + case COMPUTE_SHADER: + psContext->inputPrefix = ""; + psContext->outputPrefix = ""; + break; + default: + ASSERT(0); + break; + } +} + +void ToMetal::ClampPartialPrecisions() +{ + HLSLcc::ForEachOperand(psContext->psShader->asPhases[0].psInst.begin(), psContext->psShader->asPhases[0].psInst.end(), FEO_FLAG_ALL, + [](std::vector::iterator &i, Operand *o, uint32_t flags) + { + if (o->eMinPrecision == OPERAND_MIN_PRECISION_FLOAT_2_8) + o->eMinPrecision = OPERAND_MIN_PRECISION_FLOAT_16; + }); +} + +void ToMetal::ReserveUAVBindingSlots(ShaderPhase *phase) +{ + for (uint32_t p = 0; p < phase->psDecl.size(); ++p) + { + uint32_t regNo = phase->psDecl[p].asOperands[0].ui32RegisterNumber; + + if (phase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW || + phase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED) + { + m_BufferSlots.ReserveBindingSlot(regNo, BindingSlotAllocator::RWBuffer); + } + else if (phase->psDecl[p].eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED) + { + // Typed buffers are atm faked using structured buffers -> bind in buffer space + if (phase->psDecl[p].value.eResourceDimension == RESOURCE_DIMENSION_BUFFER) + m_BufferSlots.ReserveBindingSlot(regNo, BindingSlotAllocator::RWBuffer); + else + m_TextureSlots.ReserveBindingSlot(regNo, BindingSlotAllocator::UAV); + } + } +} diff --git a/third_party/HLSLcc/src/toMetalDeclaration.cpp b/third_party/HLSLcc/src/toMetalDeclaration.cpp new file mode 100644 index 0000000..73a0cd0 --- /dev/null +++ b/third_party/HLSLcc/src/toMetalDeclaration.cpp @@ -0,0 +1,2454 @@ +#include "internal_includes/toMetal.h" +#include "internal_includes/debug.h" +#include 
"internal_includes/HLSLccToolkit.h" +#include "internal_includes/Declaration.h" +#include "internal_includes/HLSLCrossCompilerContext.h" +#include "internal_includes/languages.h" +#include +#include +#include + +using namespace HLSLcc; + +#ifndef fpcheck +#ifdef _MSC_VER +#define fpcheck(x) (_isnan(x) || !_finite(x)) +#else +#define fpcheck(x) (std::isnan(x) || std::isinf(x)) +#endif +#endif // #ifndef fpcheck + + +bool ToMetal::TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix, int *iIgnoreRedirect) +{ + if (sig) + { + if (psContext->psShader->eShaderType == HULL_SHADER && sig->semanticName == "SV_TessFactor") + { + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + ASSERT(sig->ui32SemanticIndex <= 3); + std::ostringstream oss; + oss << "tessFactor.edgeTessellationFactor[" << sig->ui32SemanticIndex << "]"; + result = oss.str(); + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (iIgnoreRedirect != NULL) *iIgnoreRedirect = 1; + return true; + } + + if (psContext->psShader->eShaderType == HULL_SHADER && sig->semanticName == "SV_InsideTessFactor") + { + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + ASSERT(sig->ui32SemanticIndex <= 1); + std::ostringstream oss; + oss << "tessFactor.insideTessellationFactor"; + if (psContext->psShader->sInfo.eTessDomain != TESSELLATOR_DOMAIN_TRI) + oss << "[" << sig->ui32SemanticIndex << "]"; + result = oss.str(); + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (iIgnoreRedirect != NULL) *iIgnoreRedirect = 1; + return true; + } + + if (sig->semanticName == "SV_InstanceID") + { + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + } + + if (((sig->eSystemValueType == NAME_POSITION || sig->semanticName == "POS") && sig->ui32SemanticIndex == 0) && + ((psContext->psShader->eShaderType == VERTEX_SHADER && (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) == 0))) + { + result = "mtl_Position"; + return true; + } + + switch (sig->eSystemValueType) + { + case NAME_POSITION: + if (psContext->psShader->eShaderType == PIXEL_SHADER) + result = "hlslcc_FragCoord"; + else + result = "mtl_Position"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + return true; + case NAME_RENDER_TARGET_ARRAY_INDEX: + result = "mtl_Layer"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_CLIP_DISTANCE: + { + // this is temp variable, declaration and redirecting to actual output is handled in DeclareClipPlanes + char tmpName[128]; sprintf(tmpName, "phase%d_ClipDistance%d", psContext->currentPhase, sig->ui32SemanticIndex); + result = tmpName; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (iIgnoreRedirect != NULL) *iIgnoreRedirect = 1; + return true; + } + case NAME_VIEWPORT_ARRAY_INDEX: + result = "mtl_ViewPortIndex"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_VERTEX_ID: + result = "mtl_VertexID"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_INSTANCE_ID: + result = "mtl_InstanceID"; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + return true; + case NAME_IS_FRONT_FACE: + result = "(mtl_FrontFace ? 
0xffffffffu : uint(0))"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case NAME_SAMPLE_INDEX: + result = "mtl_SampleID"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + + default: + break; + } + + if (psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_CTRL_POINT_PHASE || + psContext->psShader->asPhases[psContext->currentPhase].ePhase == HS_FORK_PHASE) + { + std::ostringstream oss; + oss << sig->semanticName << sig->ui32SemanticIndex; + result = oss.str(); + return true; + } + } + + switch (psOperand->eType) + { + case OPERAND_TYPE_INPUT_COVERAGE_MASK: + case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: + result = "mtl_CoverageMask"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case OPERAND_TYPE_INPUT_THREAD_ID: + result = "mtl_ThreadID"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + return true; + case OPERAND_TYPE_INPUT_THREAD_GROUP_ID: + result = "mtl_ThreadGroupID"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + return true; + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP: + result = "mtl_ThreadIDInGroup"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + return true; + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED: + result = "mtl_ThreadIndexInThreadGroup"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case OPERAND_TYPE_INPUT_DOMAIN_POINT: + result = "mtl_TessCoord"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case OPERAND_TYPE_OUTPUT_DEPTH: + case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: + case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: + result = "mtl_Depth"; + if (outSkipPrefix != NULL) *outSkipPrefix = true; + if (pui32IgnoreSwizzle) + *pui32IgnoreSwizzle = 1; + return true; + case OPERAND_TYPE_OUTPUT: + case OPERAND_TYPE_INPUT: + { + std::ostringstream oss; + ASSERT(sig != nullptr); + oss << sig->semanticName << sig->ui32SemanticIndex; + result = oss.str(); + if (HLSLcc::WriteMaskToComponentCount(sig->ui32Mask) == 1 && pui32IgnoreSwizzle != NULL) + *pui32IgnoreSwizzle = 1; + return true; + } + case OPERAND_TYPE_INPUT_PATCH_CONSTANT: + { + std::ostringstream oss; + ASSERT(sig != nullptr); + oss << sig->semanticName << sig->ui32SemanticIndex; + result = oss.str(); + if (outSkipPrefix != NULL) *outSkipPrefix = true; + return true; + } + case OPERAND_TYPE_INPUT_CONTROL_POINT: + { + std::ostringstream oss; + ASSERT(sig != nullptr); + oss << sig->semanticName << sig->ui32SemanticIndex; + result = oss.str(); + if (outSkipPrefix != NULL) *outSkipPrefix = true; + return true; + break; + } + default: + ASSERT(0); + break; + } + + + return false; +} + +void ToMetal::DeclareBuiltinInput(const Declaration *psDecl) +{ + const SPECIAL_NAME eSpecialName = psDecl->asOperands[0].eSpecialName; + + Shader* psShader = psContext->psShader; + const Operand* psOperand = &psDecl->asOperands[0]; + const int regSpace = psOperand->GetRegisterSpace(psContext); + ASSERT(regSpace == 0); + + // we need to at least mark if they are scalars or not (as we might need to use vector ctor) + if (psOperand->GetNumInputElements(psContext) == 1) + psShader->abScalarInput[regSpace][psOperand->ui32RegisterNumber] |= (int)psOperand->ui32CompMask; + + switch (eSpecialName) + { + case NAME_POSITION: + 
ASSERT(psContext->psShader->eShaderType == PIXEL_SHADER); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_FragCoord", "float4 mtl_FragCoord [[ position ]]")); + bcatcstr(GetEarlyMain(psContext), "float4 hlslcc_FragCoord = float4(mtl_FragCoord.xyz, 1.0/mtl_FragCoord.w);\n"); + break; + case NAME_RENDER_TARGET_ARRAY_INDEX: + // Only supported on a Mac + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_Layer", "uint mtl_Layer [[ render_target_array_index ]]")); + break; + case NAME_CLIP_DISTANCE: + ASSERT(0); // Should never be an input + break; + case NAME_VIEWPORT_ARRAY_INDEX: + // Only supported on a Mac + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_ViewPortIndex", "uint mtl_ViewPortIndex [[ viewport_array_index ]]")); + break; + case NAME_INSTANCE_ID: + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_InstanceID", "uint mtl_InstanceID [[ instance_id ]]")); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_BaseInstance", "uint mtl_BaseInstance [[ base_instance ]]")); // Requires Metal runtime 1.1+ + break; + case NAME_IS_FRONT_FACE: + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_FrontFace", "bool mtl_FrontFace [[ front_facing ]]")); + break; + case NAME_SAMPLE_INDEX: + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_SampleID", "uint mtl_SampleID [[ sample_id ]]")); + break; + case NAME_VERTEX_ID: + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_VertexID", "uint mtl_VertexID [[ vertex_id ]]")); + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_BaseVertex", "uint mtl_BaseVertex [[ base_vertex ]]")); // Requires Metal runtime 1.1+ + break; + case NAME_PRIMITIVE_ID: + // Not on Metal + ASSERT(0); + break; + default: + m_StructDefinitions[""].m_Members.push_back(std::make_pair(psDecl->asOperands[0].specialName, std::string("float4 ").append(psDecl->asOperands[0].specialName))); + ASSERT(0); // Catch this to see what's happening + break; + } +} + +void ToMetal::DeclareClipPlanes(const Declaration* decl, unsigned declCount) +{ + unsigned planeCount = 0; + for (unsigned i = 0, n = declCount; i < n; ++i) + { + const Operand* operand = &decl[i].asOperands[0]; + if (operand->eSpecialName == NAME_CLIP_DISTANCE) + planeCount += operand->GetMaxComponent(); + } + if (planeCount == 0) return; + + std::ostringstream oss; oss << "float mtl_ClipDistance [[ clip_distance ]]"; + if (planeCount > 1) oss << "[" << planeCount << "]"; + m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(std::string("mtl_ClipDistance"), oss.str())); + + Shader* shader = psContext->psShader; + + unsigned compCount = 1; + const ShaderInfo::InOutSignature* psFirstClipSignature; + if (shader->sInfo.GetOutputSignatureFromSystemValue(NAME_CLIP_DISTANCE, 0, &psFirstClipSignature)) + { + if (psFirstClipSignature->ui32Mask & (1 << 3)) compCount = 4; + else if (psFirstClipSignature->ui32Mask & (1 << 2)) compCount = 3; + else if (psFirstClipSignature->ui32Mask & (1 << 1)) compCount = 2; + } + + for (unsigned i = 0, n = declCount; i < n; ++i) + { + const Operand* operand = &decl[i].asOperands[0]; + if (operand->eSpecialName != NAME_CLIP_DISTANCE) continue; + + const ShaderInfo::InOutSignature* signature = 0; + shader->sInfo.GetOutputSignatureFromRegister(operand->ui32RegisterNumber, operand->ui32CompMask, 0, &signature); + const int semanticIndex = signature->ui32SemanticIndex; + + bformata(GetEarlyMain(psContext), "float4 phase%d_ClipDistance%d;\n", 
psContext->currentPhase, signature->ui32SemanticIndex); + + const char* swizzleStr[] = { "x", "y", "z", "w" }; + if (planeCount > 1) + { + for (int i = 0; i < compCount; ++i) + { + bformata(GetPostShaderCode(psContext), "%s.mtl_ClipDistance[%d] = phase%d_ClipDistance%d.%s;\n", "output", semanticIndex * compCount + i, psContext->currentPhase, semanticIndex, swizzleStr[i]); + } + } + else + { + bformata(GetPostShaderCode(psContext), "%s.mtl_ClipDistance = phase%d_ClipDistance%d.x;\n", "output", psContext->currentPhase, semanticIndex); + } + } +} + +void ToMetal::GenerateTexturesReflection(HLSLccReflection* refl) +{ + for (unsigned i = 0, n = m_Textures.size(); i < n; ++i) + { + // Match CheckSamplerAndTextureNameMatch behavior + const std::string samplerName1 = m_Textures[i].name, samplerName2 = "sampler" + m_Textures[i].name, samplerName3 = "sampler_" + m_Textures[i].name; + for (unsigned j = 0, m = m_Samplers.size(); j < m; ++j) + { + if (m_Samplers[j].name == samplerName1 || m_Samplers[j].name == samplerName2 || m_Samplers[j].name == samplerName3) + { + m_Textures[i].samplerBind = m_Samplers[j].slot; + break; + } + } + } + + for (unsigned i = 0, n = m_Textures.size(); i < n; ++i) + refl->OnTextureBinding(m_Textures[i].name, m_Textures[i].textureBind, m_Textures[i].samplerBind, m_Textures[i].isMultisampled, m_Textures[i].dim, m_Textures[i].uav); +} + +void ToMetal::DeclareBuiltinOutput(const Declaration *psDecl) +{ + std::string out = GetOutputStructName(); + + switch (psDecl->asOperands[0].eSpecialName) + { + case NAME_POSITION: + m_StructDefinitions[out].m_Members.push_back(std::make_pair("mtl_Position", "float4 mtl_Position [[ position ]]")); + break; + case NAME_RENDER_TARGET_ARRAY_INDEX: + m_StructDefinitions[out].m_Members.push_back(std::make_pair("mtl_Layer", "uint mtl_Layer [[ render_target_array_index ]]")); + break; + case NAME_CLIP_DISTANCE: + // it will be done separately in DeclareClipPlanes + break; + case NAME_VIEWPORT_ARRAY_INDEX: + // Only supported on a Mac + m_StructDefinitions[out].m_Members.push_back(std::make_pair("mtl_ViewPortIndex", "uint mtl_ViewPortIndex [[ viewport_array_index ]]")); + break; + case NAME_VERTEX_ID: + ASSERT(0); //VertexID is not an output + break; + case NAME_PRIMITIVE_ID: + // Not on Metal + ASSERT(0); + break; + case NAME_INSTANCE_ID: + ASSERT(0); //InstanceID is not an output + break; + case NAME_IS_FRONT_FACE: + ASSERT(0); //FrontFacing is not an output + break; + + //For the quadrilateral domain, there are 6 factors (4 sides, 2 inner). + case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: + case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: + case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: + case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: + + //For the triangular domain, there are 4 factors (3 sides, 1 inner) + case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_INSIDE_TESSFACTOR: + + //For the isoline domain, there are 2 factors (detail and density). + case NAME_FINAL_LINE_DETAIL_TESSFACTOR: + case NAME_FINAL_LINE_DENSITY_TESSFACTOR: + { + // Handled separately + break; + } + default: + // This might be SV_Position (because d3dcompiler is weird). 
Get signature and check + const ShaderInfo::InOutSignature *sig = NULL; + psContext->psShader->sInfo.GetOutputSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber, psDecl->asOperands[0].GetAccessMask(), 0, &sig); + ASSERT(sig != NULL); + if (sig->eSystemValueType == NAME_POSITION && sig->ui32SemanticIndex == 0) + { + m_StructDefinitions[out].m_Members.push_back(std::make_pair("mtl_Position", "float4 mtl_Position [[ position ]]")); + break; + } + + ASSERT(0); // Wut + break; + } + + psContext->m_Reflection.OnBuiltinOutput(psDecl->asOperands[0].eSpecialName); +} + +static std::string BuildOperandTypeString(OPERAND_MIN_PRECISION ePrec, INOUT_COMPONENT_TYPE eType, int numComponents) +{ + SHADER_VARIABLE_TYPE t = SVT_FLOAT; + switch (eType) + { + case INOUT_COMPONENT_FLOAT32: + t = SVT_FLOAT; + break; + case INOUT_COMPONENT_UINT32: + t = SVT_UINT; + break; + case INOUT_COMPONENT_SINT32: + t = SVT_INT; + break; + default: + ASSERT(0); + break; + } + // Can be overridden by precision + switch (ePrec) + { + case OPERAND_MIN_PRECISION_DEFAULT: + break; + + case OPERAND_MIN_PRECISION_FLOAT_16: + ASSERT(eType == INOUT_COMPONENT_FLOAT32); + t = SVT_FLOAT16; + break; + + case OPERAND_MIN_PRECISION_FLOAT_2_8: + ASSERT(eType == INOUT_COMPONENT_FLOAT32); + t = SVT_FLOAT10; + break; + + case OPERAND_MIN_PRECISION_SINT_16: + ASSERT(eType == INOUT_COMPONENT_SINT32); + t = SVT_INT16; + break; + case OPERAND_MIN_PRECISION_UINT_16: + ASSERT(eType == INOUT_COMPONENT_UINT32); + t = SVT_UINT16; + break; + } + return HLSLcc::GetConstructorForTypeMetal(t, numComponents); +} + +void ToMetal::DeclareHullShaderPassthrough() +{ + uint32_t i; + + for (i = 0; i < psContext->psShader->sInfo.psInputSignatures.size(); i++) + { + ShaderInfo::InOutSignature *psSig = &psContext->psShader->sInfo.psInputSignatures[i]; + + std::string name; + { + std::ostringstream oss; + oss << psSig->semanticName << psSig->ui32SemanticIndex; + name = oss.str(); + } + + if ((psSig->eSystemValueType == NAME_POSITION || psSig->semanticName == "POS") && psSig->ui32SemanticIndex == 0) + name = "mtl_Position"; + + uint32_t ui32NumComponents = HLSLcc::GetNumberBitsSet(psSig->ui32Mask); + std::string typeName = BuildOperandTypeString(OPERAND_MIN_PRECISION_DEFAULT, psSig->eComponentType, ui32NumComponents); + + std::ostringstream oss; + oss << typeName << " " << name; + oss << " [[ user(" << name << ") ]]"; + + std::string declString; + declString = oss.str(); + + m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair(name, declString)); + + std::string out = GetOutputStructName(); + m_StructDefinitions[out].m_Members.push_back(std::make_pair(name, declString)); + + // For preserving data layout, declare output struct as domain shader input, too + oss.str(""); + out += "In"; + + oss << typeName << " " << name; + // VERTEX_SHADER hardcoded on purpose + bool keepLocation = ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0); + uint32_t loc = psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true, keepLocation, psContext->psShader->maxSemanticIndex); + oss << " [[ " << "attribute(" << loc << ")" << " ]] "; + + psContext->m_Reflection.OnInputBinding(name, loc); + m_StructDefinitions[out].m_Members.push_back(std::make_pair(name, oss.str())); + } +} + +void ToMetal::HandleOutputRedirect(const Declaration *psDecl, const std::string &typeName) +{ + const Operand *psOperand = &psDecl->asOperands[0]; + Shader *psShader = psContext->psShader; + int needsRedirect = 0; + const ShaderInfo::InOutSignature 
*psSig = NULL; + + int regSpace = psOperand->GetRegisterSpace(psContext); + if (regSpace == 0 && psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) + { + needsRedirect = 1; + } + else if (regSpace == 1 && psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) + { + needsRedirect = 1; + } + + if (needsRedirect == 1) + { + // TODO What if this is indexed? + int comp = 0; + uint32_t origMask = psOperand->ui32CompMask; + + ASSERT(psContext->psShader->aIndexedOutput[regSpace][psOperand->ui32RegisterNumber] == 0); + + bformata(GetEarlyMain(psContext), "%s phase%d_Output%d_%d;\n", typeName.c_str(), psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + + while (comp < 4) + { + int numComps = 0; + int hasCast = 0; + uint32_t mask, i; + psSig = NULL; + if (regSpace == 0) + psContext->psShader->sInfo.GetOutputSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, psContext->psShader->ui32CurrentVertexOutputStream, &psSig, true); + else + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); + + // The register isn't necessarily packed full. Continue with the next component. + if (psSig == NULL) + { + comp++; + continue; + } + + numComps = HLSLcc::GetNumberBitsSet(psSig->ui32Mask); + mask = psSig->ui32Mask; + + ((Operand *)psOperand)->ui32CompMask = 1 << comp; + bstring str = GetPostShaderCode(psContext); + bcatcstr(str, TranslateOperand(psOperand, TO_FLAG_NAME_ONLY).c_str()); + bcatcstr(str, " = "); + + if (psSig->eComponentType == INOUT_COMPONENT_SINT32) + { + bformata(str, "as_type("); + hasCast = 1; + } + else if (psSig->eComponentType == INOUT_COMPONENT_UINT32) + { + bformata(str, "as_type("); + hasCast = 1; + } + bformata(str, "phase%d_Output%d_%d.", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + // Print out mask + for (i = 0; i < 4; i++) + { + if ((mask & (1 << i)) == 0) + continue; + + bformata(str, "%c", "xyzw"[i]); + } + + if (hasCast) + bcatcstr(str, ")"); + comp += numComps; + bcatcstr(str, ";\n"); + } + + ((Operand *)psOperand)->ui32CompMask = origMask; + if (regSpace == 0) + psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; + else + psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; + } +} + +void ToMetal::HandleInputRedirect(const Declaration *psDecl, const std::string &typeName) +{ + Operand *psOperand = (Operand *)&psDecl->asOperands[0]; + Shader *psShader = psContext->psShader; + int needsRedirect = 0; + const ShaderInfo::InOutSignature *psSig = NULL; + + int regSpace = psOperand->GetRegisterSpace(psContext); + if (regSpace == 0) + { + if (psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) + needsRedirect = 1; + } + else if (psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xff) + { + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, &psSig); + needsRedirect = 1; + } + + if (needsRedirect == 1) + { + // TODO What if this is indexed? 
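+ // Input redirect: the signature packing does not match the raw register layout, so the used components are gathered into a phase-local phase<N>_Input<space>_<reg> temporary (with as_type casts for non-float signatures) that the translated instructions then read instead of the input register.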
+ int needsLooping = 0; + int i = 0; + uint32_t origArraySize = 0; + uint32_t origMask = psOperand->ui32CompMask; + + ASSERT(psContext->psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] == 0); + + ++psContext->indent; + + // Does the input have multiple array components (such as geometry shader input, or domain shader control point input) + if ((psShader->eShaderType == DOMAIN_SHADER && regSpace == 0) || (psShader->eShaderType == GEOMETRY_SHADER)) + { + // The count is actually stored in psOperand->aui32ArraySizes[0] + origArraySize = psOperand->aui32ArraySizes[0]; + // bformata(glsl, "%s vec4 phase%d_Input%d_%d[%d];\n", Precision, psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, origArraySize); + bformata(GetEarlyMain(psContext), "%s phase%d_Input%d_%d[%d];\n", typeName.c_str(), psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, origArraySize); + needsLooping = 1; + i = origArraySize - 1; + } + else + // bformata(glsl, "%s vec4 phase%d_Input%d_%d;\n", Precision, psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + bformata(GetEarlyMain(psContext), "%s phase%d_Input%d_%d;\n", typeName.c_str(), psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber); + + // Do a conditional loop. In normal cases needsLooping == 0 so this is only run once. + do + { + int comp = 0; + bstring str = GetEarlyMain(psContext); + if (needsLooping) + bformata(str, "phase%d_Input%d_%d[%d] = %s(", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, i, typeName.c_str()); + else + bformata(str, "phase%d_Input%d_%d = %s(", psContext->currentPhase, regSpace, psOperand->ui32RegisterNumber, typeName.c_str()); + + while (comp < 4) + { + int numComps = 0; + int hasCast = 0; + int hasSig = 0; + if (regSpace == 0) + hasSig = psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); + else + hasSig = psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, 1 << comp, &psSig, true); + + if (hasSig) + { + numComps = HLSLcc::GetNumberBitsSet(psSig->ui32Mask); + if (psSig->eComponentType != INOUT_COMPONENT_FLOAT32) + { + if (numComps > 1) + bformata(str, "as_type(", numComps); + else + bformata(str, "as_type("); + hasCast = 1; + } + + // Override the array size of the operand so TranslateOperand call below prints the correct index + if (needsLooping) + psOperand->aui32ArraySizes[0] = i; + + // And the component mask + psOperand->ui32CompMask = 1 << comp; + + bformata(str, TranslateOperand(psOperand, TO_FLAG_NAME_ONLY).c_str()); + + // Restore the original array size value and mask + psOperand->ui32CompMask = origMask; + if (needsLooping) + psOperand->aui32ArraySizes[0] = origArraySize; + + if (hasCast) + bcatcstr(str, ")"); + comp += numComps; + } + else // no signature found -> fill with zero + { + bcatcstr(str, "0"); + comp++; + } + + if (comp < 4) + bcatcstr(str, ", "); + } + bcatcstr(str, ");\n"); + } + while ((--i) >= 0); + + --psContext->indent; + + if (regSpace == 0) + psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; + else + psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] = 0xfe; + } +} + +static std::string TranslateResourceDeclaration(HLSLCrossCompilerContext* psContext, + const Declaration *psDecl, const std::string& textureName, + bool isDepthSampler, bool isUAV) +{ + std::ostringstream oss; + const ResourceBinding* psBinding = 0; + 
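// Builds the MSL type string for this resource, e.g. "texture2d<float, access::sample >"; depth-compare samplers become the depth2d/depthcube variants and are forced to float, since Metal only supports float32 depth textures. +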
const RESOURCE_DIMENSION eDimension = psDecl->value.eResourceDimension; + const uint32_t ui32RegisterNumber = psDecl->asOperands[0].ui32RegisterNumber; + REFLECT_RESOURCE_PRECISION ePrec = REFLECT_RESOURCE_PRECISION_UNKNOWN; + RESOURCE_RETURN_TYPE eType = RETURN_TYPE_UNORM; + std::string access = "sample"; + + if (isUAV) + { + if ((psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE) != 0) + { + access = "write"; + if ((psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_READ) != 0) + { + access = "read_write"; + } + } + else + { + access = "read"; + eType = psDecl->sUAV.Type; + } + int found; + found = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, ui32RegisterNumber, &psBinding); + if (found) + { + ePrec = psBinding->ePrecision; + eType = (RESOURCE_RETURN_TYPE)psBinding->ui32ReturnType; + // Figured out by reverse engineering bitcode. flags b00xx means float1, b01xx = float2, b10xx = float3 and b11xx = float4 + } + } + else + { + int found; + found = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, ui32RegisterNumber, &psBinding); + if (found) + { + eType = (RESOURCE_RETURN_TYPE)psBinding->ui32ReturnType; + ePrec = psBinding->ePrecision; + + // TODO: it might make sense to propagate float earlier (as hlslcc might declare other variables depending on sampler prec) + // metal supports ONLY float32 depth textures + if (isDepthSampler) + { + switch (eDimension) + { + case RESOURCE_DIMENSION_TEXTURE2D: case RESOURCE_DIMENSION_TEXTURE2DMS: case RESOURCE_DIMENSION_TEXTURECUBE: + case RESOURCE_DIMENSION_TEXTURE2DARRAY: case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + ePrec = REFLECT_RESOURCE_PRECISION_HIGHP, eType = RETURN_TYPE_FLOAT; break; + default: + break; + } + } + } + switch (eDimension) + { + case RESOURCE_DIMENSION_BUFFER: + case RESOURCE_DIMENSION_TEXTURE2DMS: + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + access = "read"; + default: + break; + } + } + + SHADER_VARIABLE_TYPE svtType = HLSLcc::ResourceReturnTypeToSVTType(eType, ePrec); + std::string typeName = HLSLcc::GetConstructorForTypeMetal(svtType, 1); + + if ((textureName == "_CameraDepthTexture" || textureName == "_LastCameraDepthTexture") && svtType != SVT_FLOAT) + { + std::string msg = textureName + " should be float on Metal (use sampler2D or sampler2D_float). Incorrect type " + "can cause Metal validation failures or undefined results on some devices."; + psContext->m_Reflection.OnDiagnostics(msg, 0, false); + } + + switch (eDimension) + { + case RESOURCE_DIMENSION_BUFFER: + { + oss << "texture1d<" << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + + case RESOURCE_DIMENSION_TEXTURE1D: + { + oss << "texture1d<" << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + + case RESOURCE_DIMENSION_TEXTURE2D: + { + oss << (isDepthSampler ? "depth2d<" : "texture2d<") << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DMS: + { + oss << (isDepthSampler ? "depth2d_ms<" : "texture2d_ms<") << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + + case RESOURCE_DIMENSION_TEXTURE3D: + { + oss << "texture3d<" << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + + case RESOURCE_DIMENSION_TEXTURECUBE: + { + oss << (isDepthSampler ? 
"depthcube<" : "texturecube<") << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + oss << "texture1d_array<" << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + oss << (isDepthSampler ? "depth2d_array<" : "texture2d_array<") << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + // Not really supported in Metal but let's print it here anyway + oss << "texture2d_ms_array<" << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + oss << (isDepthSampler ? "depthcube_array<" : "texturecube_array<") << typeName << ", access::" << access << " >"; + return oss.str(); + break; + } + default: + ASSERT(0); + oss << "texture2d<" << typeName << ", access::" << access << " >"; + return oss.str(); + } +} + +static std::string GetInterpolationString(INTERPOLATION_MODE eMode) +{ + switch (eMode) + { + case INTERPOLATION_CONSTANT: + return " [[ flat ]]"; + + case INTERPOLATION_LINEAR: + return ""; + + case INTERPOLATION_LINEAR_CENTROID: + return " [[ centroid_perspective ]]"; + + case INTERPOLATION_LINEAR_NOPERSPECTIVE: + return " [[ center_no_perspective ]]"; + + case INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID: + return " [[ centroid_no_perspective ]]"; + + case INTERPOLATION_LINEAR_SAMPLE: + return " [[ sample_perspective ]]"; + + case INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE: + return " [[ sample_no_perspective ]]"; + default: + ASSERT(0); + return ""; + } +} + +void ToMetal::DeclareStructVariable(const std::string &parentName, const ShaderVar &var, bool withinCB, uint32_t cumulativeOffset, bool isUsed) +{ + DeclareStructVariable(parentName, var.sType, withinCB, cumulativeOffset + var.ui32StartOffset, isUsed); +} + +void ToMetal::DeclareStructVariable(const std::string &parentName, const ShaderVarType &var, bool withinCB, uint32_t cumulativeOffset, bool isUsed) +{ + // CB arrays need to be defined as 4 component vectors to match DX11 data layout + bool arrayWithinCB = (withinCB && (var.Elements > 1) && (psContext->psShader->eShaderType == COMPUTE_SHADER)); + bool doDeclare = true; + + if (isUsed == false && ((psContext->flags & HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS)) == 0) + isUsed = true; + + if (var.Class == SVC_STRUCT) + { + if (m_StructDefinitions.find(var.name + "_Type") == m_StructDefinitions.end()) + DeclareStructType(var.name + "_Type", var.Members, withinCB, cumulativeOffset + var.Offset); + + // Report Array-of-Struct CB top-level struct var after all members are reported. + if (var.Parent == NULL && var.Elements > 1 && withinCB) + { + // var.Type being SVT_VOID indicates it is a struct in this case. 
+ psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, var.Rows, var.Columns, false, var.Elements, true); + } + + std::ostringstream oss; + oss << var.name << "_Type " << var.name; + if (var.Elements > 1) + { + oss << "[" << var.Elements << "]"; + } + m_StructDefinitions[parentName].m_Members.push_back(std::make_pair(var.name, oss.str())); + m_StructDefinitions[parentName].m_Dependencies.push_back(var.name + "_Type"); + return; + } + else if (var.Class == SVC_MATRIX_COLUMNS || var.Class == SVC_MATRIX_ROWS) + { + std::ostringstream oss; + if (psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) + { + // Translate matrices into vec4 arrays + char prefix[256]; + sprintf(prefix, HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING, var.Rows, var.Columns); + oss << HLSLcc::GetConstructorForType(psContext, var.Type, 4) << " " << prefix << var.name; + + uint32_t elemCount = (var.Class == SVC_MATRIX_COLUMNS ? var.Columns : var.Rows); + if (var.Elements > 1) + { + elemCount *= var.Elements; + } + oss << "[" << elemCount << "]"; + + if (withinCB) + { + // On compute shaders we need to reflect the vec array as it is to support all possible matrix sizes correctly. + // On non-compute we can fake that we still have a matrix, as CB upload code will fill the data correctly on 4x4 matrices. + // That way we avoid the issues with mismatching types for builtins etc. + if (psContext->psShader->eShaderType == COMPUTE_SHADER) + doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, 1, 4, false, elemCount, isUsed); + else + doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, var.Rows, var.Columns, true, var.Elements, isUsed); + } + } + else + { + oss << HLSLcc::GetMatrixTypeName(psContext, var.Type, var.Columns, var.Rows); + oss << " " << var.name; + if (var.Elements > 1) + { + oss << "[" << var.Elements << "]"; + } + + // TODO Verify whether the offset is from the beginning of the CB or from the beginning of the struct + if (withinCB) + doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, var.Rows, var.Columns, true, var.Elements, isUsed); + } + + if (doDeclare) + m_StructDefinitions[parentName].m_Members.push_back(std::make_pair(var.name, oss.str())); + } + else if (var.Class == SVC_VECTOR && var.Columns > 1) + { + std::ostringstream oss; + oss << HLSLcc::GetConstructorForTypeMetal(var.Type, arrayWithinCB ? 4 : var.Columns); + oss << " " << var.name; + if (var.Elements > 1) + { + oss << "[" << var.Elements << "]"; + } + + if (withinCB) + doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, 1, var.Columns, false, var.Elements, isUsed); + + if (doDeclare) + m_StructDefinitions[parentName].m_Members.push_back(std::make_pair(var.name, oss.str())); + } + else if ((var.Class == SVC_SCALAR) || + (var.Class == SVC_VECTOR && var.Columns == 1)) + { + if (var.Type == SVT_BOOL) + { + //Use int instead of bool. + //Allows implicit conversions to integer and + //bool consumes 4-bytes in HLSL and GLSL anyway. + ((ShaderVarType &)var).Type = SVT_INT; + } + + std::ostringstream oss; + oss << HLSLcc::GetConstructorForTypeMetal(var.Type, arrayWithinCB ? 
4 : 1); + oss << " " << var.name; + if (var.Elements > 1) + { + oss << "[" << var.Elements << "]"; + } + + if (withinCB) + doDeclare = psContext->m_Reflection.OnConstant(var.fullName, var.Offset + cumulativeOffset, var.Type, 1, 1, false, var.Elements, isUsed); + + if (doDeclare) + m_StructDefinitions[parentName].m_Members.push_back(std::make_pair(var.name, oss.str())); + } + else + { + ASSERT(0); + } +} + +void ToMetal::DeclareStructType(const std::string &name, const std::vector &contents, bool withinCB, uint32_t cumulativeOffset, bool stripUnused /* = false */) +{ + for (std::vector::const_iterator itr = contents.begin(); itr != contents.end(); itr++) + { + if (stripUnused && !itr->sType.m_IsUsed) + continue; + + DeclareStructVariable(name, *itr, withinCB, cumulativeOffset, itr->sType.m_IsUsed); + } +} + +void ToMetal::DeclareStructType(const std::string &name, const std::vector &contents, bool withinCB, uint32_t cumulativeOffset) +{ + for (std::vector::const_iterator itr = contents.begin(); itr != contents.end(); itr++) + { + DeclareStructVariable(name, *itr, withinCB, cumulativeOffset); + } +} + +void ToMetal::DeclareConstantBuffer(const ConstantBuffer *psCBuf, uint32_t ui32BindingPoint) +{ + const bool isGlobals = (psCBuf->name == "$Globals"); + const bool stripUnused = isGlobals && (psContext->flags & HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS); + std::string cbname = GetCBName(psCBuf->name); + + // Note: if we're stripping unused members, both ui32TotalSizeInBytes and individual offsets into reflection will be completely off. + // However, the reflection layer re-calculates both to match Metal alignment rules anyway, so we're good. + if (!psContext->m_Reflection.OnConstantBuffer(cbname, psCBuf->ui32TotalSizeInBytes, psCBuf->GetMemberCount(stripUnused))) + return; + + if (psContext->psDependencies->IsMemberDeclared(cbname)) + return; + + DeclareStructType(cbname + "_Type", psCBuf->asVars, true, 0, stripUnused); + + std::ostringstream oss; + uint32_t slot = m_BufferSlots.GetBindingSlot(ui32BindingPoint, BindingSlotAllocator::ConstantBuffer); + + if (HLSLcc::IsUnityFlexibleInstancingBuffer(psCBuf)) + oss << "const constant " << psCBuf->asVars[0].name << "_Type* "; + else + oss << "constant " << cbname << "_Type& "; + oss << cbname << " [[ buffer(" << slot << ") ]]"; + + m_StructDefinitions[""].m_Members.push_back(std::make_pair(cbname, oss.str())); + m_StructDefinitions[""].m_Dependencies.push_back(cbname + "_Type"); + psContext->m_Reflection.OnConstantBufferBinding(cbname, slot); +} + +void ToMetal::DeclareBufferVariable(const Declaration *psDecl, bool isRaw, bool isUAV) +{ + uint32_t regNo = psDecl->asOperands[0].ui32RegisterNumber; + std::string BufName, BufType, BufConst; + + BufName = ""; + BufType = ""; + BufConst = ""; + + BufName = ResourceName(isUAV ? 
RGROUP_UAV : RGROUP_TEXTURE, regNo); + + if (!isRaw) // declare struct containing uint array when needed + { + std::ostringstream typeoss; + BufType = BufName + "_Type"; + typeoss << "uint value["; + typeoss << psDecl->ui32BufferStride / 4 << "]"; + m_StructDefinitions[BufType].m_Members.push_back(std::make_pair("value", typeoss.str())); + m_StructDefinitions[""].m_Dependencies.push_back(BufType); + } + + if (!psContext->psDependencies->IsMemberDeclared(BufName)) + { + std::ostringstream oss; + + if (!isUAV || ((psDecl->sUAV.ui32AccessFlags & ACCESS_FLAG_WRITE) == 0)) + { + BufConst = "const "; + oss << BufConst; + } + + if (isRaw) + oss << "device uint *" << BufName; + else + oss << "device " << BufType << " *" << BufName; + + uint32_t loc = m_BufferSlots.GetBindingSlot(regNo, isUAV ? BindingSlotAllocator::RWBuffer : BindingSlotAllocator::Texture); + oss << " [[ buffer(" << loc << ") ]]"; + + m_StructDefinitions[""].m_Members.push_back(std::make_pair(BufName, oss.str())); + + // We don't do REAL reflection here, we need to collect all data and figure out if we're dealing with counters. + // And if so - we need to patch counter binding info, add counters to empty slots, etc + const BufferReflection br = { loc, isUAV, psDecl->sUAV.bCounter != 0 }; + m_BufferReflections.insert(std::make_pair(BufName, br)); + } +} + +static int ParseInlineSamplerWrapMode(const std::string& samplerName, const std::string& wrapName) +{ + int res = 0; + const bool hasWrap = (samplerName.find(wrapName) != std::string::npos); + if (!hasWrap) + return res; + + const bool hasU = (samplerName.find(wrapName + 'u') != std::string::npos); + const bool hasV = (samplerName.find(wrapName + 'v') != std::string::npos); + const bool hasW = (samplerName.find(wrapName + 'w') != std::string::npos); + + if (hasWrap) res |= 1; + if (hasU) res |= 2; + if (hasV) res |= 4; + if (hasW) res |= 8; + return res; +} + +static bool EmitInlineSampler(HLSLCrossCompilerContext* psContext, const std::string& name) +{ + // See if it's a sampler that goes with the texture, or an "inline" sampler + // where sampler states are hardcoded in the shader directly. + // + // The logic for "inline" samplers below must match what is recognized + // by other shader platforms in Unity (ParseInlineSamplerName function + // in the shader compiler). 
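+ // For example, a sampler named "MyLinearClampSampler" (hypothetical) lowercases to contain both "linear" and "clamp", so it is emitted as a constexpr sampler with filter::linear and clamp_to_edge addressing; names lacking either a filter or a wrap keyword fall through and are declared as a regular [[ sampler(n) ]] argument by the caller.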
+ + std::string samplerName(name); std::transform(samplerName.begin(), samplerName.end(), samplerName.begin(), ::tolower); + + // filter modes + const bool hasPoint = (samplerName.find("point") != std::string::npos); + const bool hasTrilinear = (samplerName.find("trilinear") != std::string::npos); + const bool hasLinear = (samplerName.find("linear") != std::string::npos); + const bool hasAnyFilter = hasPoint || hasTrilinear || hasLinear; + + // wrap modes + const int bitsClamp = ParseInlineSamplerWrapMode(samplerName, "clamp"); + const int bitsRepeat = ParseInlineSamplerWrapMode(samplerName, "repeat"); + const int bitsMirror = ParseInlineSamplerWrapMode(samplerName, "mirror"); + const int bitsMirrorOnce = ParseInlineSamplerWrapMode(samplerName, "mirroronce"); + + const bool hasAnyWrap = bitsClamp != 0 || bitsRepeat != 0 || bitsMirror != 0 || bitsMirrorOnce != 0; + + // depth comparison + const bool hasCompare = (samplerName.find("compare") != std::string::npos); + + // name must contain a filter mode and a wrap mode at least + if (!hasAnyFilter || !hasAnyWrap) + { + return false; + } + + // Starting with macOS 11/iOS 14, the metal compiler will warn about unused inline samplers, that might + // happen on mobile due to _mtl_xl_shadow_sampler workaround that's required for pre-GPUFamily3. + if (hasCompare && IsMobileTarget(psContext)) + return true; + + bstring str = GetEarlyMain(psContext); + bformata(str, "constexpr sampler %s(", name.c_str()); + + if (hasCompare) + bformata(str, "compare_func::greater_equal,"); + + if (hasTrilinear) + bformata(str, "filter::linear,mip_filter::linear,"); + else if (hasLinear) + bformata(str, "filter::linear,mip_filter::nearest,"); + else + bformata(str, "filter::nearest,"); + + const char* kTexWrapClamp = "clamp_to_edge"; + const char* kTexWrapRepeat = "repeat"; + const char* kTexWrapMirror = "mirrored_repeat"; + const char* kTexWrapMirrorOnce = "mirrored_repeat"; // currently Metal shading language does not have syntax for inline sampler state that would do "mirror clamp to edge" + const char* wrapU = kTexWrapRepeat; + const char* wrapV = kTexWrapRepeat; + const char* wrapW = kTexWrapRepeat; + + if (bitsClamp == 1) wrapU = wrapV = wrapW = kTexWrapClamp; + else if (bitsRepeat == 1) wrapU = wrapV = wrapW = kTexWrapRepeat; + else if (bitsMirrorOnce == 1) wrapU = wrapV = wrapW = kTexWrapMirrorOnce; + else if (bitsMirror == 1) wrapU = wrapV = wrapW = kTexWrapMirror; + + if ((bitsClamp & 2) != 0) wrapU = kTexWrapClamp; + if ((bitsClamp & 4) != 0) wrapV = kTexWrapClamp; + if ((bitsClamp & 8) != 0) wrapW = kTexWrapClamp; + + if ((bitsRepeat & 2) != 0) wrapU = kTexWrapRepeat; + if ((bitsRepeat & 4) != 0) wrapV = kTexWrapRepeat; + if ((bitsRepeat & 8) != 0) wrapW = kTexWrapRepeat; + + if ((bitsMirrorOnce & 2) != 0) wrapU = kTexWrapMirrorOnce; + if ((bitsMirrorOnce & 4) != 0) wrapV = kTexWrapMirrorOnce; + if ((bitsMirrorOnce & 8) != 0) wrapW = kTexWrapMirrorOnce; + + if ((bitsMirror & 2) != 0) wrapU = kTexWrapMirror; + if ((bitsMirror & 4) != 0) wrapV = kTexWrapMirror; + if ((bitsMirror & 8) != 0) wrapW = kTexWrapMirror; + + if (wrapU == wrapV && wrapU == wrapW) + bformata(str, "address::%s", wrapU); + else + bformata(str, "s_address::%s,t_address::%s,r_address::%s", wrapU, wrapV, wrapW); + + bformata(str, ");\n"); + + return true; +} + +void ToMetal::TranslateDeclaration(const Declaration* psDecl) +{ + bstring glsl = *psContext->currentGLSLString; + Shader* psShader = psContext->psShader; + + switch (psDecl->eOpcode) + { + case OPCODE_DCL_INPUT_SGV: + case 
OPCODE_DCL_INPUT_PS_SGV: + DeclareBuiltinInput(psDecl); + break; + case OPCODE_DCL_OUTPUT_SIV: + DeclareBuiltinOutput(psDecl); + break; + case OPCODE_DCL_INPUT: + case OPCODE_DCL_INPUT_PS_SIV: + case OPCODE_DCL_INPUT_SIV: + case OPCODE_DCL_INPUT_PS: + { + const Operand* psOperand = &psDecl->asOperands[0]; + + if ((psOperand->eType == OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID) || + (psOperand->eType == OPERAND_TYPE_INPUT_FORK_INSTANCE_ID)) + { + break; + } + + // No need to declare patch constants read again by the hull shader. + if ((psOperand->eType == OPERAND_TYPE_INPUT_PATCH_CONSTANT) && psContext->psShader->eShaderType == HULL_SHADER) + { + break; + } + // ...or control points + if ((psOperand->eType == OPERAND_TYPE_INPUT_CONTROL_POINT) && psContext->psShader->eShaderType == HULL_SHADER) + { + break; + } + + //Already declared as part of an array. + if (psDecl->eOpcode == OPCODE_DCL_INPUT && psShader->aIndexedInput[psOperand->GetRegisterSpace(psContext)][psDecl->asOperands[0].ui32RegisterNumber] == -1) + { + break; + } + + uint32_t ui32Reg = psDecl->asOperands[0].ui32RegisterNumber; + uint32_t ui32CompMask = psDecl->asOperands[0].ui32CompMask; + + std::string name = psContext->GetDeclaredInputName(psOperand, nullptr, 1, nullptr); + + // NB: unlike GL we keep arrays of 2-component vectors as is (without collapsing into float4) + // if(psShader->aIndexedInput[0][psDecl->asOperands[0].ui32RegisterNumber] == -1) + // break; + + // Already declared? + if ((ui32CompMask != 0) && ((ui32CompMask & ~psShader->acInputDeclared[0][ui32Reg]) == 0)) + { + ASSERT(0); // Catch this + break; + } + + if (psOperand->eType == OPERAND_TYPE_INPUT_COVERAGE_MASK) + { + std::ostringstream oss; + oss << "uint " << name << " [[ sample_mask ]]"; + m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); + break; + } + + if (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID) + { + std::ostringstream oss; + oss << "uint3 " << name << " [[ thread_position_in_grid ]]"; + m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); + break; + } + + if (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_GROUP_ID) + { + std::ostringstream oss; + oss << "uint3 " << name << " [[ threadgroup_position_in_grid ]]"; + m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); + break; + } + + if (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP) + { + std::ostringstream oss; + oss << "uint3 " << name << " [[ thread_position_in_threadgroup ]]"; + m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); + break; + } + if (psOperand->eSpecialName == NAME_RENDER_TARGET_ARRAY_INDEX) + { + std::ostringstream oss; + oss << "uint " << name << " [[ render_target_array_index ]]"; + m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); + break; + } + if (psOperand->eType == OPERAND_TYPE_INPUT_DOMAIN_POINT) + { + std::ostringstream oss; + std::string patchPositionType = psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_QUAD ? 
"float2 " : "float3 "; + oss << patchPositionType << name << " [[ position_in_patch ]]"; + m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); + break; + } + if (psOperand->eType == OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED) + { + std::ostringstream oss; + oss << "uint " << name << " [[ thread_index_in_threadgroup ]]"; + m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); + break; + } + if (psOperand->eSpecialName == NAME_VIEWPORT_ARRAY_INDEX) + { + std::ostringstream oss; + oss << "uint " << name << " [[ viewport_array_index ]]"; + m_StructDefinitions[""].m_Members.push_back(std::make_pair(name, oss.str())); + break; + } + + if (psDecl->eOpcode == OPCODE_DCL_INPUT_PS_SIV && psOperand->eSpecialName == NAME_POSITION) + { + m_StructDefinitions[""].m_Members.push_back(std::make_pair("mtl_FragCoord", "float4 mtl_FragCoord [[ position ]]")); + bcatcstr(GetEarlyMain(psContext), "float4 hlslcc_FragCoord = float4(mtl_FragCoord.xyz, 1.0/mtl_FragCoord.w);\n"); + break; + } + + if (psContext->psDependencies) + { + if (psShader->eShaderType == PIXEL_SHADER) + { + psContext->psDependencies->SetInterpolationMode(ui32Reg, psDecl->value.eInterpolation); + } + } + + int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); + + const ShaderInfo::InOutSignature *psSig = NULL; + + // This falls within the specified index ranges. The default is 0 if no input range is specified + if (regSpace == 0) + psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32Reg, ui32CompMask, &psSig); + else + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Reg, ui32CompMask, &psSig); + + if (!psSig) + break; + + // fragment shader cannot reference builtins generated by vertex program (with obvious exception of position) + // TODO: some visible error? handle more builtins? 
+ if (psContext->psShader->eShaderType == PIXEL_SHADER && !strncmp(psSig->semanticName.c_str(), "PSIZE", 5)) + break; + + int iNumComponents = psOperand->GetNumInputElements(psContext); + psShader->acInputDeclared[0][ui32Reg] = (char)psSig->ui32Mask; + + std::string typeName = BuildOperandTypeString(psOperand->eMinPrecision, psSig->eComponentType, iNumComponents); + + std::string semantic; + if (psContext->psShader->eShaderType == VERTEX_SHADER || psContext->psShader->eShaderType == HULL_SHADER || psContext->psShader->eShaderType == DOMAIN_SHADER) + { + std::ostringstream oss; + // VERTEX_SHADER hardcoded on purpose + bool keepLocation = ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0); + uint32_t loc = psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true, keepLocation, psShader->maxSemanticIndex); + oss << "attribute(" << loc << ")"; + semantic = oss.str(); + psContext->m_Reflection.OnInputBinding(name, loc); + } + else + { + std::ostringstream oss; + + // UNITY_FRAMEBUFFER_FETCH_AVAILABLE + // special case mapping for inout color, see HLSLSupport.cginc + if (psOperand->iPSInOut && name.size() == 10 && !strncmp(name.c_str(), "SV_Target", 9)) + { + // Metal allows color(X) declared in input/output structs + oss << "color(xlt_remap_i[" << psSig->ui32SemanticIndex << "])"; + m_NeedFBInputRemapDecl = true; + } + else + { + oss << "user(" << name << ")"; + } + semantic = oss.str(); + } + + std::string interpolation = ""; + if (psDecl->eOpcode == OPCODE_DCL_INPUT_PS) + { + interpolation = GetInterpolationString(psDecl->value.eInterpolation); + } + + std::string declString; + if ((OPERAND_INDEX_DIMENSION)psOperand->iIndexDims == INDEX_2D && psOperand->eType != OPERAND_TYPE_INPUT_CONTROL_POINT && psContext->psShader->eShaderType != HULL_SHADER) + { + std::ostringstream oss; + oss << typeName << " " << name << " [ " << psOperand->aui32ArraySizes[0] << " ] "; + + if (psContext->psShader->eShaderType != HULL_SHADER) + oss << " [[ " << semantic << " ]] " << interpolation; + declString = oss.str(); + } + else + { + std::ostringstream oss; + oss << typeName << " " << name; + if (psContext->psShader->eShaderType != HULL_SHADER) + oss << " [[ " << semantic << " ]] " << interpolation; + declString = oss.str(); + } + + if (psOperand->eType == OPERAND_TYPE_INPUT_PATCH_CONSTANT && psContext->psShader->eShaderType == DOMAIN_SHADER) + { + m_StructDefinitions["Mtl_PatchConstant"].m_Members.push_back(std::make_pair(name, declString)); + } + else if (psOperand->eType == OPERAND_TYPE_INPUT_CONTROL_POINT && psContext->psShader->eShaderType == DOMAIN_SHADER) + { + m_StructDefinitions["Mtl_ControlPoint"].m_Members.push_back(std::make_pair(name, declString)); + } + else if (psContext->psShader->eShaderType == HULL_SHADER) + { + m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair(name, declString)); + } + else + { + m_StructDefinitions[GetInputStructName()].m_Members.push_back(std::make_pair(name, declString)); + } + + HandleInputRedirect(psDecl, BuildOperandTypeString(psOperand->eMinPrecision, INOUT_COMPONENT_FLOAT32, 4)); + break; + } + case OPCODE_DCL_TEMPS: + { + uint32_t i = 0; + const uint32_t ui32NumTemps = psDecl->value.ui32NumTemps; + for (i = 0; i < ui32NumTemps; i++) + { + if (psShader->psFloatTempSizes[i] != 0) + bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT, psShader->psFloatTempSizes[i]), i); + if (psShader->psFloat16TempSizes[i] != 0) + bformata(GetEarlyMain(psContext), "%s " 
HLSLCC_TEMP_PREFIX "16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT16, psShader->psFloat16TempSizes[i]), i); + if (psShader->psFloat10TempSizes[i] != 0) + bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "10_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_FLOAT10, psShader->psFloat10TempSizes[i]), i); + if (psShader->psIntTempSizes[i] != 0) + bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "i%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT, psShader->psIntTempSizes[i]), i); + if (psShader->psInt16TempSizes[i] != 0) + bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "i16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT16, psShader->psInt16TempSizes[i]), i); + if (psShader->psInt12TempSizes[i] != 0) + bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "i12_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_INT12, psShader->psInt12TempSizes[i]), i); + if (psShader->psUIntTempSizes[i] != 0) + bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "u%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_UINT, psShader->psUIntTempSizes[i]), i); + if (psShader->psUInt16TempSizes[i] != 0) + bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "u16_%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_UINT16, psShader->psUInt16TempSizes[i]), i); + if (psShader->fp64 && (psShader->psDoubleTempSizes[i] != 0)) + bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "d%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_DOUBLE, psShader->psDoubleTempSizes[i]), i); + if (psShader->psBoolTempSizes[i] != 0) + bformata(GetEarlyMain(psContext), "%s " HLSLCC_TEMP_PREFIX "b%d;\n", HLSLcc::GetConstructorForType(psContext, SVT_BOOL, psShader->psBoolTempSizes[i]), i); + } + break; + } + case OPCODE_SPECIAL_DCL_IMMCONST: + { + ASSERT(0 && "DX9 shaders no longer supported!"); + break; + } + case OPCODE_DCL_CONSTANT_BUFFER: + { + const ConstantBuffer* psCBuf = NULL; + psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psDecl->asOperands[0].aui32ArraySizes[0], &psCBuf); + ASSERT(psCBuf != NULL); + + if (psCBuf->name.substr(0, 20) == "hlslcc_SubpassInput_" && psCBuf->name.length() >= 23 && !psCBuf->asVars.empty()) + { + // Special case for framebuffer fetch. + char ty = psCBuf->name[20]; + int idx = psCBuf->name[22] - '0'; + + const ShaderVar &sv = psCBuf->asVars[0]; + if (sv.name.substr(0, 15) == "hlslcc_fbinput_") + { + // Pick up the type and index + std::ostringstream oss; + m_NeedFBInputRemapDecl = true; + switch (ty) + { + case 'f': + case 'F': + oss << "float4 " << sv.name << " [[ color(xlt_remap_i[" << idx << "]) ]]"; + m_StructDefinitions[""].m_Members.push_back(std::make_pair(sv.name, oss.str())); + break; + case 'h': + case 'H': + oss << "half4 " << sv.name << " [[ color(xlt_remap_i[" << idx << "]) ]]"; + m_StructDefinitions[""].m_Members.push_back(std::make_pair(sv.name, oss.str())); + break; + case 'i': + case 'I': + oss << "int4 " << sv.name << " [[ color(xlt_remap_i[" << idx << "]) ]]"; + m_StructDefinitions[""].m_Members.push_back(std::make_pair(sv.name, oss.str())); + break; + case 'u': + case 'U': + oss << "uint4 " << sv.name << " [[ color(xlt_remap_i[" << idx << "]) ]]"; + m_StructDefinitions[""].m_Members.push_back(std::make_pair(sv.name, oss.str())); + break; + default: + break; + } + } + // Break out so this doesn't get declared. 
+ break; + } + + DeclareConstantBuffer(psCBuf, psDecl->asOperands[0].aui32ArraySizes[0]); + break; + } + case OPCODE_DCL_RESOURCE: + { + DeclareResource(psDecl); + break; + } + case OPCODE_DCL_OUTPUT: + { + DeclareOutput(psDecl); + break; + } + + case OPCODE_DCL_GLOBAL_FLAGS: + { + uint32_t ui32Flags = psDecl->value.ui32GlobalFlags; + + if (ui32Flags & GLOBAL_FLAG_FORCE_EARLY_DEPTH_STENCIL && psContext->psShader->eShaderType == PIXEL_SHADER) + { + psShader->sInfo.bEarlyFragmentTests = true; + } + if (!(ui32Flags & GLOBAL_FLAG_REFACTORING_ALLOWED)) + { + //TODO add precise + //HLSL precise - http://msdn.microsoft.com/en-us/library/windows/desktop/hh447204(v=vs.85).aspx + } + if (ui32Flags & GLOBAL_FLAG_ENABLE_DOUBLE_PRECISION_FLOAT_OPS) + { + // Not supported on Metal +// psShader->fp64 = 1; + } + break; + } + case OPCODE_DCL_THREAD_GROUP: + { + // Send this info to reflecion: Metal gives this at runtime as a param + psContext->m_Reflection.OnThreadGroupSize(psDecl->value.aui32WorkGroupSize[0], + psDecl->value.aui32WorkGroupSize[1], + psDecl->value.aui32WorkGroupSize[2]); + break; + } + case OPCODE_DCL_TESS_OUTPUT_PRIMITIVE: + { + if (psContext->psShader->eShaderType == HULL_SHADER) + { + psContext->psShader->sInfo.eTessOutPrim = psDecl->value.eTessOutPrim; + if (psContext->psShader->sInfo.eTessOutPrim == TESSELLATOR_OUTPUT_TRIANGLE_CW) + psContext->psShader->sInfo.eTessOutPrim = TESSELLATOR_OUTPUT_TRIANGLE_CCW; + else if (psContext->psShader->sInfo.eTessOutPrim == TESSELLATOR_OUTPUT_TRIANGLE_CCW) + psContext->psShader->sInfo.eTessOutPrim = TESSELLATOR_OUTPUT_TRIANGLE_CW; + } + break; + } + case OPCODE_DCL_TESS_DOMAIN: + { + psContext->psShader->sInfo.eTessDomain = psDecl->value.eTessDomain; + + if (psContext->psShader->sInfo.eTessDomain == TESSELLATOR_DOMAIN_ISOLINE) + psContext->m_Reflection.OnDiagnostics("Metal Tessellation: domain(\"isoline\") not supported.", 0, true); + break; + } + case OPCODE_DCL_TESS_PARTITIONING: + { + psContext->psShader->sInfo.eTessPartitioning = psDecl->value.eTessPartitioning; + break; + } + case OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY: + { + // Not supported + break; + } + case OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT: + { + // Not supported + break; + } + case OPCODE_DCL_GS_INPUT_PRIMITIVE: + { + // Not supported + break; + } + case OPCODE_DCL_INTERFACE: + { + // Are interfaces ever even used? + ASSERT(0); + break; + } + case OPCODE_DCL_FUNCTION_BODY: + { + ASSERT(0); + break; + } + case OPCODE_DCL_FUNCTION_TABLE: + { + ASSERT(0); + break; + } + case OPCODE_CUSTOMDATA: + { + // TODO: This is only ever accessed as a float currently. Do trickery if we ever see ints accessed from an array. + // Walk through all the chunks we've seen in this phase. 
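+ // The immediate constant buffer becomes a "constant float4 ImmCB_<phase>[N]" array; values flagged by fpcheck (typically NaN/INF) are written as as_type(0x...u) so their exact bit pattern survives the source-level round trip.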
+ + bstring glsl = *psContext->currentGLSLString; + bformata(glsl, "constant float4 ImmCB_%d[%d] =\n{\n", psContext->currentPhase, psDecl->asImmediateConstBuffer.size()); + bool isFirst = true; + std::for_each(psDecl->asImmediateConstBuffer.begin(), psDecl->asImmediateConstBuffer.end(), [&](const ICBVec4 &data) + { + if (!isFirst) + { + bcatcstr(glsl, ",\n"); + } + isFirst = false; + + float val[4] = { + *(float*)&data.a, + *(float*)&data.b, + *(float*)&data.c, + *(float*)&data.d + }; + + bformata(glsl, "\tfloat4("); + for (uint32_t k = 0; k < 4; k++) + { + if (k != 0) + bcatcstr(glsl, ", "); + if (fpcheck(val[k])) + bformata(glsl, "as_type(0x%Xu)", *(uint32_t *)&val[k]); + else + HLSLcc::PrintFloat(glsl, val[k]); + } + bcatcstr(glsl, ")"); + }); + bcatcstr(glsl, "\n};\n"); + break; + } + case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT: + case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT: + break; // Nothing to do + + case OPCODE_DCL_INDEXABLE_TEMP: + { + const uint32_t ui32RegIndex = psDecl->sIdxTemp.ui32RegIndex; + const uint32_t ui32RegCount = psDecl->sIdxTemp.ui32RegCount; + const uint32_t ui32RegComponentSize = psDecl->sIdxTemp.ui32RegComponentSize; + bformata(GetEarlyMain(psContext), "float%d TempArray%d[%d];\n", ui32RegComponentSize, ui32RegIndex, ui32RegCount); + break; + } + case OPCODE_DCL_INDEX_RANGE: + { + switch (psDecl->asOperands[0].eType) + { + case OPERAND_TYPE_OUTPUT: + case OPERAND_TYPE_INPUT: + { + const ShaderInfo::InOutSignature* psSignature = NULL; + const char* type = "float"; + uint32_t startReg = 0; + uint32_t i; + bstring *oldString; + int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); + int isInput = psDecl->asOperands[0].eType == OPERAND_TYPE_INPUT ? 1 : 0; + + if (regSpace == 0) + { + if (isInput) + psShader->sInfo.GetInputSignatureFromRegister( + psDecl->asOperands[0].ui32RegisterNumber, + psDecl->asOperands[0].ui32CompMask, + &psSignature); + else + psShader->sInfo.GetOutputSignatureFromRegister( + psDecl->asOperands[0].ui32RegisterNumber, + psDecl->asOperands[0].ui32CompMask, + psShader->ui32CurrentVertexOutputStream, + &psSignature); + } + else + psShader->sInfo.GetPatchConstantSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber, psDecl->asOperands[0].ui32CompMask, &psSignature); + + ASSERT(psSignature != NULL); + + switch (psSignature->eComponentType) + { + case INOUT_COMPONENT_UINT32: + { + type = "uint"; + break; + } + case INOUT_COMPONENT_SINT32: + { + type = "int"; + break; + } + case INOUT_COMPONENT_FLOAT32: + { + break; + } + default: + ASSERT(0); + break; + } + + switch (psSignature->eMinPrec) // TODO What if the inputs in the indexed range are of different precisions? + { + default: + break; + case MIN_PRECISION_ANY_16: + ASSERT(0); // Wut? + break; + case MIN_PRECISION_FLOAT_16: + case MIN_PRECISION_FLOAT_2_8: + type = "half"; + break; + case MIN_PRECISION_SINT_16: + type = "short"; + break; + case MIN_PRECISION_UINT_16: + type = "ushort"; + break; + } + + startReg = psDecl->asOperands[0].ui32RegisterNumber; + oldString = psContext->currentGLSLString; + psContext->currentGLSLString = &psContext->psShader->asPhases[psContext->currentPhase].earlyMain; + psContext->AddIndentation(); + bformata(psContext->psShader->asPhases[psContext->currentPhase].earlyMain, "%s4 phase%d_%sput%d_%d[%d];\n", type, psContext->currentPhase, isInput ? "In" : "Out", regSpace, startReg, psDecl->value.ui32IndexRange); + glsl = isInput ? 
psContext->psShader->asPhases[psContext->currentPhase].earlyMain : psContext->psShader->asPhases[psContext->currentPhase].postShaderCode; + psContext->currentGLSLString = &glsl; + if (isInput == 0) + psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode = 1; + for (i = 0; i < psDecl->value.ui32IndexRange; i++) + { + int dummy = 0; + std::string realName; + uint32_t destMask = psDecl->asOperands[0].ui32CompMask; + uint32_t rebase = 0; + const ShaderInfo::InOutSignature *psSig = NULL; + uint32_t regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); + + if (regSpace == 0) + if (isInput) + psContext->psShader->sInfo.GetInputSignatureFromRegister(startReg + i, destMask, &psSig); + else + psContext->psShader->sInfo.GetOutputSignatureFromRegister(startReg + i, destMask, 0, &psSig); + else + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(startReg + i, destMask, &psSig); + + ASSERT(psSig != NULL); + + if ((psSig->ui32Mask & destMask) == 0) + continue; // Skip dummy writes (vec2 texcoords get filled to vec4 with zeroes etc) + + while ((psSig->ui32Mask & (1 << rebase)) == 0) + rebase++; + + ((Declaration *)psDecl)->asOperands[0].ui32RegisterNumber = startReg + i; + + if (isInput) + { + realName = psContext->GetDeclaredInputName(&psDecl->asOperands[0], &dummy, 1, NULL); + + psContext->AddIndentation(); + bformata(glsl, "phase%d_Input%d_%d[%d]", psContext->currentPhase, regSpace, startReg, i); + + if (destMask != OPERAND_4_COMPONENT_MASK_ALL) + { + int k; + const char *swizzle = "xyzw"; + bcatcstr(glsl, "."); + for (k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) + { + bformata(glsl, "%c", swizzle[k]); + } + } + } + + // for some reason input struct is missed here from GetDeclaredInputName result, so add it manually + bformata(glsl, " = input.%s", realName.c_str()); + if (destMask != OPERAND_4_COMPONENT_MASK_ALL && destMask != psSig->ui32Mask) + { + int k; + const char *swizzle = "xyzw"; + bcatcstr(glsl, "."); + for (k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) + { + bformata(glsl, "%c", swizzle[k - rebase]); + } + } + } + } + else + { + realName = psContext->GetDeclaredOutputName(&psDecl->asOperands[0], &dummy, NULL, NULL, 0); + + psContext->AddIndentation(); + bcatcstr(glsl, realName.c_str()); + if (destMask != OPERAND_4_COMPONENT_MASK_ALL && destMask != psSig->ui32Mask) + { + int k; + const char *swizzle = "xyzw"; + bcatcstr(glsl, "."); + for (k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) + { + bformata(glsl, "%c", swizzle[k - rebase]); + } + } + } + + bformata(glsl, " = phase%d_Output%d_%d[%d]", psContext->currentPhase, regSpace, startReg, i); + + if (destMask != OPERAND_4_COMPONENT_MASK_ALL) + { + int k; + const char *swizzle = "xyzw"; + bcatcstr(glsl, "."); + for (k = 0; k < 4; k++) + { + if ((destMask & (1 << k)) && (psSig->ui32Mask & (1 << k))) + { + bformata(glsl, "%c", swizzle[k]); + } + } + } + } + + bcatcstr(glsl, ";\n"); + } + + ((Declaration *)psDecl)->asOperands[0].ui32RegisterNumber = startReg; + psContext->currentGLSLString = oldString; + glsl = *psContext->currentGLSLString; + + for (i = 0; i < psDecl->value.ui32IndexRange; i++) + { + if (regSpace == 0) + { + if (isInput) + psShader->sInfo.GetInputSignatureFromRegister( + psDecl->asOperands[0].ui32RegisterNumber + i, + psDecl->asOperands[0].ui32CompMask, + &psSignature); + else + psShader->sInfo.GetOutputSignatureFromRegister( + psDecl->asOperands[0].ui32RegisterNumber + i, + 
psDecl->asOperands[0].ui32CompMask, + psShader->ui32CurrentVertexOutputStream, + &psSignature); + } + else + psShader->sInfo.GetPatchConstantSignatureFromRegister(psDecl->asOperands[0].ui32RegisterNumber + i, psDecl->asOperands[0].ui32CompMask, &psSignature); + + ASSERT(psSignature != NULL); + + ((ShaderInfo::InOutSignature *)psSignature)->isIndexed.insert(psContext->currentPhase); + ((ShaderInfo::InOutSignature *)psSignature)->indexStart[psContext->currentPhase] = startReg; + ((ShaderInfo::InOutSignature *)psSignature)->index[psContext->currentPhase] = i; + } + + + break; + } + default: + // TODO Input index ranges. + ASSERT(0); + } + break; + } + + case OPCODE_HS_DECLS: + { + // Not supported + break; + } + case OPCODE_DCL_INPUT_CONTROL_POINT_COUNT: + { + if (psContext->psShader->eShaderType == HULL_SHADER) + psShader->sInfo.ui32TessInputControlPointCount = psDecl->value.ui32MaxOutputVertexCount; + else if (psContext->psShader->eShaderType == DOMAIN_SHADER) + psShader->sInfo.ui32TessOutputControlPointCount = psDecl->value.ui32MaxOutputVertexCount; + break; + } + case OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT: + { + if (psContext->psShader->eShaderType == HULL_SHADER) + psShader->sInfo.ui32TessOutputControlPointCount = psDecl->value.ui32MaxOutputVertexCount; + break; + } + case OPCODE_HS_FORK_PHASE: + { + // Not supported + break; + } + case OPCODE_HS_JOIN_PHASE: + { + // Not supported + break; + } + case OPCODE_DCL_SAMPLER: + { + std::string name = TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY); + + if (!EmitInlineSampler(psContext, name)) + { + // for some reason we have some samplers start with "sampler" and some not + const bool startsWithSampler = name.find("sampler") == 0; + + std::ostringstream samplerOss; + samplerOss << (startsWithSampler ? "" : "sampler") << name; + std::string samplerName = samplerOss.str(); + + if (!psContext->psDependencies->IsMemberDeclared(samplerName)) + { + const uint32_t slot = m_SamplerSlots.GetBindingSlot(psDecl->asOperands[0].ui32RegisterNumber, BindingSlotAllocator::Texture); + std::ostringstream oss; + oss << "sampler " << samplerName << " [[ sampler (" << slot << ") ]]"; + + m_StructDefinitions[""].m_Members.push_back(std::make_pair(samplerName, oss.str())); + + SamplerDesc desc = { name, psDecl->asOperands[0].ui32RegisterNumber, slot }; + m_Samplers.push_back(desc); + } + } + + break; + } + case OPCODE_DCL_HS_MAX_TESSFACTOR: + { + if (psContext->psShader->eShaderType == HULL_SHADER && psContext->psDependencies) + psContext->psDependencies->fMaxTessFactor = psDecl->value.fMaxTessFactor; + break; + } + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED: + { + // A hack to support single component 32bit RWBuffers: Declare as raw buffer. + // TODO: Use textures for RWBuffers when the scripting API has actual format selection etc + // way to flag the created ComputeBuffer as typed. Even then might want to leave this + // hack path for 32bit (u)int typed buffers to continue support atomic ops on those formats. 
+ if (psDecl->value.eResourceDimension == RESOURCE_DIMENSION_BUFFER) + { + DeclareBufferVariable(psDecl, true, true); + break; + } + std::string texName = ResourceName(RGROUP_UAV, psDecl->asOperands[0].ui32RegisterNumber); + std::string samplerTypeName = TranslateResourceDeclaration(psContext, psDecl, texName, false, true); + if (!psContext->psDependencies->IsMemberDeclared(texName)) + { + uint32_t slot = m_TextureSlots.GetBindingSlot(psDecl->asOperands[0].ui32RegisterNumber, BindingSlotAllocator::UAV); + + std::ostringstream oss; + oss << samplerTypeName << " " << texName << " [[ texture(" << slot << ") ]] "; + + m_StructDefinitions[""].m_Members.push_back(std::make_pair(texName, oss.str())); + + HLSLCC_TEX_DIMENSION texDim = TD_INT; + switch (psDecl->value.eResourceDimension) + { + default: break; + case RESOURCE_DIMENSION_TEXTURE2D: + case RESOURCE_DIMENSION_TEXTURE2DMS: + texDim = TD_2D; + break; + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + texDim = TD_2DARRAY; + break; + case RESOURCE_DIMENSION_TEXTURE3D: + texDim = TD_3D; + break; + case RESOURCE_DIMENSION_TEXTURECUBE: + texDim = TD_CUBE; + break; + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + texDim = TD_CUBEARRAY; + break; + } + TextureSamplerDesc desc = {texName, (int)slot, -1, texDim, false, false, true}; + m_Textures.push_back(desc); + } + break; + } + + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED: + { + DeclareBufferVariable(psDecl, false, true); + break; + } + case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW: + { + DeclareBufferVariable(psDecl, true, true); + break; + } + case OPCODE_DCL_RESOURCE_STRUCTURED: + { + DeclareBufferVariable(psDecl, false, false); + break; + } + case OPCODE_DCL_RESOURCE_RAW: + { + DeclareBufferVariable(psDecl, true, false); + break; + } + case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED: + { + ShaderVarType* psVarType = &psShader->sInfo.sGroupSharedVarType[psDecl->asOperands[0].ui32RegisterNumber]; + std::ostringstream oss; + oss << "uint value[" << psDecl->sTGSM.ui32Stride / 4 << "]"; + m_StructDefinitions[TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) + "_Type"].m_Members.push_back(std::make_pair("value", oss.str())); + m_StructDefinitions[""].m_Dependencies.push_back(TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) + "_Type"); + oss.str(""); + oss << "threadgroup " << TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) + << "_Type " << TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) + << "[" << psDecl->sTGSM.ui32Count << "]"; + + bformata(GetEarlyMain(psContext), "%s;\n", oss.str().c_str()); + psVarType->name = "$Element"; + + psVarType->Columns = psDecl->sTGSM.ui32Stride / 4; + psVarType->Elements = psDecl->sTGSM.ui32Count; + break; + } + case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW: + { + ShaderVarType* psVarType = &psShader->sInfo.sGroupSharedVarType[psDecl->asOperands[0].ui32RegisterNumber]; + + std::ostringstream oss; + oss << "threadgroup uint " << TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY) + << "[" << (psDecl->sTGSM.ui32Count / psDecl->sTGSM.ui32Stride) << "]"; + + bformata(GetEarlyMain(psContext), "%s;\n", oss.str().c_str()); + psVarType->name = "$Element"; + + psVarType->Columns = 1; + psVarType->Elements = psDecl->sTGSM.ui32Count / psDecl->sTGSM.ui32Stride; + break; + } + + case OPCODE_DCL_STREAM: + { + // Not supported on Metal + break; + } + case OPCODE_DCL_GS_INSTANCE_COUNT: + { + // Not supported on Metal + break; + } + + default: + ASSERT(0); + break; + } +} + +std::string 
ToMetal::ResourceName(ResourceGroup group, const uint32_t ui32RegisterNumber) +{ + const ResourceBinding* psBinding = 0; + std::ostringstream oss; + int found; + + found = psContext->psShader->sInfo.GetResourceFromBindingPoint(group, ui32RegisterNumber, &psBinding); + + if (found) + { + size_t i = 0; + std::string name = psBinding->name; + uint32_t ui32ArrayOffset = ui32RegisterNumber - psBinding->ui32BindPoint; + + while (i < name.length()) + { + //array syntax [X] becomes _0_ + //Otherwise declarations could end up as: + //uniform sampler2D SomeTextures[0]; + //uniform sampler2D SomeTextures[1]; + if (name[i] == '[' || name[i] == ']') + name[i] = '_'; + + ++i; + } + + if (ui32ArrayOffset) + { + oss << name << ui32ArrayOffset; + return oss.str(); + } + else + { + return name; + } + } + else + { + oss << "UnknownResource" << ui32RegisterNumber; + return oss.str(); + } +} + +void ToMetal::TranslateResourceTexture(const Declaration* psDecl, uint32_t samplerCanDoShadowCmp, HLSLCC_TEX_DIMENSION texDim) +{ + std::string texName = ResourceName(RGROUP_TEXTURE, psDecl->asOperands[0].ui32RegisterNumber); + const bool isDepthSampler = (samplerCanDoShadowCmp && psDecl->ui32IsShadowTex); + std::string samplerTypeName = TranslateResourceDeclaration(psContext, psDecl, texName, isDepthSampler, false); + + bool isMS = false; + switch (psDecl->value.eResourceDimension) + { + default: + break; + case RESOURCE_DIMENSION_TEXTURE2DMS: + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + isMS = true; + break; + } + + if (!psContext->psDependencies->IsMemberDeclared(texName)) + { + uint32_t slot = m_TextureSlots.GetBindingSlot(psDecl->asOperands[0].ui32RegisterNumber, BindingSlotAllocator::Texture); + + std::ostringstream oss; + oss << samplerTypeName << " " << texName << " [[ texture(" << slot << ") ]] "; + + m_StructDefinitions[""].m_Members.push_back(std::make_pair(texName, oss.str())); + + TextureSamplerDesc desc = {texName, (int)slot, -1, texDim, isMS, isDepthSampler, false}; + m_Textures.push_back(desc); + + if (isDepthSampler) + EnsureShadowSamplerDeclared(); + } +} + +void ToMetal::DeclareResource(const Declaration *psDecl) +{ + switch (psDecl->value.eResourceDimension) + { + case RESOURCE_DIMENSION_BUFFER: + { + // Fake single comp 32bit texel buffers by using raw buffer + DeclareBufferVariable(psDecl, true, false); + break; + + // TODO: re-enable this code for buffer textures when sripting API has proper support for it +#if 0 + if (!psContext->psDependencies->IsMemberDeclared(texName)) + { + uint32_t slot = m_TextureSlots.GetBindingSlot(psDecl->asOperands[0].ui32RegisterNumber, BindingSlotAllocator::Texture); + std::string texName = TranslateOperand(&psDecl->asOperands[0], TO_FLAG_NAME_ONLY); + std::ostringstream oss; + oss << "device " << TranslateResourceDeclaration(psContext, psDecl, texName, false, false); + + oss << texName << " [[ texture(" << slot << ") ]]"; + + m_StructDefinitions[""].m_Members.push_back(std::make_pair(texName, oss.str())); + psContext->m_Reflection.OnTextureBinding(texName, slot, TD_2D, false); //TODO: correct HLSLCC_TEX_DIMENSION? + } + break; +#endif + } + default: + ASSERT(0); + break; + + case RESOURCE_DIMENSION_TEXTURE1D: + { + TranslateResourceTexture(psDecl, 1, TD_2D); //TODO: correct HLSLCC_TEX_DIMENSION? 
+ break; + } + case RESOURCE_DIMENSION_TEXTURE2D: + { + TranslateResourceTexture(psDecl, 1, TD_2D); + break; + } + case RESOURCE_DIMENSION_TEXTURE2DMS: + { + TranslateResourceTexture(psDecl, 0, TD_2D); + break; + } + case RESOURCE_DIMENSION_TEXTURE3D: + { + TranslateResourceTexture(psDecl, 0, TD_3D); + break; + } + case RESOURCE_DIMENSION_TEXTURECUBE: + { + TranslateResourceTexture(psDecl, 1, TD_CUBE); + break; + } + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + TranslateResourceTexture(psDecl, 1, TD_2DARRAY); //TODO: correct HLSLCC_TEX_DIMENSION? + break; + } + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + TranslateResourceTexture(psDecl, 1, TD_2DARRAY); + break; + } + case RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + TranslateResourceTexture(psDecl, 0, TD_2DARRAY); + break; + } + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + TranslateResourceTexture(psDecl, 1, TD_CUBEARRAY); + break; + } + } + psContext->psShader->aeResourceDims[psDecl->asOperands[0].ui32RegisterNumber] = psDecl->value.eResourceDimension; +} + +void ToMetal::DeclareOutput(const Declaration *psDecl) +{ + Shader* psShader = psContext->psShader; + + if (!psContext->OutputNeedsDeclaring(&psDecl->asOperands[0], 1)) + return; + + const Operand* psOperand = &psDecl->asOperands[0]; + int iNumComponents; + int regSpace = psDecl->asOperands[0].GetRegisterSpace(psContext); + uint32_t ui32Reg = psDecl->asOperands[0].ui32RegisterNumber; + + const ShaderInfo::InOutSignature* psSignature = NULL; + SHADER_VARIABLE_TYPE cType = SVT_VOID; + + if (psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH || + psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL || + psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL) + { + iNumComponents = 1; + cType = SVT_FLOAT; + } + else + { + if (regSpace == 0) + psShader->sInfo.GetOutputSignatureFromRegister( + ui32Reg, + psDecl->asOperands[0].ui32CompMask, + psShader->ui32CurrentVertexOutputStream, + &psSignature); + else + psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Reg, psDecl->asOperands[0].ui32CompMask, &psSignature); + + iNumComponents = HLSLcc::GetNumberBitsSet(psSignature->ui32Mask); + + switch (psSignature->eComponentType) + { + case INOUT_COMPONENT_UINT32: + { + cType = SVT_UINT; + break; + } + case INOUT_COMPONENT_SINT32: + { + cType = SVT_INT; + break; + } + case INOUT_COMPONENT_FLOAT32: + { + cType = SVT_FLOAT; + break; + } + default: + ASSERT(0); + break; + } + // Don't set this for oDepth (or variants), because depth output register is in separate space from other outputs (regno 0, but others may overlap with that) + if (iNumComponents == 1) + psContext->psShader->abScalarOutput[regSpace][ui32Reg] |= (int)psDecl->asOperands[0].ui32CompMask; + + switch (psOperand->eMinPrecision) + { + case OPERAND_MIN_PRECISION_DEFAULT: + break; + case OPERAND_MIN_PRECISION_FLOAT_16: + cType = SVT_FLOAT16; + break; + case OPERAND_MIN_PRECISION_FLOAT_2_8: + cType = SVT_FLOAT10; + break; + case OPERAND_MIN_PRECISION_SINT_16: + cType = SVT_INT16; + break; + case OPERAND_MIN_PRECISION_UINT_16: + cType = SVT_UINT16; + break; + } + } + + std::string type = HLSLcc::GetConstructorForTypeMetal(cType, iNumComponents); + std::string name = psContext->GetDeclaredOutputName(&psDecl->asOperands[0], nullptr, nullptr, nullptr, 1); + + switch (psShader->eShaderType) + { + case PIXEL_SHADER: + { + switch (psDecl->asOperands[0].eType) + { + case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: + { + std::ostringstream oss; + oss << type << " " << name << " [[ sample_mask ]]"; + 
m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(name, oss.str())); + break; + } + case OPERAND_TYPE_OUTPUT_DEPTH: + { + std::ostringstream oss; + oss << type << " " << name << " [[ depth(any) ]]"; + m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(name, oss.str())); + break; + } + case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: + { + std::ostringstream oss; + oss << type << " " << name << " [[ depth(greater) ]]"; + m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(name, oss.str())); + break; + } + case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: + { + std::ostringstream oss; + oss << type << " " << name << " [[ depth(less) ]]"; + m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(name, oss.str())); + break; + } + default: + { + std::ostringstream oss; + oss << type << " " << name << " [[ color(xlt_remap_o[" << psSignature->ui32SemanticIndex << "]) ]]"; + m_NeedFBOutputRemapDecl = true; + m_StructDefinitions[GetOutputStructName()].m_Members.push_back(std::make_pair(name, oss.str())); + psContext->m_Reflection.OnFragmentOutputDeclaration(iNumComponents, psSignature->ui32SemanticIndex); + } + } + break; + } + case VERTEX_SHADER: + case DOMAIN_SHADER: + case HULL_SHADER: + { + std::string out = GetOutputStructName(); + bool isTessKernel = (psContext->flags & HLSLCC_FLAG_METAL_TESSELLATION) != 0 && (psContext->psShader->eShaderType == HULL_SHADER || psContext->psShader->eShaderType == VERTEX_SHADER); + + std::ostringstream oss; + oss << type << " " << name; + if (!isTessKernel && (psSignature->eSystemValueType == NAME_POSITION || psSignature->semanticName == "POS") && psOperand->ui32RegisterNumber == 0) + oss << " [[ position ]]"; + else if (!isTessKernel && psSignature->eSystemValueType == NAME_UNDEFINED && psSignature->semanticName == "PSIZE" && psSignature->ui32SemanticIndex == 0) + oss << " [[ point_size ]]"; + else + oss << " [[ user(" << name << ") ]]"; + m_StructDefinitions[out].m_Members.push_back(std::make_pair(name, oss.str())); + + if (psContext->psShader->eShaderType == VERTEX_SHADER) + psContext->m_Reflection.OnVertexProgramOutput(name, psSignature->semanticName, psSignature->ui32SemanticIndex); + + // For preserving data layout, declare output struct as domain shader input, too + if (psContext->psShader->eShaderType == HULL_SHADER) + { + out += "In"; + + std::ostringstream oss; + oss << type << " " << name; + + // VERTEX_SHADER hardcoded on purpose + bool keepLocation = ((psContext->flags & HLSLCC_FLAG_KEEP_VARYING_LOCATIONS) != 0); + uint32_t loc = psContext->psDependencies->GetVaryingLocation(name, VERTEX_SHADER, true, keepLocation, psContext->psShader->maxSemanticIndex); + oss << " [[ " << "attribute(" << loc << ")" << " ]] "; + + psContext->m_Reflection.OnInputBinding(name, loc); + m_StructDefinitions[out].m_Members.push_back(std::make_pair(name, oss.str())); + } + break; + } + case GEOMETRY_SHADER: + default: + ASSERT(0); + break; + } + HandleOutputRedirect(psDecl, HLSLcc::GetConstructorForTypeMetal(cType, 4)); +} + +void ToMetal::EnsureShadowSamplerDeclared() +{ + // on macos we will set comparison func from the app side + if (m_ShadowSamplerDeclared || !IsMobileTarget(psContext)) + return; + + if ((psContext->flags & HLSLCC_FLAG_METAL_SHADOW_SAMPLER_LINEAR) != 0 || (psContext->psShader->eShaderType == COMPUTE_SHADER)) + m_ExtraGlobalDefinitions += "constexpr sampler _mtl_xl_shadow_sampler(address::clamp_to_edge, filter::linear, compare_func::greater_equal);\n"; + else + 
m_ExtraGlobalDefinitions += "constexpr sampler _mtl_xl_shadow_sampler(address::clamp_to_edge, filter::nearest, compare_func::greater_equal);\n"; + m_ShadowSamplerDeclared = true; +} diff --git a/third_party/HLSLcc/src/toMetalInstruction.cpp b/third_party/HLSLcc/src/toMetalInstruction.cpp new file mode 100644 index 0000000..a4c0645 --- /dev/null +++ b/third_party/HLSLcc/src/toMetalInstruction.cpp @@ -0,0 +1,4096 @@ +#include "internal_includes/toMetal.h" +#include "internal_includes/HLSLccToolkit.h" +#include "internal_includes/languages.h" +#include "internal_includes/HLSLCrossCompilerContext.h" +#include "bstrlib.h" +#include "stdio.h" +#include <algorithm> +#include <cmath> +#include <string> /* needed for std::min/std::max, fabs and std::string used in this file */ +#include "internal_includes/debug.h" +#include "internal_includes/Shader.h" +#include "internal_includes/Instruction.h" +#include "hlslcc.h" + +using namespace HLSLcc; + +bstring operator<<(bstring a, const std::string &b) +{ + bcatcstr(a, b.c_str()); + return a; +} + +static void AddOpAssignToDest(bstring glsl, SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, SHADER_VARIABLE_TYPE eDestType, uint32_t ui32DestElementCount, uint32_t precise, int& numParenthesis, bool allowReinterpretCast = true) +{ + numParenthesis = 0; + + // Work out the precision qualifiers and the precision-stripped base types + RESOURCE_RETURN_TYPE srcBareType = SVTTypeToResourceReturnType(eSrcType); + RESOURCE_RETURN_TYPE dstBareType = SVTTypeToResourceReturnType(eDestType); + REFLECT_RESOURCE_PRECISION srcPrec = SVTTypeToPrecision(eSrcType); + REFLECT_RESOURCE_PRECISION dstPrec = SVTTypeToPrecision(eDestType); + + // Add assignment + bcatcstr(glsl, " = "); + + /* TODO: implement precise for metal + if (precise) + { + bcatcstr(glsl, "u_xlat_precise("); + numParenthesis++; + }*/ + + // Special reinterpret cast between float<->uint/int if size matches + // TODO: Handle bools?
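+ // Illustrative example (operand names assumed): a float-to-uint move of matching size is emitted below + // as a bit-preserving "dst = as_type<uint4>(src)" rather than as a value-converting constructor cast.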
+ if (srcBareType != dstBareType && (srcBareType == RETURN_TYPE_FLOAT || dstBareType == RETURN_TYPE_FLOAT) && srcPrec == dstPrec && allowReinterpretCast) + { + bformata(glsl, "as_type<%s>(", GetConstructorForTypeMetal(eDestType, ui32DestElementCount)); + numParenthesis++; + if (ui32DestElementCount > ui32SrcElementCount) + { + bformata(glsl, "%s(", GetConstructorForTypeMetal(eSrcType, ui32DestElementCount)); + numParenthesis++; + } + return; + } + + // Do cast in case of type or dimension mismatch + if (eSrcType != eDestType || ui32DestElementCount > ui32SrcElementCount) + { + bformata(glsl, "%s(", GetConstructorForTypeMetal(eDestType, ui32DestElementCount)); + numParenthesis++; + return; + } +} + +// This function prints out the destination name, any destination writemask, the assignment operator +// and any conversions needed based on eSrcType + ui32SrcElementCount (the type and size of the incoming data) +// As an output, numParenthesis is set to the number of closing parentheses that AddAssignPrologue must emit afterwards +// ui32CompMask can be used to only write to 1 or more components (used by MOVC) +void ToMetal::AddOpAssignToDestWithMask(const Operand* psDest, + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int& numParenthesis, uint32_t ui32CompMask) +{ + uint32_t ui32DestElementCount = psDest->GetNumSwizzleElements(ui32CompMask); + bstring glsl = *psContext->currentGLSLString; + SHADER_VARIABLE_TYPE eDestType = psDest->GetDataType(psContext); + glsl << TranslateOperand(psDest, TO_FLAG_DESTINATION, ui32CompMask); + AddOpAssignToDest(glsl, eSrcType, ui32SrcElementCount, eDestType, ui32DestElementCount, precise, numParenthesis, psContext->psShader->ui32MajorVersion > 3); +} + +void ToMetal::AddAssignToDest(const Operand* psDest, + SHADER_VARIABLE_TYPE eSrcType, uint32_t ui32SrcElementCount, uint32_t precise, int& numParenthesis) +{ + AddOpAssignToDestWithMask(psDest, eSrcType, ui32SrcElementCount, precise, numParenthesis, OPERAND_4_COMPONENT_MASK_ALL); +} + +void ToMetal::AddAssignPrologue(int numParenthesis) +{ + bstring glsl = *psContext->currentGLSLString; + while (numParenthesis != 0) + { + bcatcstr(glsl, ")"); + numParenthesis--; + } + bcatcstr(glsl, ";\n"); +} + +void ToMetal::AddComparison(Instruction* psInst, ComparisonType eType, + uint32_t typeFlag) +{ + // Multiple cases to consider here: + // OPCODE_LT, _GT, _NE etc: inputs are floats, outputs UINT 0xffffffff or 0.
typeflag: TO_FLAG_NONE + // OPCODE_ILT, _IGT etc: comparisons are signed ints, outputs UINT 0xffffffff or 0 typeflag TO_FLAG_INTEGER + // _ULT, UGT etc: inputs unsigned ints, outputs UINTs typeflag TO_FLAG_UNSIGNED_INTEGER + // + + + bstring glsl = *psContext->currentGLSLString; + const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); + const uint32_t s0ElemCount = psInst->asOperands[1].GetNumSwizzleElements(); + const uint32_t s1ElemCount = psInst->asOperands[2].GetNumSwizzleElements(); + int isBoolDest = psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL; + const uint32_t destMask = psInst->asOperands[0].GetAccessMask(); + + int needsParenthesis = 0; + if (typeFlag == TO_FLAG_NONE + && CanForceToHalfOperand(&psInst->asOperands[1]) + && CanForceToHalfOperand(&psInst->asOperands[2])) + typeFlag = TO_FLAG_FORCE_HALF; + ASSERT(s0ElemCount == s1ElemCount || s1ElemCount == 1 || s0ElemCount == 1); + if ((s0ElemCount != s1ElemCount) && (destElemCount > 1)) + { + // Set the proper auto-expand flag is either argument is scalar + typeFlag |= (TO_AUTO_EXPAND_TO_VEC2 << (std::min(std::max(s0ElemCount, s1ElemCount), destElemCount) - 2)); + } + if (destElemCount > 1) + { + const char* glslOpcode[] = { + "==", + "<", + ">=", + "!=", + }; + psContext->AddIndentation(); + if (isBoolDest) + { + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_BOOL); + bcatcstr(glsl, " = "); + } + else + { + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, destElemCount, psInst->ui32PreciseMask, needsParenthesis); + + bcatcstr(glsl, GetConstructorForTypeMetal(SVT_UINT, destElemCount)); + bcatcstr(glsl, "("); + } + bcatcstr(glsl, "("); + glsl << TranslateOperand(&psInst->asOperands[1], typeFlag, destMask); + bformata(glsl, "%s", glslOpcode[eType]); + glsl << TranslateOperand(&psInst->asOperands[2], typeFlag, destMask); + bcatcstr(glsl, ")"); + if (!isBoolDest) + { + bcatcstr(glsl, ")"); + bcatcstr(glsl, " * 0xFFFFFFFFu"); + } + + AddAssignPrologue(needsParenthesis); + } + else + { + const char* glslOpcode[] = { + "==", + "<", + ">=", + "!=", + }; + + //Scalar compare + + psContext->AddIndentation(); + if (isBoolDest) + { + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION | TO_FLAG_BOOL); + bcatcstr(glsl, " = "); + } + else + { + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, destElemCount, psInst->ui32PreciseMask, needsParenthesis); + bcatcstr(glsl, "("); + } + glsl << TranslateOperand(&psInst->asOperands[1], typeFlag, destMask); + bformata(glsl, "%s", glslOpcode[eType]); + glsl << TranslateOperand(&psInst->asOperands[2], typeFlag, destMask); + if (!isBoolDest) + { + bcatcstr(glsl, ") ? 
0xFFFFFFFFu : uint(0)"); + } + AddAssignPrologue(needsParenthesis); + } +} + +bool ToMetal::CanForceToHalfOperand(const Operand *psOperand) +{ + if (psOperand->GetDataType(psContext) == SVT_FLOAT16) + return true; + + if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32 || psOperand->eType == OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER) + { + for (int i = 0; i < psOperand->iNumComponents; i++) + { + float val = fabs(psOperand->afImmediates[i]); + // Do not allow forcing immediate value to half if value is beyond half min/max boundaries + if (val != 0 && (val > 65504 || val < 6.10352e-5)) + return false; + } + return true; + } + + return false; +} + +void ToMetal::AddMOVBinaryOp(const Operand *pDest, Operand *pSrc, uint32_t precise) +{ + bstring glsl = *psContext->currentGLSLString; + int numParenthesis = 0; + int srcSwizzleCount = pSrc->GetNumSwizzleElements(); + uint32_t writeMask = pDest->GetAccessMask(); + + const SHADER_VARIABLE_TYPE eSrcType = pSrc->GetDataType(psContext, pDest->GetDataType(psContext)); + uint32_t flags = SVTTypeToFlag(eSrcType); + + AddAssignToDest(pDest, eSrcType, srcSwizzleCount, precise, numParenthesis); + glsl << TranslateOperand(pSrc, flags, writeMask); + + AddAssignPrologue(numParenthesis); +} + +void ToMetal::AddMOVCBinaryOp(const Operand *pDest, const Operand *src0, Operand *src1, Operand *src2, uint32_t precise) +{ + bstring glsl = *psContext->currentGLSLString; + uint32_t destElemCount = pDest->GetNumSwizzleElements(); + uint32_t s0ElemCount = src0->GetNumSwizzleElements(); + uint32_t s1ElemCount = src1->GetNumSwizzleElements(); + uint32_t s2ElemCount = src2->GetNumSwizzleElements(); + uint32_t destWriteMask = pDest->GetAccessMask(); + uint32_t destElem; + + const SHADER_VARIABLE_TYPE eDestType = pDest->GetDataType(psContext); + /* + for each component in dest[.mask] + if the corresponding component in src0 (POS-swizzle) + has any bit set + { + copy this component (POS-swizzle) from src1 into dest + } + else + { + copy this component (POS-swizzle) from src2 into dest + } + endfor + */ + + /* Single-component conditional variable (src0) */ + if (s0ElemCount == 1 || src0->IsSwizzleReplicated()) + { + int numParenthesis = 0; + SHADER_VARIABLE_TYPE s0Type = src0->GetDataType(psContext); + psContext->AddIndentation(); + AddAssignToDest(pDest, eDestType, destElemCount, precise, numParenthesis); + bcatcstr(glsl, "("); + if (s0Type == SVT_UINT || s0Type == SVT_UINT16) + glsl << TranslateOperand(src0, TO_AUTO_BITCAST_TO_UINT, OPERAND_4_COMPONENT_MASK_X); + else if (s0Type == SVT_BOOL) + glsl << TranslateOperand(src0, TO_FLAG_BOOL, OPERAND_4_COMPONENT_MASK_X); + else + glsl << TranslateOperand(src0, TO_AUTO_BITCAST_TO_INT, OPERAND_4_COMPONENT_MASK_X); + + if (psContext->psShader->ui32MajorVersion < 4) + { + //cmp opcode uses >= 0 + bcatcstr(glsl, " >= 0) ? "); + } + else + { + if (s0Type == SVT_UINT || s0Type == SVT_UINT16) + bcatcstr(glsl, " != uint(0)) ? "); + else if (s0Type == SVT_BOOL) + bcatcstr(glsl, ") ? "); + else + bcatcstr(glsl, " != 0) ? 
"); + } + + if (s1ElemCount == 1 && destElemCount > 1) + glsl << TranslateOperand(src1, SVTTypeToFlag(eDestType) | ElemCountToAutoExpandFlag(destElemCount)); + else + glsl << TranslateOperand(src1, SVTTypeToFlag(eDestType), destWriteMask); + + bcatcstr(glsl, " : "); + if (s2ElemCount == 1 && destElemCount > 1) + glsl << TranslateOperand(src2, SVTTypeToFlag(eDestType) | ElemCountToAutoExpandFlag(destElemCount)); + else + glsl << TranslateOperand(src2, SVTTypeToFlag(eDestType), destWriteMask); + + AddAssignPrologue(numParenthesis); + } + else + { + int srcElem = -1; + SHADER_VARIABLE_TYPE dstType = pDest->GetDataType(psContext); + SHADER_VARIABLE_TYPE s0Type = src0->GetDataType(psContext); + + // Use an extra temp if dest is also one of the sources. Without this some swizzle combinations + // might alter the source before all components are handled. + const std::string tempName = "hlslcc_movcTemp"; + bool dstIsSrc1 = (pDest->eType == src1->eType) + && (dstType == src1->GetDataType(psContext)) + && (pDest->ui32RegisterNumber == src1->ui32RegisterNumber); + bool dstIsSrc2 = (pDest->eType == src2->eType) + && (dstType == src2->GetDataType(psContext)) + && (pDest->ui32RegisterNumber == src2->ui32RegisterNumber); + + if (dstIsSrc1 || dstIsSrc2) + { + psContext->AddIndentation(); + bcatcstr(glsl, "{\n"); + ++psContext->indent; + psContext->AddIndentation(); + int numComponents = (pDest->eType == OPERAND_TYPE_TEMP) ? + psContext->psShader->GetTempComponentCount(eDestType, pDest->ui32RegisterNumber) : + pDest->iNumComponents; + bformata(glsl, "%s %s = %s;\n", HLSLcc::GetConstructorForType(psContext, eDestType, numComponents), tempName.c_str(), TranslateOperand(pDest, TO_FLAG_NAME_ONLY).c_str()); + + // Override OPERAND_TYPE_TEMP name temporarily + const_cast(pDest)->specialName.assign(tempName); + } + + for (destElem = 0; destElem < 4; ++destElem) + { + int numParenthesis = 0; + srcElem++; + if (pDest->eSelMode == OPERAND_4_COMPONENT_MASK_MODE && pDest->ui32CompMask != 0 && !(pDest->ui32CompMask & (1 << destElem))) + continue; + + psContext->AddIndentation(); + AddOpAssignToDestWithMask(pDest, eDestType, 1, precise, numParenthesis, 1 << destElem); + bcatcstr(glsl, "("); + if (s0Type == SVT_BOOL) + { + glsl << TranslateOperand(src0, TO_FLAG_BOOL, 1 << srcElem); + bcatcstr(glsl, ") ? "); + } + else + { + glsl << TranslateOperand(src0, TO_AUTO_BITCAST_TO_INT, 1 << srcElem); + + if (psContext->psShader->ui32MajorVersion < 4) + { + //cmp opcode uses >= 0 + bcatcstr(glsl, " >= 0) ? "); + } + else + { + bcatcstr(glsl, " != 0) ? 
"); + } + } + + glsl << TranslateOperand(src1, SVTTypeToFlag(eDestType), 1 << srcElem); + bcatcstr(glsl, " : "); + glsl << TranslateOperand(src2, SVTTypeToFlag(eDestType), 1 << srcElem); + AddAssignPrologue(numParenthesis); + } + + if (dstIsSrc1 || dstIsSrc2) + { + const_cast(pDest)->specialName.clear(); + + psContext->AddIndentation(); + glsl << TranslateOperand(pDest, TO_FLAG_NAME_ONLY); + bformata(glsl, " = %s;\n", tempName.c_str()); + + --psContext->indent; + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + } + } +} + +void ToMetal::CallBinaryOp(const char* name, Instruction* psInst, + int dest, int src0, int src1, SHADER_VARIABLE_TYPE eDataType) +{ + uint32_t ui32Flags = SVTTypeToFlag(eDataType); + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = psInst->asOperands[dest].GetAccessMask(); + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + int needsParenthesis = 0; + + if (eDataType == SVT_FLOAT + && CanForceToHalfOperand(&psInst->asOperands[dest]) + && CanForceToHalfOperand(&psInst->asOperands[src0]) + && CanForceToHalfOperand(&psInst->asOperands[src1])) + { + ui32Flags = TO_FLAG_FORCE_HALF; + eDataType = SVT_FLOAT16; + } + + uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); + if (src1SwizCount != src0SwizCount) + { + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], eDataType, dstSwizCount, psInst->ui32PreciseMask, needsParenthesis); + +/* bool s0NeedsUpscaling = false, s1NeedsUpscaling = false; + SHADER_VARIABLE_TYPE s0Type = psInst->asOperands[src0].GetDataType(psContext); + SHADER_VARIABLE_TYPE s1Type = psInst->asOperands[src1].GetDataType(psContext); + + if((s0Type == SVT_FLOAT10 || s0Type == SVT_FLOAT16) && (s1Type != s) + */ + glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bformata(glsl, " %s ", name); + glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + + AddAssignPrologue(needsParenthesis); +} + +void ToMetal::CallTernaryOp(const char* op1, const char* op2, Instruction* psInst, + int dest, int src0, int src1, int src2, uint32_t dataType) +{ + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = psInst->asOperands[dest].GetAccessMask(); + uint32_t src2SwizCount = psInst->asOperands[src2].GetNumSwizzleElements(destMask); + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + + uint32_t ui32Flags = dataType; + int numParenthesis = 0; + + if (dataType == TO_FLAG_NONE + && CanForceToHalfOperand(&psInst->asOperands[dest]) + && CanForceToHalfOperand(&psInst->asOperands[src0]) + && CanForceToHalfOperand(&psInst->asOperands[src1]) + && CanForceToHalfOperand(&psInst->asOperands[src2])) + ui32Flags = dataType = TO_FLAG_FORCE_HALF; + + if (src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) + { + uint32_t maxElems = std::max(src2SwizCount, std::max(src1SwizCount, src0SwizCount)); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], TypeFlagsToSVTType(dataType), dstSwizCount, psInst->ui32PreciseMask, 
numParenthesis); + + glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bformata(glsl, " %s ", op1); + glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + bformata(glsl, " %s ", op2); + glsl << TranslateOperand(&psInst->asOperands[src2], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); +} + +void ToMetal::CallHelper3(const char* name, Instruction* psInst, + int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask, uint32_t ui32Flags) +{ + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + uint32_t src2SwizCount = psInst->asOperands[src2].GetNumSwizzleElements(destMask); + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + int numParenthesis = 0; + + if (CanForceToHalfOperand(&psInst->asOperands[dest]) + && CanForceToHalfOperand(&psInst->asOperands[src0]) + && CanForceToHalfOperand(&psInst->asOperands[src1]) + && CanForceToHalfOperand(&psInst->asOperands[src2])) + ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT; + + if ((src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) + { + uint32_t maxElems = std::max(src2SwizCount, std::max(src1SwizCount, src0SwizCount)); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], ui32Flags & TO_FLAG_FORCE_HALF ? SVT_FLOAT16 : SVT_FLOAT, dstSwizCount, psInst->ui32PreciseMask, numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[src2], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); +} + +void ToMetal::CallHelper3(const char* name, Instruction* psInst, + int dest, int src0, int src1, int src2, int paramsShouldFollowWriteMask) +{ + CallHelper3(name, psInst, dest, src0, src1, src2, paramsShouldFollowWriteMask, TO_AUTO_BITCAST_TO_FLOAT); +} + +void ToMetal::CallHelper2(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask) +{ + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT; + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + + int isDotProduct = (strncmp(name, "dot", 3) == 0) ? 
1 : 0; + int numParenthesis = 0; + + if (CanForceToHalfOperand(&psInst->asOperands[dest]) + && CanForceToHalfOperand(&psInst->asOperands[src0]) + && CanForceToHalfOperand(&psInst->asOperands[src1])) + ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT; + + + if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) + { + uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[dest], ui32Flags & TO_FLAG_FORCE_HALF ? SVT_FLOAT16 : SVT_FLOAT, isDotProduct ? 1 : dstSwizCount, psInst->ui32PreciseMask, numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + + glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + + AddAssignPrologue(numParenthesis); +} + +void ToMetal::CallHelper2Int(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask) +{ + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_INT; + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + int numParenthesis = 0; + + if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) + { + uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, psInst->ui32PreciseMask, numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); +} + +void ToMetal::CallHelper2UInt(const char* name, Instruction* psInst, + int dest, int src0, int src1, int paramsShouldFollowWriteMask) +{ + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_UINT; + bstring glsl = *psContext->currentGLSLString; + uint32_t destMask = paramsShouldFollowWriteMask ? 
psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + uint32_t src1SwizCount = psInst->asOperands[src1].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[src0].GetNumSwizzleElements(destMask); + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + int numParenthesis = 0; + + if ((src1SwizCount != src0SwizCount) && paramsShouldFollowWriteMask) + { + uint32_t maxElems = std::max(src1SwizCount, src0SwizCount); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], SVT_UINT, dstSwizCount, psInst->ui32PreciseMask, numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[src1], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); +} + +void ToMetal::CallHelper1(const char* name, Instruction* psInst, + int dest, int src0, int paramsShouldFollowWriteMask) +{ + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_FLOAT; + bstring glsl = *psContext->currentGLSLString; + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + uint32_t destMask = paramsShouldFollowWriteMask ? psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + int numParenthesis = 0; + + psContext->AddIndentation(); + if (CanForceToHalfOperand(&psInst->asOperands[dest]) + && CanForceToHalfOperand(&psInst->asOperands[src0])) + ui32Flags = TO_FLAG_FORCE_HALF | TO_AUTO_BITCAST_TO_FLOAT; + + AddAssignToDest(&psInst->asOperands[dest], ui32Flags & TO_FLAG_FORCE_HALF ? SVT_FLOAT16 : SVT_FLOAT, dstSwizCount, psInst->ui32PreciseMask, numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); +} + +//Result is an int. +void ToMetal::CallHelper1Int( + const char* name, + Instruction* psInst, + const int dest, + const int src0, + int paramsShouldFollowWriteMask) +{ + uint32_t ui32Flags = TO_AUTO_BITCAST_TO_INT; + bstring glsl = *psContext->currentGLSLString; + uint32_t dstSwizCount = psInst->asOperands[dest].GetNumSwizzleElements(); + uint32_t destMask = paramsShouldFollowWriteMask ? 
psInst->asOperands[dest].GetAccessMask() : OPERAND_4_COMPONENT_MASK_ALL; + int numParenthesis = 0; + + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[dest], SVT_INT, dstSwizCount, psInst->ui32PreciseMask, numParenthesis); + + bformata(glsl, "%s(", name); + numParenthesis++; + glsl << TranslateOperand(&psInst->asOperands[src0], ui32Flags, destMask); + AddAssignPrologue(numParenthesis); +} + +void ToMetal::TranslateTexelFetch( + Instruction* psInst, + const ResourceBinding* psBinding, + bstring glsl) +{ + int numParenthesis = 0; + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], psContext->psShader->sInfo.GetTextureDataType(psInst->asOperands[2].ui32RegisterNumber), 4, psInst->ui32PreciseMask, numParenthesis); + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ".read("); + + switch (psBinding->eDimension) + { + case REFLECT_RESOURCE_DIMENSION_BUFFER: + { + psContext->m_Reflection.OnDiagnostics("Buffer resources not supported in Metal (in texel fetch)", 0, true); + return; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level + break; + } + + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_Z); // Array index + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[3], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); // Sample index + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + psContext->m_Reflection.OnDiagnostics("Multisampled texture arrays not supported in Metal (in texel fetch)", 0, true); + return; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: + case REFLECT_RESOURCE_DIMENSION_BUFFEREX: + default: + { + // Shouldn't happen. 
Cubemap reads are not supported in HLSL + ASSERT(0); + break; + } + } + bcatcstr(glsl, ")"); + + glsl << TranslateOperandSwizzle(&psInst->asOperands[2], psInst->asOperands[0].GetAccessMask(), 0); + AddAssignPrologue(numParenthesis); +} + +void ToMetal::TranslateTexelFetchOffset( + Instruction* psInst, + const ResourceBinding* psBinding, + bstring glsl) +{ + int numParenthesis = 0; + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], psContext->psShader->sInfo.GetTextureDataType(psInst->asOperands[2].ui32RegisterNumber), 4, psInst->ui32PreciseMask, numParenthesis); + + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ".read("); + + switch (psBinding->eDimension) + { + case REFLECT_RESOURCE_DIMENSION_BUFFER: + { + psContext->m_Reflection.OnDiagnostics("Buffer resources not supported in Metal (in texel fetch)", 0, true); + return; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + { + psContext->m_Reflection.OnDiagnostics("Multisampled texture arrays not supported in Metal (in texel fetch)", 0, true); + return; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); + bformata(glsl, " + %d", psInst->iUAddrOffset); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); + bformata(glsl, " + %d, ", psInst->iUAddrOffset); + + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_Y); + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bformata(glsl, "+ ivec2(%d, %d), ", psInst->iUAddrOffset, psInst->iVAddrOffset); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bformata(glsl, "+ ivec2(%d, %d), ", psInst->iUAddrOffset, psInst->iVAddrOffset); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_Z); // Array index + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC3, 7 /* .xyz */); + bformata(glsl, "+ ivec3(%d, %d, %d), ", psInst->iUAddrOffset, psInst->iVAddrOffset, psInst->iWAddrOffset); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_W); // Lod level + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: + { + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_EXPAND_TO_VEC2, 3 /* .xy */); + bformata(glsl, "+ ivec2(%d, %d), ", psInst->iUAddrOffset, psInst->iVAddrOffset); + glsl << TranslateOperand(&psInst->asOperands[3], TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); // Sample index + break; + } + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: + case REFLECT_RESOURCE_DIMENSION_BUFFEREX: + default: + { + // Shouldn't happen. 
Cubemap reads are not supported in HLSL + ASSERT(0); + break; + } + } + bcatcstr(glsl, ")"); + + glsl << TranslateOperandSwizzle(&psInst->asOperands[2], psInst->asOperands[0].GetAccessMask(), 0); + AddAssignPrologue(numParenthesis); +} + +//Makes sure the texture coordinate swizzle is appropriate for the texture type. +//i.e. vecX for X-dimension texture. +//Currently supports floating point coord only, so not used for texelFetch. +void ToMetal::TranslateTexCoord( + const RESOURCE_DIMENSION eResDim, + Operand* psTexCoordOperand) +{ + uint32_t flags = TO_AUTO_BITCAST_TO_FLOAT; + uint32_t opMask = OPERAND_4_COMPONENT_MASK_ALL; + bool isArray = false; + + switch (eResDim) + { + case RESOURCE_DIMENSION_TEXTURE1D: + { + //Vec1 texcoord. Mask out the other components. + opMask = OPERAND_4_COMPONENT_MASK_X; + break; + } + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + // x for coord, y for array element + opMask = OPERAND_4_COMPONENT_MASK_X; + bstring glsl = *psContext->currentGLSLString; + glsl << TranslateOperand(psTexCoordOperand, flags, opMask); + + bcatcstr(glsl, ", round("); + + opMask = OPERAND_4_COMPONENT_MASK_Y; + flags = TO_AUTO_BITCAST_TO_FLOAT; + isArray = true; + break; + } + case RESOURCE_DIMENSION_TEXTURE2D: + { + //Vec2 texcoord. Mask out the other components. + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; + flags |= TO_AUTO_EXPAND_TO_VEC2; + break; + } + case RESOURCE_DIMENSION_TEXTURECUBE: + case RESOURCE_DIMENSION_TEXTURE3D: + { + //Vec3 texcoord. Mask out the other components. + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; + flags |= TO_AUTO_EXPAND_TO_VEC3; + break; + } + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + // xy for coord, z for array element + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; + flags |= TO_AUTO_EXPAND_TO_VEC2; + + bstring glsl = *psContext->currentGLSLString; + glsl << TranslateOperand(psTexCoordOperand, flags, opMask); + + bcatcstr(glsl, ", round("); + + opMask = OPERAND_4_COMPONENT_MASK_Z; + flags = TO_AUTO_BITCAST_TO_FLOAT; + isArray = true; + break; + } + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + // xyz for coord, w for array element + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; + flags |= TO_AUTO_EXPAND_TO_VEC3; + + bstring glsl = *psContext->currentGLSLString; + glsl << TranslateOperand(psTexCoordOperand, flags, opMask); + + bcatcstr(glsl, ", round("); + + opMask = OPERAND_4_COMPONENT_MASK_W; + flags = TO_AUTO_BITCAST_TO_FLOAT; + isArray = true; + break; + } + default: + { + ASSERT(0); + break; + } + } + + //FIXME detect when integer coords are needed. + bstring glsl = *psContext->currentGLSLString; + glsl << TranslateOperand(psTexCoordOperand, flags, opMask); + + if (isArray) + bcatcstr(glsl, ")"); +} + +void ToMetal::GetResInfoData(Instruction* psInst, int index, int destElem) +{ + bstring glsl = *psContext->currentGLSLString; + int numParenthesis = 0; + const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType; + + psContext->AddIndentation(); + AddOpAssignToDestWithMask(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? 
SVT_UINT : SVT_FLOAT, 1, psInst->ui32PreciseMask, numParenthesis, 1 << destElem); + + const char *metalGetters[] = { ".get_width(", ".get_height(", ".get_depth(", ".get_num_mip_levels()" }; + int dim = GetNumTextureDimensions(psInst->eResDim); + if (dim < (index + 1) && index != 3) + { + bcatcstr(glsl, eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? "uint(0)" : "0.0"); + } + else + { + if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_FLOAT) + { + bcatcstr(glsl, "float("); + numParenthesis++; + } + else if (eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_RCPFLOAT) + { + bcatcstr(glsl, "1.0f / float("); + numParenthesis++; + } + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NAME_ONLY); + if ((index == 1 && psInst->eResDim == RESOURCE_DIMENSION_TEXTURE1DARRAY) || + (index == 2 && (psInst->eResDim == RESOURCE_DIMENSION_TEXTURE2DARRAY || + psInst->eResDim == RESOURCE_DIMENSION_TEXTURE2DMSARRAY))) + { + bcatcstr(glsl, ".get_array_size()"); + } + else + { + bcatcstr(glsl, metalGetters[index]); + + if (index < 3) + { + if (psInst->eResDim != RESOURCE_DIMENSION_TEXTURE2DMS && + psInst->eResDim != RESOURCE_DIMENSION_TEXTURE2DMSARRAY) + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); //mip level + + bcatcstr(glsl, ")"); + } + } + } + AddAssignPrologue(numParenthesis); +} + +void ToMetal::TranslateTextureSample(Instruction* psInst, + uint32_t ui32Flags) +{ + bstring glsl = *psContext->currentGLSLString; + int numParenthesis = 0; + int hasParamOffset = (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) ? 1 : 0; + + Operand* psDest = &psInst->asOperands[0]; + Operand* psDestAddr = &psInst->asOperands[1]; + Operand* psSrcOff = (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) ? &psInst->asOperands[2] : 0; + Operand* psSrcTex = &psInst->asOperands[2 + hasParamOffset]; + Operand* psSrcSamp = &psInst->asOperands[3 + hasParamOffset]; + Operand* psSrcRef = (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) ? &psInst->asOperands[4 + hasParamOffset] : 0; + Operand* psSrcLOD = (ui32Flags & TEXSMP_FLAG_LOD) ? &psInst->asOperands[4] : 0; + Operand* psSrcDx = (ui32Flags & TEXSMP_FLAG_GRAD) ? &psInst->asOperands[4] : 0; + Operand* psSrcDy = (ui32Flags & TEXSMP_FLAG_GRAD) ? &psInst->asOperands[5] : 0; + Operand* psSrcBias = (ui32Flags & TEXSMP_FLAG_BIAS) ? 
&psInst->asOperands[4] : 0; + + const char *funcName = ""; + const char* gradSwizzle = ""; + const char *gradientName = ""; + + uint32_t ui32NumOffsets = 0; + + const RESOURCE_DIMENSION eResDim = psContext->psShader->aeResourceDims[psSrcTex->ui32RegisterNumber]; + + if (ui32Flags & TEXSMP_FLAG_GATHER) + { + if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) + funcName = "gather_compare"; + else + funcName = "gather"; + } + else + { + if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) + funcName = "sample_compare"; + else + funcName = "sample"; + } + + switch (eResDim) + { + case RESOURCE_DIMENSION_TEXTURE1D: + { + gradSwizzle = ".x"; + ui32NumOffsets = 1; + break; + } + case RESOURCE_DIMENSION_TEXTURE2D: + { + gradSwizzle = ".xy"; + gradientName = "gradient2d"; + ui32NumOffsets = 2; + break; + } + case RESOURCE_DIMENSION_TEXTURECUBE: + { + gradSwizzle = ".xyz"; + ui32NumOffsets = 3; + gradientName = "gradientcube"; + break; + } + case RESOURCE_DIMENSION_TEXTURE3D: + { + gradSwizzle = ".xyz"; + ui32NumOffsets = 3; + gradientName = "gradient3d"; + break; + } + case RESOURCE_DIMENSION_TEXTURE1DARRAY: + { + gradSwizzle = ".x"; + ui32NumOffsets = 1; + break; + } + case RESOURCE_DIMENSION_TEXTURE2DARRAY: + { + gradSwizzle = ".xy"; + ui32NumOffsets = 2; + gradientName = "gradient2d"; + break; + } + case RESOURCE_DIMENSION_TEXTURECUBEARRAY: + { + gradSwizzle = ".xyz"; + ui32NumOffsets = 3; + gradientName = "gradientcube"; + break; + } + default: + { + ASSERT(0); + break; + } + } + + + SHADER_VARIABLE_TYPE dataType = psContext->psShader->sInfo.GetTextureDataType(psSrcTex->ui32RegisterNumber); + psContext->AddIndentation(); + AddAssignToDest(psDest, dataType, psSrcTex->GetNumSwizzleElements(), psInst->ui32PreciseMask, numParenthesis); + + std::string texName = TranslateOperand(psSrcTex, TO_FLAG_NAME_ONLY); + + // TextureName.FuncName( + glsl << texName; + bformata(glsl, ".%s(", funcName); + + bool isDepthSampler = false; + for (unsigned j = 0, m = m_Textures.size(); j < m; ++j) + { + if (m_Textures[j].name == texName) + { + isDepthSampler = m_Textures[j].isDepthSampler; + break; + } + } + + // Sampler name + // on ios pre-GPUFamily3 we MUST have constexpr in shader for a sampler with compare func + // for now we use fixed shadow sampler in all cases of depth compare (ATM all depth compares are interpreted as shadow usage) + if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE && IsMobileTarget(psContext)) + { + bcatcstr(glsl, "_mtl_xl_shadow_sampler"); + } + else + { + std::string sampName = TranslateOperand(psSrcSamp, TO_FLAG_NAME_ONLY); + + // insert the "sampler" prefix if the sampler name is equal to the texture name (default sampler) + if (texName == sampName) + sampName.insert(0, "sampler"); + glsl << sampName; + } + + bcatcstr(glsl, ", "); + + // Texture coordinates + TranslateTexCoord(eResDim, psDestAddr); + + // Depth compare reference value + if (ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) + { + bcatcstr(glsl, ", saturate("); // TODO: why the saturate here? 
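+ // (Presumably because D3D clamps the comparison reference to [0, 1] for normalized depth formats; this is an assumption, not something this code verifies.)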
+ glsl << TranslateOperand(psSrcRef, TO_AUTO_BITCAST_TO_FLOAT); + bcatcstr(glsl, ")"); + } + + // lod_options (LOD/grad/bias) based on the flags + if (ui32Flags & TEXSMP_FLAG_LOD) + { + bcatcstr(glsl, ", level("); + glsl << TranslateOperand(psSrcLOD, TO_AUTO_BITCAST_TO_FLOAT); + if (psContext->psShader->ui32MajorVersion < 4) + { + bcatcstr(glsl, ".w"); + } + bcatcstr(glsl, ")"); + } + else if (ui32Flags & TEXSMP_FLAG_FIRSTLOD) + { + bcatcstr(glsl, ", level(0.0)"); + } + else if (ui32Flags & TEXSMP_FLAG_GRAD) + { + glsl << std::string(", ") << std::string(gradientName) << std::string("(float4("); + glsl << TranslateOperand(psSrcDx, TO_AUTO_BITCAST_TO_FLOAT); + bcatcstr(glsl, ")"); + bcatcstr(glsl, gradSwizzle); + bcatcstr(glsl, ", float4("); + glsl << TranslateOperand(psSrcDy, TO_AUTO_BITCAST_TO_FLOAT); + bcatcstr(glsl, ")"); + bcatcstr(glsl, gradSwizzle); + bcatcstr(glsl, ")"); + } + else if (ui32Flags & (TEXSMP_FLAG_BIAS)) + { + glsl << std::string(", bias(") << TranslateOperand(psSrcBias, TO_AUTO_BITCAST_TO_FLOAT) << std::string(")"); + } + + bool hadOffset = false; + + // Add offset param + if (psInst->bAddressOffset) + { + hadOffset = true; + if (ui32NumOffsets == 1) + { + bformata(glsl, ", %d", + psInst->iUAddrOffset); + } + else if (ui32NumOffsets == 2) + { + bformata(glsl, ", int2(%d, %d)", + psInst->iUAddrOffset, + psInst->iVAddrOffset); + } + else if (ui32NumOffsets == 3) + { + bformata(glsl, ", int3(%d, %d, %d)", + psInst->iUAddrOffset, + psInst->iVAddrOffset, + psInst->iWAddrOffset); + } + } + // HLSL gather has a variant with separate offset operand + else if (ui32Flags & TEXSMP_FLAG_PARAMOFFSET) + { + hadOffset = true; + uint32_t mask = OPERAND_4_COMPONENT_MASK_X; + if (ui32NumOffsets > 1) + mask |= OPERAND_4_COMPONENT_MASK_Y; + if (ui32NumOffsets > 2) + mask |= OPERAND_4_COMPONENT_MASK_Z; + + bcatcstr(glsl, ","); + glsl << TranslateOperand(psSrcOff, TO_FLAG_INTEGER, mask); + } + + // Add texture gather component selection if needed + if ((ui32Flags & TEXSMP_FLAG_GATHER) && psSrcSamp->GetNumSwizzleElements() > 0) + { + ASSERT(psSrcSamp->GetNumSwizzleElements() == 1); + if (psSrcSamp->aui32Swizzle[0] != OPERAND_4_COMPONENT_X) + { + if (!(ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE)) + { + // Need to add offset param to match func overload + if (!hadOffset) + { + if (ui32NumOffsets == 1) + bcatcstr(glsl, ", 0"); + else + bformata(glsl, ", int%d(0)", ui32NumOffsets); + } + + bcatcstr(glsl, ", component::"); + glsl << TranslateOperandSwizzle(psSrcSamp, OPERAND_4_COMPONENT_MASK_ALL, 0, false); + } + else + { + psContext->m_Reflection.OnDiagnostics("Metal supports gather compare only for the first component.", 0, true); + } + } + } + + bcatcstr(glsl, ")"); + + if (!((ui32Flags & TEXSMP_FLAG_DEPTHCOMPARE) || isDepthSampler) || (ui32Flags & TEXSMP_FLAG_GATHER)) + { + // iWriteMaskEnabled is forced off during DecodeOperand because swizzle on sampler uniforms + // does not make sense. But need to re-enable to correctly swizzle this particular instruction. + psSrcTex->iWriteMaskEnabled = 1; + glsl << TranslateOperandSwizzle(psSrcTex, psDest->GetAccessMask(), 0); + } + AddAssignPrologue(numParenthesis); +} + +// Handle cases where vector components are accessed with dynamic index ([] notation). +// A bit ugly hack because compiled HLSL uses byte offsets to access data in structs => we are converting +// the offset back to vector component index in runtime => calculating stuff back and forth. +// TODO: Would be better to eliminate the offset calculation ops and use indexes straight on. 
Could be tricky though... +void ToMetal::TranslateDynamicComponentSelection(const ShaderVarType* psVarType, const Operand* psByteAddr, uint32_t offset, uint32_t mask) +{ + bstring glsl = *psContext->currentGLSLString; + ASSERT(psVarType->Class == SVC_VECTOR); + + bcatcstr(glsl, "["); // Access vector component with [] notation + if (offset > 0) + bcatcstr(glsl, "("); + + // The var containing byte address to the requested element + glsl << TranslateOperand(psByteAddr, TO_FLAG_UNSIGNED_INTEGER, mask); + + if (offset > 0)// If the vector is part of a struct, there is an extra offset in our byte address + bformata(glsl, " - %du)", offset); // Subtract that first + + bcatcstr(glsl, " >> 0x2u"); // Convert byte offset to index: div by four + bcatcstr(glsl, "]"); +} + +void ToMetal::TranslateShaderStorageStore(Instruction* psInst) +{ + bstring glsl = *psContext->currentGLSLString; + int component; + int srcComponent = 0; + + Operand* psDest = 0; + Operand* psDestAddr = 0; + Operand* psDestByteOff = 0; + Operand* psSrc = 0; + + + switch (psInst->eOpcode) + { + case OPCODE_STORE_STRUCTURED: + psDest = &psInst->asOperands[0]; + psDestAddr = &psInst->asOperands[1]; + psDestByteOff = &psInst->asOperands[2]; + psSrc = &psInst->asOperands[3]; + + break; + case OPCODE_STORE_RAW: + case OPCODE_STORE_UAV_TYPED: // Hack typed buffer as raw buf + psDest = &psInst->asOperands[0]; + psDestByteOff = &psInst->asOperands[1]; + psSrc = &psInst->asOperands[2]; + break; + default: + ASSERT(0); + break; + } + + uint32_t dstOffFlag = TO_FLAG_UNSIGNED_INTEGER; + SHADER_VARIABLE_TYPE dstOffType = psDestByteOff->GetDataType(psContext); + if (dstOffType == SVT_INT || dstOffType == SVT_INT16 || dstOffType == SVT_INT12) + dstOffFlag = TO_FLAG_INTEGER; + + for (component = 0; component < 4; component++) + { + ASSERT(psInst->asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE); + if (psInst->asOperands[0].ui32CompMask & (1 << component)) + { + psContext->AddIndentation(); + glsl << TranslateOperand(psDest, TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY); + + if (psDestAddr) + { + bcatcstr(glsl, "["); + glsl << TranslateOperand(psDestAddr, TO_FLAG_INTEGER | TO_FLAG_UNSIGNED_INTEGER); + bcatcstr(glsl, "].value"); + } + + bcatcstr(glsl, "[("); + glsl << TranslateOperand(psDestByteOff, dstOffFlag); + if (psInst->eOpcode == OPCODE_STORE_UAV_TYPED) + { + bcatcstr(glsl, ")"); + } + else + { + bcatcstr(glsl, " >> 2"); + if (dstOffFlag == TO_FLAG_UNSIGNED_INTEGER) + bcatcstr(glsl, "u"); + bcatcstr(glsl, ")"); + + if (component != 0) + { + bformata(glsl, " + %d", component); + if (dstOffFlag == TO_FLAG_UNSIGNED_INTEGER) + bcatcstr(glsl, "u"); + } + } + bcatcstr(glsl, "]"); + + //Dest type is currently always a uint array. 
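+ // e.g. a structured store typically ends up as "buf[index].value[(byteOffset >> 2u) + 1u] = as_type<uint>(src.y);" + // (names and the as_type bitcast are illustrative; the exact source expression depends on the operand's data type)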
+ bcatcstr(glsl, " = "); + if (psSrc->GetNumSwizzleElements() > 1) + glsl << TranslateOperand(psSrc, TO_FLAG_UNSIGNED_INTEGER, 1 << (srcComponent++)); + else + glsl << TranslateOperand(psSrc, TO_FLAG_UNSIGNED_INTEGER, OPERAND_4_COMPONENT_MASK_X); + + bformata(glsl, ";\n"); + } + } +} + +void ToMetal::TranslateShaderStorageLoad(Instruction* psInst) +{ + bstring glsl = *psContext->currentGLSLString; + int component; + Operand* psDest = 0; + Operand* psSrcAddr = 0; + Operand* psSrcByteOff = 0; + Operand* psSrc = 0; + + switch (psInst->eOpcode) + { + case OPCODE_LD_STRUCTURED: + psDest = &psInst->asOperands[0]; + psSrcAddr = &psInst->asOperands[1]; + psSrcByteOff = &psInst->asOperands[2]; + psSrc = &psInst->asOperands[3]; + break; + case OPCODE_LD_RAW: + case OPCODE_LD_UAV_TYPED: // Hack typed buffer as raw buf + psDest = &psInst->asOperands[0]; + psSrcByteOff = &psInst->asOperands[1]; + psSrc = &psInst->asOperands[2]; + break; + default: + ASSERT(0); + break; + } + + uint32_t destCount = psDest->GetNumSwizzleElements(); + uint32_t destMask = psDest->GetAccessMask(); + + int numParenthesis = 0; + int firstItemAdded = 0; + SHADER_VARIABLE_TYPE destDataType = psDest->GetDataType(psContext); + uint32_t srcOffFlag = TO_FLAG_UNSIGNED_INTEGER; + SHADER_VARIABLE_TYPE srcOffType = psSrcByteOff->GetDataType(psContext); + if (srcOffType == SVT_INT || srcOffType == SVT_INT16 || srcOffType == SVT_INT12) + srcOffFlag = TO_FLAG_INTEGER; + + psContext->AddIndentation(); + AddAssignToDest(psDest, destDataType, destCount, psInst->ui32PreciseMask, numParenthesis); + if (destCount > 1) + { + bformata(glsl, "%s(", GetConstructorForTypeMetal(destDataType, destCount)); + numParenthesis++; + } + for (component = 0; component < 4; component++) + { + bool addedBitcast = false; + if (!(destMask & (1 << component))) + continue; + + if (firstItemAdded) + bcatcstr(glsl, ", "); + else + firstItemAdded = 1; + + // always uint array atm + if (destDataType == SVT_FLOAT) + { + // input already in uints, need bitcast + bcatcstr(glsl, "as_type("); + addedBitcast = true; + } + else if (destDataType == SVT_INT || destDataType == SVT_INT16 || destDataType == SVT_INT12) + { + bcatcstr(glsl, "int("); + addedBitcast = true; + } + + glsl << TranslateOperand(psSrc, TO_FLAG_NAME_ONLY); + + if (psSrcAddr) + { + bcatcstr(glsl, "["); + glsl << TranslateOperand(psSrcAddr, TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_INTEGER); + bcatcstr(glsl, "].value"); + } + bcatcstr(glsl, "[("); + glsl << TranslateOperand(psSrcByteOff, srcOffFlag); + if (psInst->eOpcode == OPCODE_LD_UAV_TYPED) + { + bcatcstr(glsl, ")"); + } + else + { + bcatcstr(glsl, " >> 2"); + if (srcOffFlag == TO_FLAG_UNSIGNED_INTEGER) + bcatcstr(glsl, "u"); + + bformata(glsl, ") + %d", psSrc->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE ? 
psSrc->aui32Swizzle[component] : component); + if (srcOffFlag == TO_FLAG_UNSIGNED_INTEGER) + bcatcstr(glsl, "u"); + } + bcatcstr(glsl, "]"); + + if (addedBitcast) + bcatcstr(glsl, ")"); + } + AddAssignPrologue(numParenthesis); +} + +void ToMetal::TranslateAtomicMemOp(Instruction* psInst) +{ + bstring glsl = *psContext->currentGLSLString; + int numParenthesis = 0; + uint32_t ui32DataTypeFlag = TO_FLAG_INTEGER; + const char* func = ""; + Operand* dest = 0; + Operand* previousValue = 0; + Operand* destAddr = 0; + Operand* src = 0; + Operand* compare = 0; + int texDim = 0; + bool isUint = true; + + switch (psInst->eOpcode) + { + case OPCODE_IMM_ATOMIC_IADD: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_IADD\n"); + } + func = "atomic_fetch_add_explicit"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_IADD: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_IADD\n"); + } + func = "atomic_fetch_add_explicit"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_AND: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_AND\n"); + } + func = "atomic_fetch_and_explicit"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_AND: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_AND\n"); + } + func = "atomic_fetch_and_explicit"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_OR: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_OR\n"); + } + func = "atomic_fetch_or_explicit"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_OR: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_OR\n"); + } + func = "atomic_fetch_or_explicit"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_XOR: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_XOR\n"); + } + func = "atomic_fetch_xor_explicit"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_XOR: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_XOR\n"); + } + func = "atomic_fetch_xor_explicit"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + + case OPCODE_IMM_ATOMIC_EXCH: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + 
psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_EXCH\n"); + } + func = "atomic_exchange_explicit"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_IMM_ATOMIC_CMP_EXCH: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_CMP_EXC\n"); + } + func = "atomic_compare_exchange_weak_explicit"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + compare = &psInst->asOperands[3]; + src = &psInst->asOperands[4]; + break; + } + case OPCODE_ATOMIC_CMP_STORE: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_CMP_STORE\n"); + } + func = "atomic_compare_exchange_weak_explicit"; + previousValue = 0; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + compare = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_IMM_ATOMIC_UMIN: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_UMIN\n"); + } + func = "atomic_fetch_min_explicit"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_UMIN: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_UMIN\n"); + } + func = "atomic_fetch_min_explicit"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_IMIN: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_IMIN\n"); + } + func = "atomic_fetch_min_explicit"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_IMIN: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_IMIN\n"); + } + func = "atomic_fetch_min_explicit"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_UMAX: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_UMAX\n"); + } + func = "atomic_fetch_max_explicit"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = &psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_UMAX: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_UMAX\n"); + } + func = "atomic_fetch_max_explicit"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + case OPCODE_IMM_ATOMIC_IMAX: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_IMAX\n"); + } + func = "atomic_fetch_max_explicit"; + previousValue = &psInst->asOperands[0]; + dest = &psInst->asOperands[1]; + destAddr = 
&psInst->asOperands[2]; + src = &psInst->asOperands[3]; + break; + } + case OPCODE_ATOMIC_IMAX: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ATOMIC_IMAX\n"); + } + func = "atomic_fetch_max_explicit"; + dest = &psInst->asOperands[0]; + destAddr = &psInst->asOperands[1]; + src = &psInst->asOperands[2]; + break; + } + default: + ASSERT(0); + break; + } + + psContext->AddIndentation(); + + const ResourceBinding* psBinding = 0; + if (dest->eType != OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY) + { + psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, dest->ui32RegisterNumber, &psBinding); + + if (psBinding->eType == RTYPE_UAV_RWTYPED) + { + isUint = (psBinding->ui32ReturnType == RETURN_TYPE_UINT); + + // Find out if it's texture and of what dimension + switch (psBinding->eDimension) + { + case REFLECT_RESOURCE_DIMENSION_TEXTURE1D: + texDim = 1; + break; + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBE: + case REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURE2D: + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS: + texDim = 2; + break; + case REFLECT_RESOURCE_DIMENSION_TEXTURE3D: + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY: + case REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY: + texDim = 3; + break; + case REFLECT_RESOURCE_DIMENSION_BUFFER: // Hack typed buffer as raw buf + break; + default: + ASSERT(0); + break; + } + } + } + + if (texDim > 0) + { + psContext->m_Reflection.OnDiagnostics("Texture atomics are not supported in Metal", 0, true); + return; + } + + if (isUint) + ui32DataTypeFlag = TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_BITCAST_TO_UINT; + else + ui32DataTypeFlag = TO_FLAG_INTEGER | TO_AUTO_BITCAST_TO_INT; + + if (compare) + { + bcatcstr(glsl, "{\n"); + ++psContext->indent; + psContext->AddIndentation(); + bcatcstr(glsl, "uint compare_value = "); + glsl << TranslateOperand(compare, ui32DataTypeFlag); + bcatcstr(glsl, ";\n"); + psContext->AddIndentation(); + } + else if (previousValue) + AddAssignToDest(previousValue, isUint ? SVT_UINT : SVT_INT, 1, psInst->ui32PreciseMask, numParenthesis); + + bcatcstr(glsl, func); + bcatcstr(glsl, "("); + + uint32_t destAddrFlag = TO_FLAG_UNSIGNED_INTEGER; + SHADER_VARIABLE_TYPE destAddrType = destAddr->GetDataType(psContext); + if (destAddrType == SVT_INT || destAddrType == SVT_INT16 || destAddrType == SVT_INT12) + destAddrFlag = TO_FLAG_INTEGER; + + if (dest->eType == OPERAND_TYPE_UNORDERED_ACCESS_VIEW) + bcatcstr(glsl, "reinterpret_cast<device atomic_uint *>(&"); + else + bcatcstr(glsl, "reinterpret_cast<threadgroup atomic_uint *>(&"); + glsl << TranslateOperand(dest, TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY); + bcatcstr(glsl, "["); + glsl << TranslateOperand(destAddr, destAddrFlag, OPERAND_4_COMPONENT_MASK_X); + + if (!psBinding || psBinding->eType != RTYPE_UAV_RWTYPED) + { + // Structured buf if we have both x & y swizzles.
Raw buf has only x -> no .value[] + if (destAddr->GetNumSwizzleElements(OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y) == 2) + { + bcatcstr(glsl, "]"); + bcatcstr(glsl, ".value["); + glsl << TranslateOperand(destAddr, destAddrFlag, OPERAND_4_COMPONENT_MASK_Y); + } + + bcatcstr(glsl, " >> 2");//bytes to floats + if (destAddrFlag == TO_FLAG_UNSIGNED_INTEGER) + bcatcstr(glsl, "u"); + } + bcatcstr(glsl, "]), "); + + if (compare) + bcatcstr(glsl, "&compare_value, "); + + glsl << TranslateOperand(src, ui32DataTypeFlag); + bcatcstr(glsl, ", memory_order::memory_order_relaxed"); + if (compare) + bcatcstr(glsl, ", memory_order::memory_order_relaxed"); + bcatcstr(glsl, ")"); + if (previousValue) + { + AddAssignPrologue(numParenthesis); + } + else + bcatcstr(glsl, ";\n"); + + if (compare) + { + if (previousValue) + { + psContext->AddIndentation(); + AddAssignToDest(previousValue, SVT_UINT, 1, psInst->ui32PreciseMask, numParenthesis); + bcatcstr(glsl, "compare_value"); + AddAssignPrologue(numParenthesis); + } + --psContext->indent; + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + } +} + +void ToMetal::TranslateConditional( + Instruction* psInst, + bstring glsl) +{ + const char* statement = ""; + if (psInst->eOpcode == OPCODE_BREAKC) + { + statement = "break"; + } + else if (psInst->eOpcode == OPCODE_CONTINUEC) + { + statement = "continue"; + } + else if (psInst->eOpcode == OPCODE_RETC) // FIXME! Need to spew out shader epilogue + { + if (psContext->psShader->eShaderType == COMPUTE_SHADER || (psContext->psShader->eShaderType == PIXEL_SHADER && m_StructDefinitions[GetOutputStructName()].m_Members.size() == 0)) + statement = "return"; + else + statement = "return output"; + } + + + int isBool = psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL; + + if (isBool) + { + bcatcstr(glsl, "if("); + if (psInst->eBooleanTestType != INSTRUCTION_TEST_NONZERO) + bcatcstr(glsl, "!"); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_BOOL); + if (psInst->eOpcode != OPCODE_IF) + { + bformata(glsl, "){%s;}\n", statement); + } + else + { + bcatcstr(glsl, "){\n"); + } + } + else + { + if (psInst->eBooleanTestType == INSTRUCTION_TEST_ZERO) + { + bcatcstr(glsl, "if(("); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER); + + if (psInst->eOpcode != OPCODE_IF) + { + bformata(glsl, ")==uint(0)){%s;}\n", statement); + } + else + { + bcatcstr(glsl, ")==uint(0)){\n"); + } + } + else + { + ASSERT(psInst->eBooleanTestType == INSTRUCTION_TEST_NONZERO); + bcatcstr(glsl, "if(("); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER); + + if (psInst->eOpcode != OPCODE_IF) + { + bformata(glsl, ")!=uint(0)){%s;}\n", statement); + } + else + { + bcatcstr(glsl, ")!=uint(0)){\n"); + } + } + } +} + +void ToMetal::TranslateInstruction(Instruction* psInst) +{ + bstring glsl = *psContext->currentGLSLString; + int numParenthesis = 0; + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + // Uncomment to print instruction IDs + //psContext->AddIndentation(); + //bformata(glsl, "//Instruction %d\n", psInst->id); + #if 0 + if (psInst->id == 73) + { + ASSERT(1); //Set breakpoint here to debug an instruction from its ID. + } + #endif + } + + switch (psInst->eOpcode) + { + case OPCODE_FTOI: + case OPCODE_FTOU: + { + uint32_t dstCount = psInst->asOperands[0].GetNumSwizzleElements(); + uint32_t srcCount = psInst->asOperands[1].GetNumSwizzleElements(); + SHADER_VARIABLE_TYPE castType = psInst->eOpcode == OPCODE_FTOU ? 
SVT_UINT : SVT_INT; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_FTOU) + bcatcstr(glsl, "//FTOU\n"); + else + bcatcstr(glsl, "//FTOI\n"); + } + switch (psInst->asOperands[0].eMinPrecision) + { + case OPERAND_MIN_PRECISION_DEFAULT: + break; + case OPERAND_MIN_PRECISION_SINT_16: + castType = SVT_INT16; + ASSERT(psInst->eOpcode == OPCODE_FTOI); + break; + case OPERAND_MIN_PRECISION_UINT_16: + castType = SVT_UINT16; + ASSERT(psInst->eOpcode == OPCODE_FTOU); + break; + default: + ASSERT(0); // We'd be doing bitcasts into low/mediump ints, not good. + } + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[0], castType, srcCount, psInst->ui32PreciseMask, numParenthesis); + bcatcstr(glsl, GetConstructorForTypeMetal(castType, dstCount)); + bcatcstr(glsl, "("); // 1 + glsl << TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_FLOAT, psInst->asOperands[0].GetAccessMask()); + bcatcstr(glsl, ")"); // 1 + AddAssignPrologue(numParenthesis); + break; + } + + case OPCODE_MOV: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//MOV\n"); + } + psContext->AddIndentation(); + + // UNITY SPECIFIC: you can check case 1158280 + // This looks like a hack because it is! There is a bug that is quite hard to reproduce. + // When doing data analysis we assume that immediates are ints and hope it will be promoted later + // which is kinda fine unless there is an unfortunate combination happening: + // We operate on 4-component registers - we need different components to be treated as float/int + // but we should not use float operations (as this will mark register as float) + // instead "float" components should be used for MOV and friends to other registers + // and they, in turn, should be used for float ops + // In pseudocode it can look like this: + // var2.xy = var1.xy; var1.xy = var2.xy; // not marked as float explicitly + // bool foo = var1.z | <...> // marked as int + // Now we have immediate that will be treated as int but NOT promoted because we think we have all ints + // var1.w = 1 // var1 is marked int + // What is important is that this temporary is marked as int by us but DX compiler treats it + // as "normal" float (and rightfully so) [or rather - we speak about cases where it does treat it as float] + // It is also important that we speak about temps (otherwise we have explicit data type to use, so promotion works) + // + // At this point we have mov immediate to int temp (which should really be float temp) + { + Operand *pDst = &psInst->asOperands[0], *pSrc = &psInst->asOperands[1]; + if (pDst->GetDataType(psContext) == SVT_INT // dst marked as int + && pDst->eType == OPERAND_TYPE_TEMP // dst is temp + && pSrc->eType == OPERAND_TYPE_IMMEDIATE32 // src is immediate + && psContext->psShader->psIntTempSizes[pDst->ui32RegisterNumber] == 0 // no temp register allocated + ) + { + pDst->aeDataType[0] = pDst->aeDataType[1] = pDst->aeDataType[2] = pDst->aeDataType[3] = SVT_FLOAT; + } + } + + AddMOVBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1], psInst->ui32PreciseMask); + break; + } + case OPCODE_ITOF://signed to float + case OPCODE_UTOF://unsigned to float + { + SHADER_VARIABLE_TYPE castType = SVT_FLOAT; + uint32_t dstCount = psInst->asOperands[0].GetNumSwizzleElements(); + uint32_t srcCount = psInst->asOperands[1].GetNumSwizzleElements(); + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { 
+ psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_ITOF) + bcatcstr(glsl, "//ITOF\n"); + else + bcatcstr(glsl, "//UTOF\n"); + } + + switch (psInst->asOperands[0].eMinPrecision) + { + case OPERAND_MIN_PRECISION_DEFAULT: + break; + case OPERAND_MIN_PRECISION_FLOAT_2_8: + castType = SVT_FLOAT10; + break; + case OPERAND_MIN_PRECISION_FLOAT_16: + castType = SVT_FLOAT16; + break; + default: + ASSERT(0); // We'd be doing bitcasts into low/mediump ints, not good. + } + + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], castType, srcCount, psInst->ui32PreciseMask, numParenthesis); + bcatcstr(glsl, GetConstructorForTypeMetal(castType, dstCount)); + bcatcstr(glsl, "("); // 1 + glsl << TranslateOperand(&psInst->asOperands[1], psInst->eOpcode == OPCODE_UTOF ? TO_AUTO_BITCAST_TO_UINT : TO_AUTO_BITCAST_TO_INT, psInst->asOperands[0].GetAccessMask()); + bcatcstr(glsl, ")"); // 1 + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_MAD: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//MAD\n"); + } + CallHelper3("fma", psInst, 0, 1, 2, 3, 1); + break; + } + case OPCODE_IMAD: + { + uint32_t ui32Flags = TO_FLAG_INTEGER; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMAD\n"); + } + + if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) + { + ui32Flags = TO_FLAG_UNSIGNED_INTEGER; + } + + CallTernaryOp("*", "+", psInst, 0, 1, 2, 3, ui32Flags); + break; + } + case OPCODE_DFMA: + { + uint32_t ui32Flags = TO_FLAG_DOUBLE; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DFMA\n"); + } + CallHelper3("fma", psInst, 0, 1, 2, 3, 1, ui32Flags); + break; + } + case OPCODE_DADD: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DADD\n"); + } + CallBinaryOp("+", psInst, 0, 1, 2, SVT_DOUBLE); + break; + } + case OPCODE_IADD: + { + SHADER_VARIABLE_TYPE eType = SVT_INT; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IADD\n"); + } + //Is this a signed or unsigned add? 
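+ // (The check below keys off the destination's inferred type: if data type analysis marked the
+ // dest register as uint, the emitted Metal addition is done in uint, roughly
+ //     u_xlatu0 = u_xlatu1 + u_xlatu2;
+ // otherwise it stays a signed int add. The register names above are illustrative only.)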
+ if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) + { + eType = SVT_UINT; + } + CallBinaryOp("+", psInst, 0, 1, 2, eType); + break; + } + case OPCODE_ADD: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ADD\n"); + } + CallBinaryOp("+", psInst, 0, 1, 2, SVT_FLOAT); + break; + } + case OPCODE_OR: + { + /*Todo: vector version */ + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//OR\n"); + } + if (psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL) + { + uint32_t destMask = psInst->asOperands[0].GetAccessMask(); + + int needsParenthesis = 0; + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), psInst->ui32PreciseMask, needsParenthesis); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, destMask); + bcatcstr(glsl, " || "); + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, destMask); + AddAssignPrologue(needsParenthesis); + } + else + CallBinaryOp("|", psInst, 0, 1, 2, SVT_UINT); + break; + } + case OPCODE_AND: + { + SHADER_VARIABLE_TYPE eA = psInst->asOperands[1].GetDataType(psContext); + SHADER_VARIABLE_TYPE eB = psInst->asOperands[2].GetDataType(psContext); + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//AND\n"); + } + uint32_t destMask = psInst->asOperands[0].GetAccessMask(); + const uint32_t dstSwizCount = psInst->asOperands[0].GetNumSwizzleElements(); + SHADER_VARIABLE_TYPE eDataType = psInst->asOperands[0].GetDataType(psContext); + uint32_t ui32Flags = SVTTypeToFlag(eDataType); + if (psInst->asOperands[0].GetDataType(psContext) == SVT_BOOL) + { + int needsParenthesis = 0; + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_BOOL, psInst->asOperands[0].GetNumSwizzleElements(), psInst->ui32PreciseMask, needsParenthesis); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_BOOL, destMask); + bcatcstr(glsl, " && "); + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_BOOL, destMask); + AddAssignPrologue(needsParenthesis); + } + else if ((eA == SVT_BOOL || eB == SVT_BOOL) && !(eA == SVT_BOOL && eB == SVT_BOOL)) + { + int boolOp = eA == SVT_BOOL ? 1 : 2; + int otherOp = eA == SVT_BOOL ? 2 : 1; + int needsParenthesis = 0; + uint32_t i; + psContext->AddIndentation(); + + if (dstSwizCount == 1) + { + AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, psInst->ui32PreciseMask, needsParenthesis); + glsl << TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); + bcatcstr(glsl, " ? 
"); + glsl << TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, destMask); + bcatcstr(glsl, " : "); + + bcatcstr(glsl, GetConstructorForTypeMetal(eDataType, dstSwizCount)); + bcatcstr(glsl, "("); + switch (eDataType) + { + case SVT_FLOAT: + case SVT_FLOAT10: + case SVT_FLOAT16: + case SVT_DOUBLE: + bcatcstr(glsl, "0.0"); + break; + default: + bcatcstr(glsl, "0"); + } + bcatcstr(glsl, ")"); + } + else if (eDataType == SVT_FLOAT) + { + // We can use select() + AddAssignToDest(&psInst->asOperands[0], eDataType, dstSwizCount, psInst->ui32PreciseMask, needsParenthesis); + bcatcstr(glsl, "select("); + bcatcstr(glsl, GetConstructorForTypeMetal(eDataType, dstSwizCount)); + bcatcstr(glsl, "("); + for (i = 0; i < dstSwizCount; i++) + { + if (i > 0) + bcatcstr(glsl, ", "); + bcatcstr(glsl, "0.0"); + } + bcatcstr(glsl, "), "); + glsl << TranslateOperand(&psInst->asOperands[otherOp], ui32Flags, destMask); + bcatcstr(glsl, ", "); + bcatcstr(glsl, GetConstructorForTypeMetal(SVT_BOOL, dstSwizCount)); + bcatcstr(glsl, "("); + glsl << TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); + bcatcstr(glsl, ")"); + bcatcstr(glsl, ")"); + } + else + { + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, dstSwizCount, psInst->ui32PreciseMask, needsParenthesis); + bcatcstr(glsl, "("); + bcatcstr(glsl, GetConstructorForTypeMetal(SVT_UINT, dstSwizCount)); + bcatcstr(glsl, "("); + glsl << TranslateOperand(&psInst->asOperands[boolOp], TO_FLAG_BOOL, destMask); + bcatcstr(glsl, ") * 0xffffffffu) & "); + glsl << TranslateOperand(&psInst->asOperands[otherOp], TO_FLAG_UNSIGNED_INTEGER, destMask); + } + + AddAssignPrologue(needsParenthesis); + } + else + { + CallBinaryOp("&", psInst, 0, 1, 2, SVT_UINT); + } + + + break; + } + case OPCODE_GE: + { + /* + dest = vec4(greaterThanEqual(vec4(srcA), vec4(srcB)); + Caveat: The result is a boolean but HLSL asm returns 0xFFFFFFFF/0x0 instead. + */ + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//GE\n"); + } + AddComparison(psInst, CMP_GE, TO_FLAG_NONE); + break; + } + case OPCODE_MUL: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//MUL\n"); + } + CallBinaryOp("*", psInst, 0, 1, 2, SVT_FLOAT); + break; + } + case OPCODE_IMUL: + { + SHADER_VARIABLE_TYPE eType = SVT_INT; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMUL\n"); + } + if (psInst->asOperands[1].GetDataType(psContext) == SVT_UINT) + { + eType = SVT_UINT; + } + + ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_NULL); + + CallBinaryOp("*", psInst, 1, 2, 3, eType); + break; + } + case OPCODE_UDIV: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//UDIV\n"); + } + //destQuotient, destRemainder, src0, src1 + + // There are cases where destQuotient is the same variable as src0 or src1. If that happens, + // we need to compute "%" before the "/" in order to avoid src0 or src1 being overriden first. 
+ if ((psInst->asOperands[0].eType != psInst->asOperands[2].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[2].ui32RegisterNumber) + && (psInst->asOperands[0].eType != psInst->asOperands[3].eType || psInst->asOperands[0].ui32RegisterNumber != psInst->asOperands[3].ui32RegisterNumber)) + { + CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT); + CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT); + } + else + { + CallBinaryOp("%", psInst, 1, 2, 3, SVT_UINT); + CallBinaryOp("/", psInst, 0, 2, 3, SVT_UINT); + } + break; + } + case OPCODE_DIV: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DIV\n"); + } + CallBinaryOp("/", psInst, 0, 1, 2, SVT_FLOAT); + break; + } + case OPCODE_SINCOS: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SINCOS\n"); + } + // Need careful ordering if src == dest[0], as then the cos() will be reading from wrong value + if (psInst->asOperands[0].eType == psInst->asOperands[2].eType && + psInst->asOperands[0].ui32RegisterNumber == psInst->asOperands[2].ui32RegisterNumber) + { + // sin() result overwrites source, do cos() first. + // The case where both write the src shouldn't really happen anyway. + if (psInst->asOperands[1].eType != OPERAND_TYPE_NULL) + { + CallHelper1("cos", psInst, 1, 2, 1); + } + + if (psInst->asOperands[0].eType != OPERAND_TYPE_NULL) + { + CallHelper1( + "sin", psInst, 0, 2, 1); + } + } + else + { + if (psInst->asOperands[0].eType != OPERAND_TYPE_NULL) + { + CallHelper1("sin", psInst, 0, 2, 1); + } + + if (psInst->asOperands[1].eType != OPERAND_TYPE_NULL) + { + CallHelper1("cos", psInst, 1, 2, 1); + } + } + break; + } + + case OPCODE_DP2: + { + int numParenthesis = 0; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DP2\n"); + } + psContext->AddIndentation(); + SHADER_VARIABLE_TYPE dstType = psInst->asOperands[0].GetDataType(psContext); + uint32_t typeFlags = TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC2; + if (CanForceToHalfOperand(&psInst->asOperands[1]) + && CanForceToHalfOperand(&psInst->asOperands[2])) + typeFlags = TO_FLAG_FORCE_HALF | TO_AUTO_EXPAND_TO_VEC2; + + if (dstType != SVT_FLOAT16) + dstType = SVT_FLOAT; + + AddAssignToDest(&psInst->asOperands[0], dstType, 1, psInst->ui32PreciseMask, numParenthesis); + bcatcstr(glsl, "dot("); + glsl << TranslateOperand(&psInst->asOperands[1], typeFlags, 3 /* .xy */); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[2], typeFlags, 3 /* .xy */); + bcatcstr(glsl, ")"); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_DP3: + { + int numParenthesis = 0; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DP3\n"); + } + psContext->AddIndentation(); + SHADER_VARIABLE_TYPE dstType = psInst->asOperands[0].GetDataType(psContext); + uint32_t typeFlags = TO_AUTO_BITCAST_TO_FLOAT | TO_AUTO_EXPAND_TO_VEC3; + if (CanForceToHalfOperand(&psInst->asOperands[1]) + && CanForceToHalfOperand(&psInst->asOperands[2])) + typeFlags = TO_FLAG_FORCE_HALF | TO_AUTO_EXPAND_TO_VEC3; + + if (dstType != SVT_FLOAT16) + dstType = SVT_FLOAT; + + AddAssignToDest(&psInst->asOperands[0], dstType, 1, psInst->ui32PreciseMask, numParenthesis); + bcatcstr(glsl, "dot("); + glsl << TranslateOperand(&psInst->asOperands[1], typeFlags, 7 /* .xyz */); + bcatcstr(glsl, ", "); + glsl << 
TranslateOperand(&psInst->asOperands[2], typeFlags, 7 /* .xyz */); + bcatcstr(glsl, ")"); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_DP4: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DP4\n"); + } + CallHelper2("dot", psInst, 0, 1, 2, 0); + break; + } + case OPCODE_INE: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//INE\n"); + } + AddComparison(psInst, CMP_NE, TO_FLAG_INTEGER); + break; + } + case OPCODE_NE: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//NE\n"); + } + AddComparison(psInst, CMP_NE, TO_FLAG_NONE); + break; + } + case OPCODE_IGE: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IGE\n"); + } + AddComparison(psInst, CMP_GE, TO_FLAG_INTEGER); + break; + } + case OPCODE_ILT: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ILT\n"); + } + AddComparison(psInst, CMP_LT, TO_FLAG_INTEGER); + break; + } + case OPCODE_LT: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LT\n"); + } + AddComparison(psInst, CMP_LT, TO_FLAG_NONE); + break; + } + case OPCODE_IEQ: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IEQ\n"); + } + AddComparison(psInst, CMP_EQ, TO_FLAG_INTEGER); + break; + } + case OPCODE_ULT: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ULT\n"); + } + AddComparison(psInst, CMP_LT, TO_FLAG_UNSIGNED_INTEGER); + break; + } + case OPCODE_UGE: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//UGE\n"); + } + AddComparison(psInst, CMP_GE, TO_FLAG_UNSIGNED_INTEGER); + break; + } + case OPCODE_MOVC: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//MOVC\n"); + } + AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[1], &psInst->asOperands[2], &psInst->asOperands[3], psInst->ui32PreciseMask); + break; + } + case OPCODE_SWAPC: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SWAPC\n"); + } + // TODO needs temps!! 
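+ // Sketch of the problem (hypothetical registers): for "swapc r0.x, r1.x, r2.x, r0.x, r3.x" the
+ // first MOVC below overwrites r0.x, yet the second MOVC still needs the original r0.x as an
+ // input, so a correct translation would have to stash the sources in temporaries first.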
+ ASSERT(0); + AddMOVCBinaryOp(&psInst->asOperands[0], &psInst->asOperands[2], &psInst->asOperands[4], &psInst->asOperands[3], psInst->ui32PreciseMask); + AddMOVCBinaryOp(&psInst->asOperands[1], &psInst->asOperands[2], &psInst->asOperands[3], &psInst->asOperands[4], psInst->ui32PreciseMask); + break; + } + + case OPCODE_LOG: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LOG\n"); + } + CallHelper1("log2", psInst, 0, 1, 1); + break; + } + case OPCODE_RSQ: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//RSQ\n"); + } + CallHelper1("rsqrt", psInst, 0, 1, 1); + break; + } + case OPCODE_EXP: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EXP\n"); + } + CallHelper1("exp2", psInst, 0, 1, 1); + break; + } + case OPCODE_SQRT: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SQRT\n"); + } + CallHelper1("sqrt", psInst, 0, 1, 1); + break; + } + case OPCODE_ROUND_PI: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_PI\n"); + } + CallHelper1("ceil", psInst, 0, 1, 1); + break; + } + case OPCODE_ROUND_NI: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_NI\n"); + } + CallHelper1("floor", psInst, 0, 1, 1); + break; + } + case OPCODE_ROUND_Z: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_Z\n"); + } + CallHelper1("trunc", psInst, 0, 1, 1); + break; + } + case OPCODE_ROUND_NE: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ROUND_NE\n"); + } + CallHelper1("rint", psInst, 0, 1, 1); + break; + } + case OPCODE_FRC: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//FRC\n"); + } + CallHelper1("fract", psInst, 0, 1, 1); + break; + } + case OPCODE_IMAX: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMAX\n"); + } + CallHelper2Int("max", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_UMAX: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//UMAX\n"); + } + CallHelper2UInt("max", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_MAX: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//MAX\n"); + } + CallHelper2("max", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_IMIN: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMIN\n"); + } + CallHelper2Int("min", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_UMIN: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//UMIN\n"); + } + CallHelper2UInt("min", psInst, 0, 1, 2, 1); + break; + } + case OPCODE_MIN: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//MIN\n"); + } + CallHelper2("min", psInst, 0, 1, 2, 1); + 
break; + } + case OPCODE_GATHER4: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4\n"); + } + TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER); + break; + } + case OPCODE_GATHER4_PO_C: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4_PO_C\n"); + } + TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_PARAMOFFSET | TEXSMP_FLAG_DEPTHCOMPARE); + break; + } + case OPCODE_GATHER4_PO: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4_PO\n"); + } + TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_PARAMOFFSET); + break; + } + case OPCODE_GATHER4_C: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//GATHER4_C\n"); + } + TranslateTextureSample(psInst, TEXSMP_FLAG_GATHER | TEXSMP_FLAG_DEPTHCOMPARE); + break; + } + case OPCODE_SAMPLE: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE\n"); + } + TranslateTextureSample(psInst, TEXSMP_FLAG_NONE); + break; + } + case OPCODE_SAMPLE_L: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_L\n"); + } + TranslateTextureSample(psInst, TEXSMP_FLAG_LOD); + break; + } + case OPCODE_SAMPLE_C: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_C\n"); + } + TranslateTextureSample(psInst, TEXSMP_FLAG_DEPTHCOMPARE); + break; + } + case OPCODE_SAMPLE_C_LZ: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_C_LZ\n"); + } + TranslateTextureSample(psInst, TEXSMP_FLAG_DEPTHCOMPARE | TEXSMP_FLAG_FIRSTLOD); + break; + } + case OPCODE_SAMPLE_D: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_D\n"); + } + TranslateTextureSample(psInst, TEXSMP_FLAG_GRAD); + break; + } + case OPCODE_SAMPLE_B: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_B\n"); + } + TranslateTextureSample(psInst, TEXSMP_FLAG_BIAS); + break; + } + case OPCODE_RET: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//RET\n"); + } + if (psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode) + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//--- Post shader code ---\n"); + } + + bconcat(glsl, psContext->psShader->asPhases[psContext->currentPhase].postShaderCode); + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//--- End post shader code ---\n"); + } + } + psContext->AddIndentation(); + if (psContext->psShader->eShaderType == COMPUTE_SHADER || (psContext->psShader->eShaderType == PIXEL_SHADER && m_StructDefinitions[GetOutputStructName()].m_Members.size() == 0)) + bcatcstr(glsl, "return;\n"); + else + bcatcstr(glsl, "return output;\n"); + + break; + } + case OPCODE_INTERFACE_CALL: + { + ASSERT(0); + } + case OPCODE_LABEL: + { + 
ASSERT(0); // Never seen this + } + case OPCODE_COUNTBITS: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//COUNTBITS\n"); + } + psContext->AddIndentation(); + + // in metal popcount decl is T popcount(T), so it is important that input/output types agree + // enter assembly: when writing swizzle encoding we use 0 to say "source from x" + // now, say, we generate code o.xy = bitcount(i.xy) + // output gets component mask 1,1,0,0 (note that we use bit 1<<i to mark component i as used) + // while the translated source operand (i.<..>) will still collapse everything into + // popcount(i.<..>) [well, tweaking swizzle, sure] + // what does that mean is that we can safely take output component count to determine "proper" type + // note that hlsl compiler already checked that things can work out, so it should be fine doing this magic + + const Operand* dst = &psInst->asOperands[0]; + const int dstCompCount = dst->eSelMode == OPERAND_4_COMPONENT_MASK_MODE ? dst->ui32CompMask : OPERAND_4_COMPONENT_MASK_ALL; + + glsl << TranslateOperand(dst, TO_FLAG_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = popcount("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, dstCompCount); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_FIRSTBIT_HI: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//FIRSTBIT_HI\n"); + } + DeclareExtraFunction("firstBit_hi", "template <typename UVecType> UVecType firstBit_hi(const UVecType input) { UVecType res = clz(input); return res; };"); + // TODO implement the 0-case (must return 0xffffffff) + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = firstBit_hi("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_FIRSTBIT_LO: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//FIRSTBIT_LO\n"); + } + // TODO implement the 0-case (must return 0xffffffff) + DeclareExtraFunction("firstBit_lo", "template <typename UVecType> UVecType firstBit_lo(const UVecType input) { UVecType res = ctz(input); return res; };"); + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_UNSIGNED_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = firstBit_lo("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_FIRSTBIT_SHI: //signed high + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//FIRSTBIT_SHI\n"); + } + // TODO Not at all correct for negative values yet.
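+ // (For reference: the DXBC instruction returns, counting from the MSB, the position of the first
+ // bit that differs from the sign bit, and 0xffffffff when there is none, i.e. for 0 and -1. A
+ // fuller helper would therefore look roughly like
+ //     res = select(clz(input), clz(~input), input < 0);
+ // plus that special case - this is only a sketch, not what the code below emits.)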
+ DeclareExtraFunction("firstBit_shi", "template IVecType firstBit_shi(const IVecType input) { IVecType res = clz(input); return res; };"); + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = firstBit_shi("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_BFREV: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//BFREV\n"); + } + DeclareExtraFunction("bitReverse", "template UVecType bitReverse(const UVecType input)\n\ +\t\t{ UVecType x = input;\n\ +\t\t\tx = (((x & 0xaaaaaaaa) >> 1) | ((x & 0x55555555) << 1));\n\ +\t\t\tx = (((x & 0xcccccccc) >> 2) | ((x & 0x33333333) << 2));\n\ +\t\t\tx = (((x & 0xf0f0f0f0) >> 4) | ((x & 0x0f0f0f0f) << 4));\n\ +\t\t\tx = (((x & 0xff00ff00) >> 8) | ((x & 0x00ff00ff) << 8));\n\ +\t\t\treturn((x >> 16) | (x << 16));\n\ +\t\t}; "); + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER | TO_FLAG_DESTINATION); + bcatcstr(glsl, " = bitReverse("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_BFI: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//BFI\n"); + } + DeclareExtraFunction("BFI", "\ +\t\ttemplate UVecType bitFieldInsert(const UVecType width, const UVecType offset, const UVecType src2, const UVecType src3)\n\ +\t\t{\n\ +\t\t\tUVecType bitmask = (((UVecType(1) << width)-1) << offset) & 0xffffffff;\n\ +\t\t\treturn ((src2 << offset) & bitmask) | (src3 & ~bitmask);\n\ +\t\t}; "); + psContext->AddIndentation(); + + uint32_t destMask = psInst->asOperands[0].GetAccessMask(); + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, psInst->asOperands[0].GetNumSwizzleElements(), psInst->ui32PreciseMask, numParenthesis); + bcatcstr(glsl, "bitFieldInsert("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_UNSIGNED_INTEGER, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_UNSIGNED_INTEGER, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[3], TO_FLAG_UNSIGNED_INTEGER, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[4], TO_FLAG_UNSIGNED_INTEGER, destMask); + bcatcstr(glsl, ")"); + + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_CUT: + case OPCODE_EMITTHENCUT_STREAM: + case OPCODE_EMIT: + case OPCODE_EMITTHENCUT: + case OPCODE_CUT_STREAM: + case OPCODE_EMIT_STREAM: + { + ASSERT(0); // Not on metal + } + case OPCODE_REP: + case OPCODE_ENDREP: + { + ASSERT(0); // Shouldn't see these anymore + } + case OPCODE_LOOP: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LOOP\n"); + } + psContext->AddIndentation(); + + bcatcstr(glsl, "while(true){\n"); + ++psContext->indent; + break; + } + case OPCODE_ENDLOOP: + { + --psContext->indent; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ENDLOOP\n"); + } + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + break; + } + case OPCODE_BREAK: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//BREAK\n"); + } + psContext->AddIndentation(); + 
bcatcstr(glsl, "break;\n"); + break; + } + case OPCODE_BREAKC: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//BREAKC\n"); + } + psContext->AddIndentation(); + + TranslateConditional(psInst, glsl); + break; + } + case OPCODE_CONTINUEC: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//CONTINUEC\n"); + } + psContext->AddIndentation(); + + TranslateConditional(psInst, glsl); + break; + } + case OPCODE_IF: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IF\n"); + } + psContext->AddIndentation(); + + TranslateConditional(psInst, glsl); + ++psContext->indent; + break; + } + case OPCODE_RETC: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//RETC\n"); + } + psContext->AddIndentation(); + + TranslateConditional(psInst, glsl); + break; + } + case OPCODE_ELSE: + { + --psContext->indent; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ELSE\n"); + } + psContext->AddIndentation(); + bcatcstr(glsl, "} else {\n"); + psContext->indent++; + break; + } + case OPCODE_ENDSWITCH: + case OPCODE_ENDIF: + { + --psContext->indent; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ENDIF\n"); + } + psContext->AddIndentation(); + bcatcstr(glsl, "}\n"); + break; + } + case OPCODE_CONTINUE: + { + psContext->AddIndentation(); + bcatcstr(glsl, "continue;\n"); + break; + } + case OPCODE_DEFAULT: + { + --psContext->indent; + psContext->AddIndentation(); + bcatcstr(glsl, "default:\n"); + ++psContext->indent; + break; + } + case OPCODE_NOP: + { + break; + } + case OPCODE_SYNC: + { + const uint32_t ui32SyncFlags = psInst->ui32SyncFlags; + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SYNC\n"); + } + const bool sync_threadgroup = (ui32SyncFlags & SYNC_THREAD_GROUP_SHARED_MEMORY) != 0; + const bool sync_device = (ui32SyncFlags & (SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GROUP | SYNC_UNORDERED_ACCESS_VIEW_MEMORY_GLOBAL)) != 0; + + const char* barrierFlags = "mem_flags::mem_none"; + if (sync_threadgroup && sync_device) barrierFlags = "mem_flags::mem_threadgroup | mem_flags::mem_device"; + else if (sync_threadgroup) barrierFlags = "mem_flags::mem_threadgroup"; + else if (sync_device) barrierFlags = "mem_flags::mem_device"; + + if (ui32SyncFlags & SYNC_THREADS_IN_GROUP) + { + psContext->AddIndentation(); + bformata(glsl, "threadgroup_barrier(%s);\n", barrierFlags); + } + else + { + psContext->AddIndentation(); bformata(glsl, "#if __HAVE_SIMDGROUP_BARRIER__\n"); + psContext->AddIndentation(); bformata(glsl, "simdgroup_barrier(%s);\n", barrierFlags); + psContext->AddIndentation(); bformata(glsl, "#else\n"); + psContext->AddIndentation(); bformata(glsl, "threadgroup_barrier(%s);\n", barrierFlags); + psContext->AddIndentation(); bformata(glsl, "#endif\n"); + } + + break; + } + case OPCODE_SWITCH: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SWITCH\n"); + } + psContext->AddIndentation(); + bcatcstr(glsl, "switch(int("); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); + bcatcstr(glsl, ")){\n"); + + psContext->indent 
+= 2; + break; + } + case OPCODE_CASE: + { + --psContext->indent; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//case\n"); + } + psContext->AddIndentation(); + + bcatcstr(glsl, "case "); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); + bcatcstr(glsl, ":\n"); + + ++psContext->indent; + break; + } + case OPCODE_EQ: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EQ\n"); + } + AddComparison(psInst, CMP_EQ, TO_FLAG_NONE); + break; + } + case OPCODE_USHR: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//USHR\n"); + } + CallBinaryOp(">>", psInst, 0, 1, 2, SVT_UINT); + break; + } + case OPCODE_ISHL: + { + SHADER_VARIABLE_TYPE eType = SVT_INT; + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ISHL\n"); + } + + if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) + { + eType = SVT_UINT; + } + + CallBinaryOp("<<", psInst, 0, 1, 2, eType); + break; + } + case OPCODE_ISHR: + { + SHADER_VARIABLE_TYPE eType = SVT_INT; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//ISHR\n"); + } + + if (psInst->asOperands[0].GetDataType(psContext) == SVT_UINT) + { + eType = SVT_UINT; + } + + CallBinaryOp(">>", psInst, 0, 1, 2, eType); + break; + } + case OPCODE_LD: + case OPCODE_LD_MS: + { + const ResourceBinding* psBinding = 0; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_LD) + bcatcstr(glsl, "//LD\n"); + else + bcatcstr(glsl, "//LD_MS\n"); + } + + psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_TEXTURE, psInst->asOperands[2].ui32RegisterNumber, &psBinding); + + if (psInst->eResDim == RESOURCE_DIMENSION_BUFFER) // Hack typed buffer as raw buf + { + psInst->eOpcode = OPCODE_LD_UAV_TYPED; + psInst->asOperands[1].eSelMode = OPERAND_4_COMPONENT_SELECT_1_MODE; + if (psInst->asOperands[1].eType == OPERAND_TYPE_IMMEDIATE32) + psInst->asOperands[1].iNumComponents = 1; + TranslateShaderStorageLoad(psInst); + break; + } + + if (psInst->bAddressOffset) + { + TranslateTexelFetchOffset(psInst, psBinding, glsl); + } + else + { + TranslateTexelFetch(psInst, psBinding, glsl); + } + break; + } + case OPCODE_DISCARD: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DISCARD\n"); + } + + psContext->AddIndentation(); + if (psInst->eBooleanTestType == INSTRUCTION_TEST_ZERO) + { + bcatcstr(glsl, "if(("); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); + bcatcstr(glsl, ")==0){discard_fragment();}\n"); + } + else + { + ASSERT(psInst->eBooleanTestType == INSTRUCTION_TEST_NONZERO); + bcatcstr(glsl, "if(("); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_INTEGER); + bcatcstr(glsl, ")!=0){discard_fragment();}\n"); + } + break; + } + case OPCODE_LOD: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LOD\n"); + } + //LOD computes the following vector (ClampedLOD, NonClampedLOD, 0, 0) + + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 4, psInst->ui32PreciseMask, numParenthesis); + + //If the core language does not have 
query-lod feature, + //then the extension is used. The name of the function + //changed between extension and core. + if (HaveQueryLod(psContext->psShader->eTargetLanguage)) + { + bcatcstr(glsl, "textureQueryLod("); + } + else + { + bcatcstr(glsl, "textureQueryLOD("); + } + + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ","); + TranslateTexCoord( + psContext->psShader->aeResourceDims[psInst->asOperands[2].ui32RegisterNumber], + &psInst->asOperands[1]); + bcatcstr(glsl, ")"); + + //The swizzle on srcResource allows the returned values to be swizzled arbitrarily before they are written to the destination. + + // iWriteMaskEnabled is forced off during DecodeOperand because swizzle on sampler uniforms + // does not make sense. But need to re-enable to correctly swizzle this particular instruction. + psInst->asOperands[2].iWriteMaskEnabled = 1; + glsl << TranslateOperandSwizzle(&psInst->asOperands[2], psInst->asOperands[0].GetAccessMask(), 0); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_EVAL_CENTROID: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EVAL_CENTROID\n"); + } + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, " = interpolateAtCentroid("); + //interpolateAtCentroid accepts in-qualified variables. + //As long as bytecode only writes vX registers in declarations + //we should be able to use the declared name directly. + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_EVAL_SAMPLE_INDEX: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EVAL_SAMPLE_INDEX\n"); + } + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, " = interpolateAtSample("); + //interpolateAtSample accepts in-qualified variables. + //As long as bytecode only writes vX registers in declarations + //we should be able to use the declared name directly. + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_INTEGER); + bcatcstr(glsl, ");\n"); + break; + } + case OPCODE_EVAL_SNAPPED: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//EVAL_SNAPPED\n"); + } + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, " = interpolateAtOffset("); + //interpolateAtOffset accepts in-qualified variables. + //As long as bytecode only writes vX registers in declarations + //we should be able to use the declared name directly. 
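+ // Illustrative output with made-up names (not emitted verbatim by this code):
+ //     output.col0 = interpolateAtOffset(input.texcoord0, u_xlat1.xy);
+ // Note the interpolateAt* names follow the GLSL convention rather than Metal Shading Language
+ // builtins, so shaders that hit this path may need further adaptation on the Metal side.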
+ glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_DECLARATION_NAME); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_INTEGER); + bcatcstr(glsl, ".xy);\n"); + break; + } + case OPCODE_LD_STRUCTURED: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LD_STRUCTURED\n"); + } + TranslateShaderStorageLoad(psInst); + break; + } + case OPCODE_LD_UAV_TYPED: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LD_UAV_TYPED\n"); + } + Operand* psDest = &psInst->asOperands[0]; + Operand* psSrc = &psInst->asOperands[2]; + Operand* psSrcAddr = &psInst->asOperands[1]; + + const ResourceBinding* psRes = 0; + psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, psSrc->ui32RegisterNumber, &psRes); + SHADER_VARIABLE_TYPE srcDataType = ResourceReturnTypeToSVTType(psRes->ui32ReturnType, psRes->ePrecision); + + if (psInst->eResDim == RESOURCE_DIMENSION_BUFFER) // Hack typed buffer as raw buf + { + psSrc->aeDataType[0] = srcDataType; + psSrcAddr->eSelMode = OPERAND_4_COMPONENT_SELECT_1_MODE; + if (psSrcAddr->eType == OPERAND_TYPE_IMMEDIATE32) + psSrcAddr->iNumComponents = 1; + TranslateShaderStorageLoad(psInst); + break; + } + +#define RRD(n) REFLECT_RESOURCE_DIMENSION_ ## n + + // unlike glsl, texture arrays will have index in separate argument + const bool isArray = psRes->eDimension == RRD(TEXTURE1DARRAY) || psRes->eDimension == RRD(TEXTURE2DARRAY) + || psRes->eDimension == RRD(TEXTURE2DMSARRAY) || psRes->eDimension == RRD(TEXTURECUBEARRAY); + + uint32_t flags = TO_FLAG_UNSIGNED_INTEGER, opMask = OPERAND_4_COMPONENT_MASK_ALL; + switch (psRes->eDimension) + { + case RRD(TEXTURE3D): + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; + flags |= TO_AUTO_EXPAND_TO_VEC3; + break; + case RRD(TEXTURECUBE): case RRD(TEXTURECUBEARRAY): + case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY): case RRD(TEXTURE2D): case RRD(TEXTURE2DMS): + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; + flags |= TO_AUTO_EXPAND_TO_VEC2; + break; + case RRD(TEXTURE1D): case RRD(TEXTURE1DARRAY): + opMask = OPERAND_4_COMPONENT_MASK_X; + break; + default: + ASSERT(0); break; + } + + int srcCount = psSrc->GetNumSwizzleElements(), numParenthesis = 0; + psContext->AddIndentation(); + AddAssignToDest(psDest, srcDataType, srcCount, psInst->ui32PreciseMask, numParenthesis); + glsl << TranslateOperand(psSrc, TO_FLAG_NAME_ONLY); + bcatcstr(glsl, ".read("); + glsl << TranslateOperand(psSrcAddr, flags, opMask); + if (isArray) + { + // NB cube array is handled incorrectly - it needs extra "face" arg + switch (psRes->eDimension) + { + case RRD(TEXTURE1DARRAY): opMask = OPERAND_4_COMPONENT_MASK_Y; break; + case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY): opMask = OPERAND_4_COMPONENT_MASK_Z; break; + case RRD(TEXTURECUBEARRAY): opMask = OPERAND_4_COMPONENT_MASK_W; break; + default: ASSERT(0); break; + } + + bcatcstr(glsl, ", "); + glsl << TranslateOperand(psSrcAddr, TO_FLAG_UNSIGNED_INTEGER, opMask); + } + bcatcstr(glsl, ")"); + glsl << TranslateOperandSwizzle(psSrc, psDest->ui32CompMask, 0); + AddAssignPrologue(numParenthesis); + +#undef RRD + + break; + } + case OPCODE_STORE_RAW: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//STORE_RAW\n"); + } + TranslateShaderStorageStore(psInst); + break; + } + case 
OPCODE_STORE_STRUCTURED: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//STORE_STRUCTURED\n"); + } + TranslateShaderStorageStore(psInst); + break; + } + + case OPCODE_STORE_UAV_TYPED: + { + const ResourceBinding* psRes; + int foundResource; + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//STORE_UAV_TYPED\n"); + } + foundResource = psContext->psShader->sInfo.GetResourceFromBindingPoint(RGROUP_UAV, + psInst->asOperands[0].ui32RegisterNumber, + &psRes); + ASSERT(foundResource); + + if (psRes->eDimension == REFLECT_RESOURCE_DIMENSION_BUFFER) // Hack typed buffer as raw buf + { + psInst->asOperands[0].aeDataType[0] = ResourceReturnTypeToSVTType(psRes->ui32ReturnType, psRes->ePrecision); + psInst->asOperands[1].eSelMode = OPERAND_4_COMPONENT_SELECT_1_MODE; + if (psInst->asOperands[1].eType == OPERAND_TYPE_IMMEDIATE32) + psInst->asOperands[1].iNumComponents = 1; + TranslateShaderStorageStore(psInst); + break; + } + + psContext->AddIndentation(); + + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_NAME_ONLY); + bcatcstr(glsl, ".write("); + + #define RRD(n) REFLECT_RESOURCE_DIMENSION_ ## n + + // unlike glsl, texture arrays will have index in separate argument + const bool isArray = psRes->eDimension == RRD(TEXTURE1DARRAY) || psRes->eDimension == RRD(TEXTURE2DARRAY) + || psRes->eDimension == RRD(TEXTURE2DMSARRAY) || psRes->eDimension == RRD(TEXTURECUBEARRAY); + + uint32_t flags = TO_FLAG_UNSIGNED_INTEGER, opMask = OPERAND_4_COMPONENT_MASK_ALL; + switch (psRes->eDimension) + { + case RRD(TEXTURE1D): case RRD(TEXTURE1DARRAY): + opMask = OPERAND_4_COMPONENT_MASK_X; + break; + case RRD(TEXTURE2D): case RRD(TEXTURE2DMS): case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY): + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y; + flags |= TO_AUTO_EXPAND_TO_VEC2; + break; + case RRD(TEXTURE3D): case RRD(TEXTURECUBE): case RRD(TEXTURECUBEARRAY): + opMask = OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z; + flags |= TO_AUTO_EXPAND_TO_VEC3; + break; + default: + ASSERT(0); + break; + } + + + glsl << TranslateOperand(&psInst->asOperands[2], ResourceReturnTypeToFlag(psRes->ui32ReturnType)); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[1], flags, opMask); + if (isArray) + { + // NB cube array is handled incorrectly - it needs extra "face" arg + flags = TO_FLAG_UNSIGNED_INTEGER; + switch (psRes->eDimension) + { + case RRD(TEXTURE1DARRAY): opMask = OPERAND_4_COMPONENT_MASK_Y; break; + case RRD(TEXTURE2DARRAY): case RRD(TEXTURE2DMSARRAY): opMask = OPERAND_4_COMPONENT_MASK_Z; break; + case RRD(TEXTURECUBEARRAY): opMask = OPERAND_4_COMPONENT_MASK_Z; break; + default: ASSERT(0); break; + } + + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[1], flags, opMask); + } + bformata(glsl, ");\n"); + +#undef RRD + + break; + } + case OPCODE_LD_RAW: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LD_RAW\n"); + } + TranslateShaderStorageLoad(psInst); + break; + } + + case OPCODE_ATOMIC_CMP_STORE: + case OPCODE_IMM_ATOMIC_AND: + case OPCODE_ATOMIC_AND: + case OPCODE_IMM_ATOMIC_IADD: + case OPCODE_ATOMIC_IADD: + case OPCODE_ATOMIC_OR: + case OPCODE_ATOMIC_XOR: + case OPCODE_ATOMIC_IMAX: + case OPCODE_ATOMIC_IMIN: + case OPCODE_ATOMIC_UMAX: + case OPCODE_ATOMIC_UMIN: + case 
OPCODE_IMM_ATOMIC_IMAX: + case OPCODE_IMM_ATOMIC_IMIN: + case OPCODE_IMM_ATOMIC_UMAX: + case OPCODE_IMM_ATOMIC_UMIN: + case OPCODE_IMM_ATOMIC_OR: + case OPCODE_IMM_ATOMIC_XOR: + case OPCODE_IMM_ATOMIC_EXCH: + case OPCODE_IMM_ATOMIC_CMP_EXCH: + { + TranslateAtomicMemOp(psInst); + break; + } + case OPCODE_UBFE: + case OPCODE_IBFE: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + if (psInst->eOpcode == OPCODE_UBFE) + bcatcstr(glsl, "//OPCODE_UBFE\n"); + else + bcatcstr(glsl, "//OPCODE_IBFE\n"); + } + + bool isUBFE = psInst->eOpcode == OPCODE_UBFE; + bool isScalar = psInst->asOperands[0].GetNumSwizzleElements() == 1; + + if (isUBFE) + { + if (isScalar) + { + DeclareExtraFunction("UBFE", "\ +uint bitFieldExtractU(uint width, uint offset, uint src);\n\ +uint bitFieldExtractU(uint width, uint offset, uint src)\n\ +{\n\ +\tbool isWidthZero = (width == 0);\n\ +\tbool needsClamp = ((width + offset) < 32);\n\ +\tuint clampVersion = src << (32-(width+offset));\n\ +\tclampVersion = clampVersion >> (32 - width);\n\ +\tuint simpleVersion = src >> offset;\n\ +\tuint res = select(simpleVersion, clampVersion, needsClamp);\n\ +\treturn select(res, (uint)0, isWidthZero);\n\ +}; "); + } + else + { + DeclareExtraFunction("UBFEV", "\ +template vec bitFieldExtractU(const vec width, const vec offset, const vec src)\n\ +{\n\ +\tvec isWidthZero = (width == 0);\n\ +\tvec needsClamp = ((width + offset) < 32);\n\ +\tvec clampVersion = src << (32-(width+offset));\n\ +\tclampVersion = clampVersion >> (32 - width);\n\ +\tvec simpleVersion = src >> offset;\n\ +\tvec res = select(simpleVersion, clampVersion, needsClamp);\n\ +\treturn select(res, vec(0), isWidthZero);\n\ +}; "); + } + } + else + { + if (isScalar) + { + DeclareExtraFunction("IBFE", "\ +template int bitFieldExtractI(uint width, uint offset, int src)\n\ +{\n\ +\tbool isWidthZero = (width == 0);\n\ +\tbool needsClamp = ((width + offset) < 32);\n\ +\tint clampVersion = src << (32-(width+offset));\n\ +\tclampVersion = clampVersion >> (32 - width);\n\ +\tint simpleVersion = src >> offset;\n\ +\tint res = select(simpleVersion, clampVersion, needsClamp);\n\ +\treturn select(res, (int)0, isWidthZero);\n\ +}; "); + } + else + { + DeclareExtraFunction("IBFEV", "\ +template vec bitFieldExtractI(const vec width, const vec offset, const vec src)\n\ +{\n\ +\tvec isWidthZero = (width == 0);\n\ +\tvec needsClamp = ((width + offset) < 32);\n\ +\tvec clampVersion = src << (32-(width+offset));\n\ +\tclampVersion = clampVersion >> (32 - width);\n\ +\tvec simpleVersion = src >> offset;\n\ +\tvec res = select(simpleVersion, clampVersion, needsClamp);\n\ +\treturn select(res, vec(0), isWidthZero);\n\ +}; "); + } + } + psContext->AddIndentation(); + + uint32_t destMask = psInst->asOperands[0].GetAccessMask(); + uint32_t src2SwizCount = psInst->asOperands[3].GetNumSwizzleElements(destMask); + uint32_t src1SwizCount = psInst->asOperands[2].GetNumSwizzleElements(destMask); + uint32_t src0SwizCount = psInst->asOperands[1].GetNumSwizzleElements(destMask); + uint32_t ui32Flags = 0; + + if (src1SwizCount != src0SwizCount || src2SwizCount != src0SwizCount) + { + uint32_t maxElems = std::max(src2SwizCount, std::max(src1SwizCount, src0SwizCount)); + ui32Flags |= (TO_AUTO_EXPAND_TO_VEC2 << (maxElems - 2)); + } + + AddAssignToDest(&psInst->asOperands[0], isUBFE ? SVT_UINT : SVT_INT, psInst->asOperands[0].GetNumSwizzleElements(), psInst->ui32PreciseMask, numParenthesis); + bcatcstr(glsl, "bitFieldExtract"); + bcatcstr(glsl, isUBFE ? 
"U" : "I"); + bcatcstr(glsl, "("); + glsl << TranslateOperand(&psInst->asOperands[1], ui32Flags | TO_FLAG_UNSIGNED_INTEGER, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[2], ui32Flags | TO_FLAG_UNSIGNED_INTEGER, destMask); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[3], ui32Flags | (isUBFE ? TO_FLAG_UNSIGNED_INTEGER : TO_FLAG_INTEGER), destMask); + bcatcstr(glsl, ")"); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_RCP: + { + const uint32_t destElemCount = psInst->asOperands[0].GetNumSwizzleElements(); + const uint32_t srcElemCount = psInst->asOperands[1].GetNumSwizzleElements(); + int numParenthesis = 0; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//RCP\n"); + } + psContext->AddIndentation(); + + SHADER_VARIABLE_TYPE dstType = psInst->asOperands[0].GetDataType(psContext); + SHADER_VARIABLE_TYPE srcType = psInst->asOperands[1].GetDataType(psContext); + + uint32_t typeFlags = TO_FLAG_NONE; + if (dstType == SVT_FLOAT16 && srcType == SVT_FLOAT16) + { + typeFlags = TO_FLAG_FORCE_HALF; + } + else + srcType = SVT_FLOAT; + + AddAssignToDest(&psInst->asOperands[0], srcType, srcElemCount, psInst->ui32PreciseMask, numParenthesis); + bcatcstr(glsl, GetConstructorForTypeMetal(srcType, destElemCount)); + bcatcstr(glsl, "(1.0) / "); + bcatcstr(glsl, GetConstructorForTypeMetal(srcType, destElemCount)); + bcatcstr(glsl, "("); + numParenthesis++; + glsl << TranslateOperand(&psInst->asOperands[1], typeFlags, psInst->asOperands[0].GetAccessMask()); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_F32TOF16: + { + uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//F32TOF16\n"); + } + + for (int i = 0; i < 4; i++) + { + if ((writeMask & (1 << i)) == 0) + continue; + psContext->AddIndentation(); + psInst->asOperands[0].ui32CompMask = (1 << i); + psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, psInst->ui32PreciseMask, numParenthesis); + + bcatcstr(glsl, "as_type(half2("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE, (1 << i)); + bcatcstr(glsl, ", 0.0))"); + AddAssignPrologue(numParenthesis); + } + break; + } + case OPCODE_F16TOF32: + { + uint32_t writeMask = psInst->asOperands[0].GetAccessMask(); + + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//F16TOF32\n"); + } + + for (int i = 0; i < 4; i++) + { + if ((writeMask & (1 << i)) == 0) + continue; + psContext->AddIndentation(); + psInst->asOperands[0].ui32CompMask = (1 << i); + psInst->asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE; + AddAssignToDest(&psInst->asOperands[0], SVT_FLOAT, 1, psInst->ui32PreciseMask, numParenthesis); + + bcatcstr(glsl, "as_type("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_AUTO_BITCAST_TO_UINT, (1 << i)); + bcatcstr(glsl, ").x"); + AddAssignPrologue(numParenthesis); + } + break; + } + case OPCODE_INEG: + { + int numParenthesis = 0; + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//INEG\n"); + } + //dest = 0 - src0 + psContext->AddIndentation(); + + AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), psInst->ui32PreciseMask, numParenthesis); 
+ + bcatcstr(glsl, "0 - "); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_DERIV_RTX_COARSE: + case OPCODE_DERIV_RTX_FINE: + case OPCODE_DERIV_RTX: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DERIV_RTX\n"); + } + CallHelper1("dfdx", psInst, 0, 1, 1); + break; + } + case OPCODE_DERIV_RTY_COARSE: + case OPCODE_DERIV_RTY_FINE: + case OPCODE_DERIV_RTY: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DERIV_RTY\n"); + } + CallHelper1("dfdy", psInst, 0, 1, 1); + break; + } + case OPCODE_LRP: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//LRP\n"); + } + CallHelper3("mix", psInst, 0, 2, 3, 1, 1); + break; + } + case OPCODE_DP2ADD: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//DP2ADD\n"); + } + psContext->AddIndentation(); + bool isFP16 = false; + if (CanForceToHalfOperand(&psInst->asOperands[0]) + && CanForceToHalfOperand(&psInst->asOperands[1]) + && CanForceToHalfOperand(&psInst->asOperands[2]) + && CanForceToHalfOperand(&psInst->asOperands[2])) + isFP16 = true; + int parenthesis = 0; + AddAssignToDest(&psInst->asOperands[0], isFP16 ? SVT_FLOAT16 : SVT_FLOAT, 2, psInst->ui32PreciseMask, parenthesis); + + uint32_t flags = TO_AUTO_EXPAND_TO_VEC2; + flags |= isFP16 ? TO_FLAG_FORCE_HALF : TO_AUTO_BITCAST_TO_FLOAT; + + bcatcstr(glsl, "dot("); + glsl << TranslateOperand(&psInst->asOperands[1], flags); + bcatcstr(glsl, ", "); + glsl << TranslateOperand(&psInst->asOperands[2], flags); + bcatcstr(glsl, ") + "); + glsl << TranslateOperand(&psInst->asOperands[3], flags); + AddAssignPrologue(parenthesis); + break; + } + case OPCODE_POW: + { + // TODO Check POW opcode whether it actually needs the abs + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//POW\n"); + } + psContext->AddIndentation(); + glsl << TranslateOperand(&psInst->asOperands[0], TO_FLAG_DESTINATION); + bcatcstr(glsl, " = powr(abs("); + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_NONE); + bcatcstr(glsl, "), "); + glsl << TranslateOperand(&psInst->asOperands[2], TO_FLAG_NONE); + bcatcstr(glsl, ");\n"); + break; + } + + case OPCODE_IMM_ATOMIC_ALLOC: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_ALLOC\n"); + } + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, psInst->ui32PreciseMask, numParenthesis); + bcatcstr(glsl, "atomic_fetch_add_explicit("); + glsl << ResourceName(RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber); + bcatcstr(glsl, "_counter, 1, memory_order::memory_order_relaxed)"); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_IMM_ATOMIC_CONSUME: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//IMM_ATOMIC_CONSUME\n"); + } + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_UINT, 1, psInst->ui32PreciseMask, numParenthesis); + bcatcstr(glsl, "atomic_fetch_sub_explicit("); + glsl << ResourceName(RGROUP_UAV, psInst->asOperands[1].ui32RegisterNumber); + // Metal atomic sub 
returns previous value. Therefore minus one here to get the correct data index. + bcatcstr(glsl, "_counter, 1, memory_order::memory_order_relaxed) - 1"); + AddAssignPrologue(numParenthesis); + break; + } + + case OPCODE_NOT: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//NOT\n"); + } + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], SVT_INT, psInst->asOperands[1].GetNumSwizzleElements(), psInst->ui32PreciseMask, numParenthesis); + + bcatcstr(glsl, "~("); + numParenthesis++; + glsl << TranslateOperand(&psInst->asOperands[1], TO_FLAG_INTEGER, psInst->asOperands[0].GetAccessMask()); + AddAssignPrologue(numParenthesis); + break; + } + case OPCODE_XOR: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//XOR\n"); + } + CallBinaryOp("^", psInst, 0, 1, 2, SVT_UINT); + break; + } + case OPCODE_RESINFO: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//RESINFO\n"); + } + + const uint32_t mask = psInst->asOperands[0].GetAccessMask(); + for (int i = 0; i < 4; ++i) + { + if ((1 << i) & mask) + GetResInfoData(psInst, psInst->asOperands[2].aui32Swizzle[i], i); + } + + break; + } + + case OPCODE_BUFINFO: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//BUFINFO\n"); + } + psContext->m_Reflection.OnDiagnostics("Metal shading language does not support buffer size query from shader. Pass the size to shader as const instead.\n", 0, true); + break; + } + + case OPCODE_SAMPLE_INFO: + { + if (psContext->flags & HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS) + { + psContext->AddIndentation(); + bcatcstr(glsl, "//SAMPLE_INFO\n"); + } + const RESINFO_RETURN_TYPE eResInfoReturnType = psInst->eResInfoReturnType; + psContext->AddIndentation(); + AddAssignToDest(&psInst->asOperands[0], eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_UINT ? SVT_UINT : SVT_FLOAT, 1, psInst->ui32PreciseMask, numParenthesis); + bcatcstr(glsl, TranslateOperand(&psInst->asOperands[1], TO_FLAG_NAME_ONLY).c_str()); + bcatcstr(glsl, ".get_num_samples()"); + AddAssignPrologue(numParenthesis); + break; + } + + case OPCODE_DMAX: + case OPCODE_DMIN: + case OPCODE_DMUL: + case OPCODE_DEQ: + case OPCODE_DGE: + case OPCODE_DLT: + case OPCODE_DNE: + case OPCODE_DMOV: + case OPCODE_DMOVC: + case OPCODE_DTOF: + case OPCODE_FTOD: + case OPCODE_DDIV: + case OPCODE_DRCP: + case OPCODE_MSAD: + case OPCODE_DTOI: + case OPCODE_DTOU: + case OPCODE_ITOD: + case OPCODE_UTOD: + default: + { + ASSERT(0); + break; + } + } + + if (psInst->bSaturate) //Saturate is only for floating point data (float opcodes or MOV) + { + int dstCount = psInst->asOperands[0].GetNumSwizzleElements(); + psContext->AddIndentation(); + bool isFP16 = false; + if (psInst->asOperands[0].GetDataType(psContext) == SVT_FLOAT16) + isFP16 = true; + AddAssignToDest(&psInst->asOperands[0], isFP16 ? SVT_FLOAT16 : SVT_FLOAT, dstCount, psInst->ui32PreciseMask, numParenthesis); + bcatcstr(glsl, "clamp("); + + glsl << TranslateOperand(&psInst->asOperands[0], isFP16 ? 
TO_FLAG_FORCE_HALF : TO_AUTO_BITCAST_TO_FLOAT); + if (isFP16) + bcatcstr(glsl, ", 0.0h, 1.0h)"); + else + bcatcstr(glsl, ", 0.0f, 1.0f)"); + AddAssignPrologue(numParenthesis); + } +} + +#if ENABLE_UNIT_TESTS + +#define UNITY_EXTERNAL_TOOL 1 +#include "Projects/PrecompiledHeaders/UnityPrefix.h" // Needed for defines such as ENABLE_CPP_EXCEPTIONS +#include "Runtime/Testing/Testing.h" + +UNIT_TEST_SUITE(ToMetalInstructionTests) +{ + static void TestAddOpAssignToDest(const char* expect, SHADER_VARIABLE_TYPE srcType, uint32_t srcDim, SHADER_VARIABLE_TYPE dstType, uint32_t dstDim) + { + bstring actual = bfromcstralloc(20, ""); + bstring expected = bfromcstralloc(20, expect); + int parenthesis = 0; + AddOpAssignToDest(actual, srcType, srcDim, dstType, dstDim, 0, parenthesis); + CHECK(bstrcmp(actual, expected) == 0); + bdestroy(actual); + bdestroy(expected); + } + + TEST(AddOpAssignToDest_Works) + { + // Different Type + TestAddOpAssignToDest(" = as_type(", SVT_INT, 1, SVT_FLOAT, 1); + TestAddOpAssignToDest(" = uint(", SVT_INT, 1, SVT_UINT, 1); + TestAddOpAssignToDest(" = as_type(", SVT_FLOAT, 1, SVT_INT, 1); + TestAddOpAssignToDest(" = as_type(", SVT_FLOAT, 1, SVT_UINT, 1); + + TestAddOpAssignToDest(" = as_type(", SVT_INT16, 1, SVT_FLOAT16, 1); + TestAddOpAssignToDest(" = ushort(", SVT_INT16, 1, SVT_UINT16, 1); + TestAddOpAssignToDest(" = as_type(", SVT_FLOAT16, 1, SVT_INT16, 1); + TestAddOpAssignToDest(" = as_type(", SVT_FLOAT16, 1, SVT_UINT16, 1); + + // Simply assign + TestAddOpAssignToDest(" = ", SVT_UINT16, 1, SVT_UINT16, 1); + TestAddOpAssignToDest(" = ", SVT_INT, 4, SVT_INT, 2); + + // Up cast + TestAddOpAssignToDest(" = uint(", SVT_UINT16, 1, SVT_UINT, 1); + TestAddOpAssignToDest(" = float(", SVT_FLOAT16, 1, SVT_FLOAT, 1); + TestAddOpAssignToDest(" = int(", SVT_INT16, 1, SVT_INT, 1); + + // Down cast + TestAddOpAssignToDest(" = ushort(", SVT_UINT, 1, SVT_UINT16, 1); + TestAddOpAssignToDest(" = half(", SVT_FLOAT, 1, SVT_FLOAT16, 1); + TestAddOpAssignToDest(" = short(", SVT_INT, 1, SVT_INT16, 1); + + // Increase dimensions + TestAddOpAssignToDest(" = float4(", SVT_FLOAT, 1, SVT_FLOAT, 4); + TestAddOpAssignToDest(" = uint3(", SVT_UINT, 1, SVT_UINT, 3); + TestAddOpAssignToDest(" = uint2(", SVT_UINT, 1, SVT_UINT, 2); + + // Decrease dimensions + TestAddOpAssignToDest(" = ", SVT_FLOAT, 4, SVT_FLOAT, 1); + TestAddOpAssignToDest(" = ", SVT_UINT, 3, SVT_UINT, 1); + TestAddOpAssignToDest(" = ", SVT_UINT, 2, SVT_UINT, 1); + + // Reinterop cast + Increase dimensions + TestAddOpAssignToDest(" = as_type(int4(", SVT_INT, 1, SVT_FLOAT, 4); + TestAddOpAssignToDest(" = uint4(", SVT_INT, 1, SVT_UINT, 4); + TestAddOpAssignToDest(" = as_type(float4(", SVT_FLOAT, 1, SVT_INT, 4); + TestAddOpAssignToDest(" = as_type(float4(", SVT_FLOAT, 1, SVT_UINT, 4); + + // Reinterop cast + Decrease dimensions + TestAddOpAssignToDest(" = as_type(", SVT_INT, 4, SVT_FLOAT, 1); + TestAddOpAssignToDest(" = uint(", SVT_INT, 4, SVT_UINT, 1); + TestAddOpAssignToDest(" = as_type(", SVT_FLOAT, 4, SVT_INT, 1); + TestAddOpAssignToDest(" = as_type(", SVT_FLOAT, 4, SVT_UINT, 1); + + // Different precision + Different Type + TestAddOpAssignToDest(" = float4(", SVT_INT16, 4, SVT_FLOAT, 4); + TestAddOpAssignToDest(" = short4(", SVT_FLOAT, 4, SVT_INT16, 4); + + // Sanity check as low precision not used in metal they should fall back + TestAddOpAssignToDest(" = short4(", SVT_FLOAT, 4, SVT_INT12, 4); + TestAddOpAssignToDest(" = half4(", SVT_INT, 4, SVT_FLOAT10, 4); + } +} +#endif diff --git a/third_party/HLSLcc/src/toMetalOperand.cpp 
b/third_party/HLSLcc/src/toMetalOperand.cpp new file mode 100644 index 0000000..103d611 --- /dev/null +++ b/third_party/HLSLcc/src/toMetalOperand.cpp @@ -0,0 +1,1277 @@ +#include +#include "internal_includes/HLSLccToolkit.h" +#include "internal_includes/HLSLCrossCompilerContext.h" +#include "hlslcc.h" +#include "internal_includes/debug.h" +#include "internal_includes/Shader.h" +#include "internal_includes/toMetal.h" +#include +#include + +#include +#include + +using namespace HLSLcc; + +#ifdef _MSC_VER +#if _MSC_VER < 1900 +#define snprintf _snprintf +#endif +#endif + +#ifndef fpcheck +#ifdef _MSC_VER +#define fpcheck(x) (_isnan(x) || !_finite(x)) +#else +#define fpcheck(x) (std::isnan(x) || std::isinf(x)) +#endif +#endif // #ifndef fpcheck + + +// Returns nonzero if types are just different precisions of the same underlying type +static bool AreTypesCompatibleMetal(SHADER_VARIABLE_TYPE a, uint32_t ui32TOFlag) +{ + SHADER_VARIABLE_TYPE b = TypeFlagsToSVTType(ui32TOFlag); + + if (a == b) + return true; + + // Special case for array indices: both uint and int are fine + if ((ui32TOFlag & TO_FLAG_INTEGER) && (ui32TOFlag & TO_FLAG_UNSIGNED_INTEGER) && + (a == SVT_INT || a == SVT_INT16 || a == SVT_UINT || a == SVT_UINT16)) + return true; + + return false; +} + +std::string ToMetal::TranslateOperandSwizzle(const Operand* psOperand, uint32_t ui32ComponentMask, int iRebase, bool includeDot /*= true*/) +{ + std::ostringstream oss; + uint32_t accessMask = ui32ComponentMask & psOperand->GetAccessMask(); + if (psOperand->eType == OPERAND_TYPE_INPUT) + { + int regSpace = psOperand->GetRegisterSpace(psContext); + // Skip swizzle for scalar inputs, but only if we haven't redirected them + if (regSpace == 0) + { + if ((psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && + (psContext->psShader->abScalarInput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) + { + return ""; + } + } + else + { + if ((psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && + (psContext->psShader->abScalarInput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) + { + return ""; + } + } + } + if (psOperand->eType == OPERAND_TYPE_OUTPUT) + { + int regSpace = psOperand->GetRegisterSpace(psContext); + // Skip swizzle for scalar outputs, but only if we haven't redirected them + if (regSpace == 0) + { + if ((psContext->psShader->asPhases[psContext->currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && + (psContext->psShader->abScalarOutput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) + { + return ""; + } + } + else + { + if ((psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0) && + (psContext->psShader->abScalarOutput[regSpace][psOperand->ui32RegisterNumber] & accessMask)) + { + return ""; + } + } + } + + if (psOperand->iWriteMaskEnabled && + psOperand->iNumComponents != 1) + { + //Component Mask + if (psOperand->eSelMode == OPERAND_4_COMPONENT_MASK_MODE) + { + uint32_t mask; + if (psOperand->ui32CompMask != 0) + mask = psOperand->ui32CompMask & ui32ComponentMask; + else + mask = ui32ComponentMask; + + if (mask != 0 && mask != OPERAND_4_COMPONENT_MASK_ALL) + { + if (includeDot) + oss << "."; + if (mask & OPERAND_4_COMPONENT_MASK_X) + { + ASSERT(iRebase == 0); + oss << "x"; + } + if (mask & OPERAND_4_COMPONENT_MASK_Y) + { + ASSERT(iRebase <= 1); + oss << "xy"[1 - iRebase]; + } + if (mask & 
OPERAND_4_COMPONENT_MASK_Z) + { + ASSERT(iRebase <= 2); + oss << "xyz"[2 - iRebase]; + } + if (mask & OPERAND_4_COMPONENT_MASK_W) + { + ASSERT(iRebase <= 3); + oss << "xyzw"[3 - iRebase]; + } + } + } + else + //Component Swizzle + if (psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + if (ui32ComponentMask != OPERAND_4_COMPONENT_MASK_ALL || + !(psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_X && + psOperand->aui32Swizzle[1] == OPERAND_4_COMPONENT_Y && + psOperand->aui32Swizzle[2] == OPERAND_4_COMPONENT_Z && + psOperand->aui32Swizzle[3] == OPERAND_4_COMPONENT_W + ) + ) + { + uint32_t i; + + if (includeDot) + oss << "."; + + for (i = 0; i < 4; ++i) + { + if (!(ui32ComponentMask & (OPERAND_4_COMPONENT_MASK_X << i))) + continue; + + if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_X) + { + ASSERT(iRebase == 0); + oss << "x"; + } + else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_Y) + { + ASSERT(iRebase <= 1); + oss << "xy"[1 - iRebase]; + } + else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_Z) + { + ASSERT(iRebase <= 2); + oss << "xyz"[2 - iRebase]; + } + else if (psOperand->aui32Swizzle[i] == OPERAND_4_COMPONENT_W) + { + ASSERT(iRebase <= 3); + oss << "xyzw"[3 - iRebase]; + } + } + } + } + else if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) // ui32ComponentMask is ignored in this case + { + if (includeDot) + oss << "."; + + if (psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_X) + { + ASSERT(iRebase == 0); + oss << "x"; + } + else if (psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_Y) + { + ASSERT(iRebase <= 1); + oss << "xy"[1 - iRebase]; + } + else if (psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_Z) + { + ASSERT(iRebase <= 2); + oss << "xyz"[2 - iRebase]; + } + else if (psOperand->aui32Swizzle[0] == OPERAND_4_COMPONENT_W) + { + ASSERT(iRebase <= 3); + oss << "xyzw"[3 - iRebase]; + } + } + } + return oss.str(); +} + +std::string ToMetal::TranslateOperandIndex(const Operand* psOperand, int index) +{ + int i = index; + std::ostringstream oss; + ASSERT(index < psOperand->iIndexDims); + + switch (psOperand->eIndexRep[i]) + { + case OPERAND_INDEX_IMMEDIATE32: + { + oss << "[" << psOperand->aui32ArraySizes[i] << "]"; + return oss.str(); + } + case OPERAND_INDEX_RELATIVE: + { + oss << "[" << TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_INTEGER) << "]"; + return oss.str(); + } + case OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE: + { + oss << "[" << TranslateOperand(psOperand->m_SubOperands[i].get(), TO_FLAG_INTEGER) << " + " << psOperand->aui32ArraySizes[i] << "]"; + return oss.str(); + } + default: + { + ASSERT(0); + return ""; + break; + } + } +} + +/*static std::string GetBitcastOp(HLSLCrossCompilerContext *psContext, SHADER_VARIABLE_TYPE from, SHADER_VARIABLE_TYPE to, uint32_t numComponents) +{ + if (psContext->psShader->eTargetLanguage == LANG_METAL) + { + std::ostringstream oss; + oss << "as_type<"; + oss << GetConstructorForTypeMetal(to, numComponents); + oss << ">"; + return oss.str(); + } + else + { + if ((to == SVT_FLOAT || to == SVT_FLOAT16 || to == SVT_FLOAT10) && from == SVT_INT) + return "intBitsToFloat"; + else if ((to == SVT_FLOAT || to == SVT_FLOAT16 || to == SVT_FLOAT10) && from == SVT_UINT) + return "uintBitsToFloat"; + else if (to == SVT_INT && from == SVT_FLOAT) + return "floatBitsToInt"; + else if (to == SVT_UINT && from == SVT_FLOAT) + return "floatBitsToUint"; + } + + ASSERT(0); + return "ERROR missing components in GetBitcastOp()"; +}*/ + + +// Helper function to print floats with full precision +static 
std::string printFloat(float f) +{ + char temp[30]; + + snprintf(temp, 30, "%.9g", f); + char * ePos = strchr(temp, 'e'); + char * pointPos = strchr(temp, '.'); + + if (ePos == NULL && pointPos == NULL && !fpcheck(f)) + return std::string(temp) + ".0"; + else + return std::string(temp); +} + +// Helper function to print out a single 32-bit immediate value in desired format +static std::string printImmediate32(uint32_t value, SHADER_VARIABLE_TYPE eType) +{ + std::ostringstream oss; + int needsParenthesis = 0; + + // Print floats as bit patterns. + if ((eType == SVT_FLOAT || eType == SVT_FLOAT16 || eType == SVT_FLOAT10) && fpcheck(*((float *)(&value)))) + { + oss << "as_type("; + eType = SVT_INT; + needsParenthesis = 1; + } + + switch (eType) + { + default: + ASSERT(0); + case SVT_INT: + case SVT_INT16: + case SVT_INT12: + // Need special handling for anything >= uint 0x3fffffff + if (value > 0x3ffffffe) + oss << "int(0x" << std::hex << value << "u)"; + else + oss << "0x" << std::hex << value << ""; + break; + case SVT_UINT: + case SVT_UINT16: + oss << "0x" << std::hex << value << "u"; + break; + case SVT_FLOAT: + case SVT_FLOAT10: + case SVT_FLOAT16: + oss << printFloat(*((float *)(&value))); + break; + case SVT_BOOL: + if (value == 0) + oss << "false"; + else + oss << "true"; + } + if (needsParenthesis) + oss << ")"; + + return oss.str(); +} + +static std::string MakeCBVarName(const std::string &cbName, const std::string &fullName, bool isUnityInstancingBuffer) +{ + // For Unity instancing buffer: "CBufferName.StructTypeName[] -> CBufferName[]". See ToMetal::DeclareConstantBuffer. + if (isUnityInstancingBuffer && !cbName.empty() && cbName[cbName.size() - 1] == '.' && fullName.find_first_of('[') != std::string::npos) + { + return cbName.substr(0, cbName.size() - 1) + fullName.substr(fullName.find_first_of('[')); + } + return cbName + fullName; +} + +std::string ToMetal::TranslateVariableName(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t* pui32IgnoreSwizzle, uint32_t ui32CompMask, int *piRebase) +{ + std::ostringstream oss; + int numParenthesis = 0; + int hasCtor = 0; + int needsBoolUpscale = 0; // If nonzero, bools need * 0xffffffff in them + SHADER_VARIABLE_TYPE requestedType = TypeFlagsToSVTType(ui32TOFlag); + SHADER_VARIABLE_TYPE eType = psOperand->GetDataType(psContext, requestedType); + int numComponents = psOperand->GetNumSwizzleElements(ui32CompMask); + int requestedComponents = 0; + int scalarWithSwizzle = 0; + + *pui32IgnoreSwizzle = 0; + + if (psOperand->eType == OPERAND_TYPE_TEMP) + { + // Check for scalar + if (psContext->psShader->GetTempComponentCount(eType, psOperand->ui32RegisterNumber) == 1 && psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE) + { + scalarWithSwizzle = 1; // Going to need a constructor + } + } + + if (psOperand->eType == OPERAND_TYPE_INPUT) + { + // Check for scalar + // You would think checking would be easy but there is a caveat: + // checking abScalarInput might report as scalar, while in reality that was redirected and now is vector so swizzle must be preserved + // as an example consider we have input: + // float2 x; float y; + // and later on we do + // tex2D(xxx, fixed2(x.x, y)); + // in that case we will generate redirect but which ui32RegisterNumber will be used for it is not strictly "specified" + // so we may end up with treating it as scalar (even though it is vector now) + const int redirectInput = psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber]; + const bool 
wasRedirected = redirectInput == 0xFF || redirectInput == 0xFE; + + const int scalarInput = psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber]; + if (!wasRedirected && (scalarInput & psOperand->GetAccessMask()) && (psOperand->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)) + { + scalarWithSwizzle = 1; + *pui32IgnoreSwizzle = 1; + } + } + + if (piRebase) + *piRebase = 0; + + if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC2) + requestedComponents = 2; + else if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC3) + requestedComponents = 3; + else if (ui32TOFlag & TO_AUTO_EXPAND_TO_VEC4) + requestedComponents = 4; + + requestedComponents = std::max(requestedComponents, numComponents); + + if (!(ui32TOFlag & (TO_FLAG_DESTINATION | TO_FLAG_NAME_ONLY | TO_FLAG_DECLARATION_NAME))) + { + if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32 || psOperand->eType == OPERAND_TYPE_IMMEDIATE64) + { + // Mark the operand type to match whatever we're asking for in the flags. + ((Operand *)psOperand)->aeDataType[0] = requestedType; + ((Operand *)psOperand)->aeDataType[1] = requestedType; + ((Operand *)psOperand)->aeDataType[2] = requestedType; + ((Operand *)psOperand)->aeDataType[3] = requestedType; + } + + bool bitcast = false; + if (AreTypesCompatibleMetal(eType, ui32TOFlag) == 0) + { + if (CanDoDirectCast(psContext, eType, requestedType)) + { + hasCtor = 1; + if (eType == SVT_BOOL) + { + needsBoolUpscale = 1; + // make sure to wrap the whole thing in parens so the upscale + // multiply only applies to the bool + oss << "("; + numParenthesis++; + } + oss << GetConstructorForType(psContext, requestedType, requestedComponents, false) << "("; + numParenthesis++; + } + else + { + // Direct cast not possible, need to do bitcast. + oss << "as_type<" << GetConstructorForTypeMetal(requestedType, requestedComponents) << ">("; + hasCtor = 1; + bitcast = true; + numParenthesis++; + } + } + + // Add ctor if needed (upscaling). Type conversion is already handled above, so here we must + // use the original type to not make type conflicts in bitcasts + bool needsUpscaling = ((numComponents < requestedComponents) || (scalarWithSwizzle != 0)) && (hasCtor == 0 || bitcast); + + // Add constuctor if half precision is forced to avoid template ambiguity error from compiler + bool needsForcedCtor = (ui32TOFlag & TO_FLAG_FORCE_HALF) && (psOperand->eType == OPERAND_TYPE_IMMEDIATE32 || psOperand->eType == OPERAND_TYPE_IMMEDIATE64); + + if (needsForcedCtor) + requestedComponents = std::max(requestedComponents, 1); + + if (needsUpscaling || needsForcedCtor) + { + oss << GetConstructorForType(psContext, eType, requestedComponents, false) << "("; + + numParenthesis++; + hasCtor = 1; + } + } + + + switch (psOperand->eType) + { + case OPERAND_TYPE_IMMEDIATE32: + { + if (psOperand->iNumComponents == 1) + { + oss << printImmediate32(*((unsigned int*)(&psOperand->afImmediates[0])), requestedType); + } + else + { + int i; + int firstItemAdded = 0; + if (hasCtor == 0) + { + oss << GetConstructorForTypeMetal(requestedType, requestedComponents) << "("; + numParenthesis++; + hasCtor = 1; + } + for (i = 0; i < 4; i++) + { + uint32_t uval; + if (!(ui32CompMask & (1 << i))) + continue; + + if (firstItemAdded) + oss << ", "; + uval = *((uint32_t*)(&psOperand->afImmediates[i >= psOperand->iNumComponents ? 
psOperand->iNumComponents - 1 : i])); + oss << printImmediate32(uval, requestedType); + firstItemAdded = 1; + } + oss << ")"; + *pui32IgnoreSwizzle = 1; + numParenthesis--; + } + break; + } + case OPERAND_TYPE_IMMEDIATE64: + { + ASSERT(0); // doubles not supported on Metal + break; + } + case OPERAND_TYPE_INPUT: + { + int regSpace = psOperand->GetRegisterSpace(psContext); + switch (psOperand->iIndexDims) + { + case INDEX_2D: + { + const ShaderInfo::InOutSignature *psSig = NULL; + psContext->psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->ui32CompMask, &psSig); + if (psContext->psShader->eShaderType == HULL_SHADER || psContext->psShader->eShaderType == DOMAIN_SHADER) + { + oss << "input.cp"; + oss << TranslateOperandIndex(psOperand, 0);//Vertex index + oss << "." << psContext->GetDeclaredInputName(psOperand, piRebase, 1, pui32IgnoreSwizzle); + } + else + { + // Not sure if this codepath is active outside hull/domain + oss << psContext->GetDeclaredInputName(psOperand, piRebase, 0, pui32IgnoreSwizzle); + + oss << TranslateOperandIndex(psOperand, 0);//Vertex index + } + break; + } + default: + { + if (psOperand->eIndexRep[0] == OPERAND_INDEX_IMMEDIATE32_PLUS_RELATIVE) + { + ASSERT(psContext->psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0); + oss << "phase" << psContext->currentPhase << "_Input" << regSpace << "_" << psOperand->ui32RegisterNumber << "["; + oss << TranslateOperand(psOperand->m_SubOperands[0].get(), TO_FLAG_INTEGER); + oss << "]"; + } + else + { + if (psContext->psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0) + { + const uint32_t parentIndex = psContext->psShader->aIndexedInputParents[regSpace][psOperand->ui32RegisterNumber]; + oss << "phase" << psContext->currentPhase << "_Input" << regSpace << "_" << parentIndex << "[" << (psOperand->ui32RegisterNumber - parentIndex) << "]"; + } + else + { + oss << psContext->GetDeclaredInputName(psOperand, piRebase, 0, pui32IgnoreSwizzle); + } + } + break; + } + } + break; + } + case OPERAND_TYPE_OUTPUT: + case OPERAND_TYPE_OUTPUT_DEPTH: + case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL: + case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL: + { + int stream = 0; + oss << psContext->GetDeclaredOutputName(psOperand, &stream, pui32IgnoreSwizzle, piRebase, 0); + if (psOperand->m_SubOperands[0].get()) + { + oss << "["; + oss << TranslateOperand(psOperand->m_SubOperands[0].get(), TO_AUTO_BITCAST_TO_INT); + oss << "]"; + } + break; + } + case OPERAND_TYPE_TEMP: + { + SHADER_VARIABLE_TYPE eTempType = psOperand->GetDataType(psContext); + + if (psOperand->eSpecialName == NAME_UNDEFINED && psOperand->specialName.length()) + { + oss << psOperand->specialName; + break; + } + + oss << HLSLCC_TEMP_PREFIX; + ASSERT(psOperand->ui32RegisterNumber < 0x10000); // Sanity check after temp splitting. 
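+ // Temps were split per data type earlier (see the "temp splitting" note above); the
+ // switch below appends a type tag to HLSLCC_TEMP_PREFIX, so e.g. an int temp 2 is
+ // printed as "<prefix>i2" and a half temp 0 as "<prefix>16_0" (illustrative examples).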
+ switch (eTempType) + { + case SVT_FLOAT: + ASSERT(psContext->psShader->psFloatTempSizes[psOperand->ui32RegisterNumber] != 0); + if (psContext->psShader->psFloatTempSizes[psOperand->ui32RegisterNumber] == 1) + *pui32IgnoreSwizzle = 1; + break; + case SVT_FLOAT16: + ASSERT(psContext->psShader->psFloat16TempSizes[psOperand->ui32RegisterNumber] != 0); + oss << ("16_"); + if (psContext->psShader->psFloat16TempSizes[psOperand->ui32RegisterNumber] == 1) + *pui32IgnoreSwizzle = 1; + break; + case SVT_FLOAT10: + ASSERT(psContext->psShader->psFloat10TempSizes[psOperand->ui32RegisterNumber] != 0); + oss << ("10_"); + if (psContext->psShader->psFloat10TempSizes[psOperand->ui32RegisterNumber] == 1) + *pui32IgnoreSwizzle = 1; + break; + case SVT_INT: + ASSERT(psContext->psShader->psIntTempSizes[psOperand->ui32RegisterNumber] != 0); + oss << ("i"); + if (psContext->psShader->psIntTempSizes[psOperand->ui32RegisterNumber] == 1) + *pui32IgnoreSwizzle = 1; + break; + case SVT_INT16: + ASSERT(psContext->psShader->psInt16TempSizes[psOperand->ui32RegisterNumber] != 0); + oss << ("i16_"); + if (psContext->psShader->psInt16TempSizes[psOperand->ui32RegisterNumber] == 1) + *pui32IgnoreSwizzle = 1; + break; + case SVT_INT12: + ASSERT(psContext->psShader->psInt12TempSizes[psOperand->ui32RegisterNumber] != 0); + oss << ("i12_"); + if (psContext->psShader->psInt12TempSizes[psOperand->ui32RegisterNumber] == 1) + *pui32IgnoreSwizzle = 1; + break; + case SVT_UINT: + ASSERT(psContext->psShader->psUIntTempSizes[psOperand->ui32RegisterNumber] != 0); + oss << ("u"); + if (psContext->psShader->psUIntTempSizes[psOperand->ui32RegisterNumber] == 1) + *pui32IgnoreSwizzle = 1; + break; + case SVT_UINT16: + ASSERT(psContext->psShader->psUInt16TempSizes[psOperand->ui32RegisterNumber] != 0); + oss << ("u16_"); + if (psContext->psShader->psUInt16TempSizes[psOperand->ui32RegisterNumber] == 1) + *pui32IgnoreSwizzle = 1; + break; + case SVT_DOUBLE: + ASSERT(psContext->psShader->psDoubleTempSizes[psOperand->ui32RegisterNumber] != 0); + oss << ("d"); + if (psContext->psShader->psDoubleTempSizes[psOperand->ui32RegisterNumber] == 1) + *pui32IgnoreSwizzle = 1; + break; + case SVT_BOOL: + ASSERT(psContext->psShader->psBoolTempSizes[psOperand->ui32RegisterNumber] != 0); + oss << ("b"); + if (psContext->psShader->psBoolTempSizes[psOperand->ui32RegisterNumber] == 1) + *pui32IgnoreSwizzle = 1; + break; + default: + ASSERT(0 && "Should never get here!"); + } + oss << psOperand->ui32RegisterNumber; + break; + } + case OPERAND_TYPE_SPECIAL_IMMCONSTINT: + case OPERAND_TYPE_SPECIAL_IMMCONST: + case OPERAND_TYPE_SPECIAL_OUTBASECOLOUR: + case OPERAND_TYPE_SPECIAL_OUTOFFSETCOLOUR: + case OPERAND_TYPE_SPECIAL_FOG: + case OPERAND_TYPE_SPECIAL_ADDRESS: + case OPERAND_TYPE_SPECIAL_LOOPCOUNTER: + case OPERAND_TYPE_SPECIAL_TEXCOORD: + { + ASSERT(0 && "DX9 shaders no longer supported!"); + break; + } + case OPERAND_TYPE_SPECIAL_POSITION: + { + ASSERT(0 && "TODO normal shader support"); +// bcatcstr(glsl, "gl_Position"); + break; + } + case OPERAND_TYPE_SPECIAL_POINTSIZE: + { + ASSERT(0 && "TODO normal shader support"); + // bcatcstr(glsl, "gl_PointSize"); + break; + } + case OPERAND_TYPE_CONSTANT_BUFFER: + { + const ConstantBuffer* psCBuf = NULL; + const ShaderVarType* psVarType = NULL; + int32_t index = -1; + std::vector arrayIndices; + bool isArray = false; + bool isFBInput = false; + psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf); + ASSERT(psCBuf != NULL); + + if (ui32TOFlag & 
TO_FLAG_DECLARATION_NAME) + { + pui32IgnoreSwizzle[0] = 1; + } + std::string cbName = ""; + if (psCBuf) + { + //$Globals. + cbName = GetCBName(psCBuf->name); + cbName += "."; + // Drop the constant buffer name from subpass inputs + if (cbName.substr(0, 19) == "hlslcc_SubpassInput") + cbName = ""; + } + + if ((ui32TOFlag & TO_FLAG_DECLARATION_NAME) != TO_FLAG_DECLARATION_NAME) + { + //Work out the variable name. Don't apply swizzle to that variable yet. + int32_t rebase = 0; + + ASSERT(psCBuf != NULL); + + uint32_t componentsNeeded = 1; + if (psOperand->eSelMode != OPERAND_4_COMPONENT_SELECT_1_MODE) + { + uint32_t minSwiz = 3; + uint32_t maxSwiz = 0; + int i; + for (i = 0; i < 4; i++) + { + if ((ui32CompMask & (1 << i)) == 0) + continue; + minSwiz = std::min(minSwiz, psOperand->aui32Swizzle[i]); + maxSwiz = std::max(maxSwiz, psOperand->aui32Swizzle[i]); + } + componentsNeeded = maxSwiz - minSwiz + 1; + } + + // When we have a component mask that doesn't have .x set (this basically only happens when we manually open operands into components) + // We have to pull down the swizzle array to match the first bit that's actually set + uint32_t tmpSwizzle[4] = { 0 }; + int firstBitSet = 0; + if (ui32CompMask == 0) + ui32CompMask = 0xf; + while ((ui32CompMask & (1 << firstBitSet)) == 0) + firstBitSet++; + std::copy(&psOperand->aui32Swizzle[firstBitSet], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]); + + ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &psVarType, &isArray, &arrayIndices, &rebase, psContext->flags); + + // Get a possible dynamic array index + std::string dynamicIndexStr; + bool needsIndexCalcRevert = false; + bool isAoS = ((!isArray && arrayIndices.size() > 0) || (isArray && arrayIndices.size() > 1)); + bool isUnityInstancingBuffer = isAoS && IsUnityFlexibleInstancingBuffer(psCBuf); + Operand *psDynIndexOp = psOperand->GetDynamicIndexOperand(psContext, psVarType, isAoS, &needsIndexCalcRevert); + + if (psDynIndexOp != NULL) + { + SHADER_VARIABLE_TYPE eType = psDynIndexOp->GetDataType(psContext); + uint32_t opFlags = TO_FLAG_INTEGER; + + if (eType != SVT_INT && eType != SVT_UINT) + opFlags = TO_AUTO_BITCAST_TO_INT; + + dynamicIndexStr = TranslateOperand(psDynIndexOp, opFlags, 0x1); // Just take the first component for the index + } + + if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE || (componentsNeeded <= psVarType->Columns)) + { + // Simple case: just access one component + std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(psVarType, arrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES); + + // Special hack for MSAA subpass inputs: in Metal we can only read the "current" sample, so ignore the index + if (strncmp(fullName.c_str(), "hlslcc_fbinput", 14) == 0) + isFBInput = true; + + if (((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0) && ((psVarType->Class == SVC_MATRIX_ROWS) || (psVarType->Class == SVC_MATRIX_COLUMNS))) + { + // We'll need to add the prefix only to the last section of the name + size_t commaPos = fullName.find_last_of('.'); + char prefix[256]; + sprintf(prefix, HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING, psVarType->Rows, psVarType->Columns); + if (commaPos == std::string::npos) + fullName.insert(0, prefix); + else + fullName.insert(commaPos + 1, prefix); + } + + oss << MakeCBVarName(cbName, fullName, isUnityInstancingBuffer); + } + else + { + // Non-simple case: build vec4 and apply mask + uint32_t i; + int32_t tmpRebase; + std::vector tmpArrayIndices; 
+ bool tmpIsArray; + int firstItemAdded = 0; + + oss << GetConstructorForTypeMetal(psVarType->Type, GetNumberBitsSet(ui32CompMask)) << "("; + for (i = 0; i < 4; i++) + { + const ShaderVarType *tmpVarType = NULL; + if ((ui32CompMask & (1 << i)) == 0) + continue; + tmpRebase = 0; + if (firstItemAdded != 0) + oss << ", "; + else + firstItemAdded = 1; + + uint32_t tmpSwizzle[4] = { 0 }; + std::copy(&psOperand->aui32Swizzle[i], &psOperand->aui32Swizzle[4], &tmpSwizzle[0]); + + ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], tmpSwizzle, psCBuf, &tmpVarType, &tmpIsArray, &tmpArrayIndices, &tmpRebase, psContext->flags); + std::string fullName = ShaderInfo::GetShaderVarIndexedFullName(tmpVarType, tmpArrayIndices, dynamicIndexStr, needsIndexCalcRevert, psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES); + oss << MakeCBVarName(cbName, fullName, isUnityInstancingBuffer); + + if (tmpVarType->Class != SVC_SCALAR) + { + uint32_t swizzle; + tmpRebase /= 4; // 0 => 0, 4 => 1, 8 => 2, 12 /= 3 + swizzle = psOperand->aui32Swizzle[i] - tmpRebase; + + oss << "." << ("xyzw"[swizzle]); + } + } + oss << ")"; + // Clear rebase, we've already done it. + rebase = 0; + // Also swizzle. + *pui32IgnoreSwizzle = 1; + } + + + if (isArray) + { + index = arrayIndices.back(); + + // Dynamic index is atm supported only at the root array level. Add here only if there is no such parent. + bool hasDynamicIndex = !dynamicIndexStr.empty() && (arrayIndices.size() <= 1); + bool hasImmediateIndex = (index != -1) && !(hasDynamicIndex && index == 0); + + // Ignore index altogether on fb inputs + if (isFBInput) + { + // Nothing to do here + } + else if (hasDynamicIndex || hasImmediateIndex) + { + std::ostringstream fullIndexOss; + if (hasDynamicIndex && hasImmediateIndex) + fullIndexOss << "(" << dynamicIndexStr << " + " << index << ")"; + else if (hasDynamicIndex) + fullIndexOss << dynamicIndexStr; + else // hasImmediateStr + fullIndexOss << index; + + if (((psVarType->Class == SVC_MATRIX_COLUMNS) || (psVarType->Class == SVC_MATRIX_ROWS)) && (psVarType->Elements > 1) && ((psContext->flags & HLSLCC_FLAG_TRANSLATE_MATRICES) == 0)) + { + // Special handling for old matrix arrays + oss << "[" << fullIndexOss.str() << " / 4]"; + oss << "[" << fullIndexOss.str() << " %% 4]"; + } + else // This path is atm the default + { + oss << "[" << fullIndexOss.str() << "]"; + } + } + } + + if (psVarType->Class == SVC_VECTOR && !*pui32IgnoreSwizzle) + { + switch (rebase) + { + case 4: + { + if (psVarType->Columns == 2) + { + //.x(GLSL) is .y(HLSL). .y(GLSL) is .z(HLSL) + oss << ".xxyx"; + } + else if (psVarType->Columns == 3) + { + //.x(GLSL) is .y(HLSL). .y(GLSL) is .z(HLSL) .z(GLSL) is .w(HLSL) + oss << ".xxyz"; + } + break; + } + case 8: + { + if (psVarType->Columns == 2) + { + //.x(GLSL) is .z(HLSL). .y(GLSL) is .w(HLSL) + oss << ".xxxy"; + } + break; + } + case 0: + default: + { + //No rebase, but extend to vec4. + if (psVarType->Columns == 2) + { + oss << ".xyxx"; + } + else if (psVarType->Columns == 3) + { + oss << ".xyzx"; + } + break; + } + } + } + + if (psVarType->Class == SVC_SCALAR) + { + *pui32IgnoreSwizzle = 1; + + // CB arrays are all declared as 4-component vectors to match DX11 data layout. + // Therefore add swizzle here to access the element corresponding to the scalar var. 
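+ // i.e. a scalar element of a constant-buffer array is accessed as (illustrative names):
+ //     myCB.scalarArray[idx].x
+ // Note the ".x" is only appended for compute shaders, per the condition below.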
+ if ((psVarType->Elements > 0) && (psContext->psShader->eShaderType == COMPUTE_SHADER)) + { + oss << ".x"; + } + } + } + break; + } + case OPERAND_TYPE_RESOURCE: + { + oss << ResourceName(RGROUP_TEXTURE, psOperand->ui32RegisterNumber); + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_SAMPLER: + { + oss << ResourceName(RGROUP_SAMPLER, psOperand->ui32RegisterNumber); + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_FUNCTION_BODY: + { + ASSERT(0); + break; + } + case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID: + case OPERAND_TYPE_INPUT_JOIN_INSTANCE_ID: + { + oss << "phaseInstanceID"; // Not a real builtin, but passed as a function parameter. + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: + { + oss << "ImmCB_" << psContext->currentPhase; + oss << TranslateOperandIndex(psOperand, 0); + break; + } + case OPERAND_TYPE_INPUT_DOMAIN_POINT: + { + oss << "mtl_TessCoord"; + break; + } + case OPERAND_TYPE_INPUT_CONTROL_POINT: + { + int ignoreRedirect = 1; + int regSpace = psOperand->GetRegisterSpace(psContext); + + if ((regSpace == 0 && psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe) || + (regSpace == 1 && psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe)) + { + ignoreRedirect = 0; + } + + if (ignoreRedirect) + { + oss << "input.cp"; + oss << TranslateOperandIndex(psOperand, 0);//Vertex index + oss << "." << psContext->GetDeclaredInputName(psOperand, piRebase, ignoreRedirect, pui32IgnoreSwizzle); + } + else + { + oss << psContext->GetDeclaredInputName(psOperand, piRebase, ignoreRedirect, pui32IgnoreSwizzle); + oss << TranslateOperandIndex(psOperand, 0);//Vertex index + } + + // Check for scalar + if ((psContext->psShader->abScalarInput[psOperand->GetRegisterSpace(psContext)][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_NULL: + { + // Null register, used to discard results of operations + oss << "//null"; + break; + } + case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID: + { + oss << "controlPointID"; + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_OUTPUT_COVERAGE_MASK: + { + oss << "mtl_CoverageMask"; + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_INPUT_COVERAGE_MASK: + { + oss << "mtl_CoverageMask"; + //Skip swizzle on scalar types. + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_INPUT_THREAD_ID://SV_DispatchThreadID + { + oss << "mtl_ThreadID"; + break; + } + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP://SV_GroupThreadID + { + oss << "mtl_ThreadIDInGroup"; + break; + } + case OPERAND_TYPE_INPUT_THREAD_GROUP_ID://SV_GroupID + { + oss << "mtl_ThreadGroupID"; + break; + } + case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED://SV_GroupIndex + { + if (requestedComponents > 1 && !hasCtor) + { + oss << GetConstructorForType(psContext, eType, requestedComponents, false) << "("; + numParenthesis++; + hasCtor = 1; + } + for (uint32_t i = 0; i < requestedComponents; i++) + { + oss << "mtl_ThreadIndexInThreadGroup"; + if (i < requestedComponents - 1) + oss << ", "; + } + *pui32IgnoreSwizzle = 1; // No swizzle meaningful for scalar. 
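+ // When a wider vector was requested, the scalar group index is simply replicated inside
+ // the constructor opened above, e.g. (illustrative):
+ //     uint2(mtl_ThreadIndexInThreadGroup, mtl_ThreadIndexInThreadGroup)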
+ break; + } + case OPERAND_TYPE_UNORDERED_ACCESS_VIEW: + { + oss << ResourceName(RGROUP_UAV, psOperand->ui32RegisterNumber); + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY: + { + oss << "TGSM" << psOperand->ui32RegisterNumber; + *pui32IgnoreSwizzle = 1; + break; + } + case OPERAND_TYPE_INPUT_PRIMITIVEID: + { + // Not supported on Metal + ASSERT(0); + break; + } + case OPERAND_TYPE_INDEXABLE_TEMP: + { + oss << "TempArray" << psOperand->aui32ArraySizes[0] << "["; + if (psOperand->aui32ArraySizes[1] != 0 || !psOperand->m_SubOperands[1].get()) + oss << psOperand->aui32ArraySizes[1]; + + if (psOperand->m_SubOperands[1].get()) + { + if (psOperand->aui32ArraySizes[1] != 0) + oss << "+"; + oss << TranslateOperand(psOperand->m_SubOperands[1].get(), TO_FLAG_INTEGER); + } + oss << "]"; + break; + } + case OPERAND_TYPE_STREAM: + { + // Not supported on Metal + ASSERT(0); + break; + } + case OPERAND_TYPE_INPUT_GS_INSTANCE_ID: + { + // Not supported on Metal + ASSERT(0); + break; + } + case OPERAND_TYPE_THIS_POINTER: + { + ASSERT(0); // Nope. + break; + } + case OPERAND_TYPE_INPUT_PATCH_CONSTANT: + { + const ShaderInfo::InOutSignature* psIn; + psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn); + *piRebase = psIn->iRebase; + switch (psIn->eSystemValueType) + { + case NAME_POSITION: + oss << "mtl_Position"; + break; + case NAME_RENDER_TARGET_ARRAY_INDEX: + oss << "mtl_Layer"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_CLIP_DISTANCE: + // this is temp variable, declaration and redirecting to actual output is handled in DeclareClipPlanes + char tmpName[128]; sprintf(tmpName, "phase%d_ClipDistance%d", psContext->currentPhase, psIn->ui32SemanticIndex); + oss << tmpName; + *pui32IgnoreSwizzle = 1; + break; + case NAME_VIEWPORT_ARRAY_INDEX: + oss << "mtl_ViewPortIndex"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_VERTEX_ID: + oss << "mtl_VertexID"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_INSTANCE_ID: + oss << "mtl_InstanceID"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_IS_FRONT_FACE: + oss << "(mtl_FrontFace ? 
0xffffffffu : uint(0))"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_PRIMITIVE_ID: + // Not on Metal + ASSERT(0); + break; + + // as far as i understand tesselation factors are always coming from tessFactor variable (it is always declared in ToMetal::Translate) + case NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_LINE_DENSITY_TESSFACTOR: + if (psContext->psShader->aIndexedOutput[1][psOperand->ui32RegisterNumber]) + oss << "tessFactor.edgeTessellationFactor"; + else + oss << "tessFactor.edgeTessellationFactor[0]"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR: + case NAME_FINAL_LINE_DETAIL_TESSFACTOR: + oss << "tessFactor.edgeTessellationFactor[1]"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR: + case NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR: + oss << "tessFactor.edgeTessellationFactor[2]"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR: + oss << "tessFactor.edgeTessellationFactor[3]"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_TRI_INSIDE_TESSFACTOR: + case NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR: + if (psContext->psShader->aIndexedOutput[1][psOperand->ui32RegisterNumber]) + oss << "tessFactor.insideTessellationFactor"; + else + oss << "tessFactor.insideTessellationFactor[0]"; + *pui32IgnoreSwizzle = 1; + break; + case NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR: + oss << "tessFactor.insideTessellationFactor[1]"; + *pui32IgnoreSwizzle = 1; + break; + + default: + const std::string patchPrefix = "patch."; + + if (psContext->psShader->eShaderType == DOMAIN_SHADER) + oss << psContext->inputPrefix << patchPrefix << psIn->semanticName << psIn->ui32SemanticIndex; + else + oss << patchPrefix << psIn->semanticName << psIn->ui32SemanticIndex; + + // Disable swizzles if this is a scalar + if (psContext->psShader->eShaderType == HULL_SHADER) + { + if ((psContext->psShader->abScalarOutput[1][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) + *pui32IgnoreSwizzle = 1; + } + else + { + if ((psContext->psShader->abScalarInput[1][psOperand->ui32RegisterNumber] & psOperand->GetAccessMask()) != 0) + *pui32IgnoreSwizzle = 1; + } + break; + } + break; + } + default: + { + ASSERT(0); + break; + } + } + + if (hasCtor && (*pui32IgnoreSwizzle == 0)) + { + oss << TranslateOperandSwizzle(psOperand, ui32CompMask, piRebase ? 
*piRebase : 0); + *pui32IgnoreSwizzle = 1; + } + + if (needsBoolUpscale) + { + if (requestedType == SVT_UINT || requestedType == SVT_UINT16 || requestedType == SVT_UINT8) + oss << ") * 0xffffffffu"; + else + oss << ") * int(0xffffffffu)"; + numParenthesis--; + + oss << ")"; + numParenthesis--; + } + + while (numParenthesis != 0) + { + oss << ")"; + numParenthesis--; + } + return oss.str(); +} + +std::string ToMetal::TranslateOperand(const Operand* psOperand, uint32_t ui32TOFlag, uint32_t ui32ComponentMask) +{ + std::ostringstream oss; + uint32_t ui32IgnoreSwizzle = 0; + int iRebase = 0; + + // in single-component mode there is no need to use mask + if (psOperand->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE) + ui32ComponentMask = OPERAND_4_COMPONENT_MASK_ALL; + + if (ui32TOFlag & TO_FLAG_NAME_ONLY) + { + return TranslateVariableName(psOperand, ui32TOFlag, &ui32IgnoreSwizzle, OPERAND_4_COMPONENT_MASK_ALL, &iRebase); + } + + switch (psOperand->eModifier) + { + case OPERAND_MODIFIER_NONE: + { + break; + } + case OPERAND_MODIFIER_NEG: + { + oss << ("(-"); + break; + } + case OPERAND_MODIFIER_ABS: + { + oss << ("abs("); + break; + } + case OPERAND_MODIFIER_ABSNEG: + { + oss << ("-abs("); + break; + } + } + + oss << TranslateVariableName(psOperand, ui32TOFlag, &ui32IgnoreSwizzle, ui32ComponentMask, &iRebase); + + if (!ui32IgnoreSwizzle) + { + oss << TranslateOperandSwizzle(psOperand, ui32ComponentMask, iRebase); + } + + switch (psOperand->eModifier) + { + case OPERAND_MODIFIER_NONE: + { + break; + } + case OPERAND_MODIFIER_NEG: + { + oss << (")"); + break; + } + case OPERAND_MODIFIER_ABS: + { + oss << (")"); + break; + } + case OPERAND_MODIFIER_ABSNEG: + { + oss << (")"); + break; + } + } + return oss.str(); +}
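+// Taken together, TranslateOperand() produces strings of the form
+//     [modifier prefix] name [swizzle] [modifier suffix]
+// e.g. a negated float temp with an .xy swizzle comes out roughly as "(-u_xlat0.xy)"
+// (the register name here is illustrative of the HLSLCC_TEMP_PREFIX naming, not taken
+// from a specific shader).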