Nanako 2024-02-01 12:42:58 +08:00
parent 2cc7cf4f32
commit bbd2b83e7a
141 changed files with 14229 additions and 43622 deletions

View File

@ -61,7 +61,7 @@ add_subdirectory(third_party/imgui)
add_subdirectory(third_party/sdl)
add_subdirectory(third_party/portaudio)
add_subdirectory(third_party/spdlog)
add_subdirectory(third_party/HLSLcc)
add_subdirectory(third_party/slang)
# setup portaudio
set(PA_USE_ASIO ON CACHE BOOL "" FORCE)
@ -99,7 +99,6 @@ set(SDL_LOCALE OFF CACHE BOOL "" FORCE)
set(SDL_MISC OFF CACHE BOOL "" FORCE)
set(SDL_MMX OFF CACHE BOOL "" FORCE)
set(SDL_OFFSCREEN OFF CACHE BOOL "" FORCE)
set(SDL_OPENGLES OFF CACHE BOOL "" FORCE)
set(SDL_POWER OFF CACHE BOOL "" FORCE)
set(SDL_RENDER OFF CACHE BOOL "" FORCE)
set(SDL_RENDER_D3D OFF CACHE BOOL "" FORCE)
@ -114,17 +113,19 @@ set(SDL_TIMERS OFF CACHE BOOL "" FORCE)
set(SDL_VIRTUAL_JOYSTICK OFF CACHE BOOL "" FORCE)
set(SDL_TEST_LIBRARY OFF CACHE BOOL "" FORCE)
if (WIN32 OR (UNIX AND NOT APPLE))
set(SDL_VULKAN ON CACHE BOOL "" FORCE)
else()
set(SDL_VULKAN OFF CACHE BOOL "" FORCE)
set(SDL_VULKAN ON CACHE BOOL "" FORCE)
set(SDL_OPENGL ON CACHE BOOL "" FORCE)
set(SDL_OPENGLES OFF CACHE BOOL "" FORCE)
if (APPLE)
set(SDL_METAL ON CACHE BOOL "" FORCE)
else()
set(SDL_METAL OFF CACHE BOOL "" FORCE)
endif()
set(SDL_WASAPI OFF CACHE BOOL "" FORCE)
set(SDL_XINPUT OFF CACHE BOOL "" FORCE)
set(SDL_DISABLE_UNINSTALL ON CACHE BOOL "" FORCE)
set(SDL_OPENGL OFF CACHE BOOL "" FORCE)
# setup spdlog

View File

@ -8,10 +8,10 @@ retrieve_files(ALL_FILES)
add_library(${PROJECT_NAME} SHARED ${ALL_FILES})
target_include_directories(${PROJECT_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} spdlog imgui HLSLcc)
target_include_directories(${PROJECT_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} spdlog imgui slang)
target_link_directories(${PROJECT_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} spdlog imgui HLSLcc)
target_link_libraries(${PROJECT_NAME} PUBLIC imgui spdlog ${SDL2_LIBRARIES} HLSLcc)
target_link_directories(${PROJECT_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR} spdlog imgui)
target_link_libraries(${PROJECT_NAME} PUBLIC imgui spdlog ${SDL2_LIBRARIES} slang)
target_precompile_headers(${PROJECT_NAME} PUBLIC extern.h)

View File

@ -8,6 +8,7 @@
#include "imgui_internal.h"
#include "filesystem/stb_image.h"
#include "rhi/texture.h"
#include "rhi/opengl/renderer_opengl.h"
#include "spdlog/async.h"
#include "spdlog/spdlog.h"
#include "spdlog/sinks/basic_file_sink.h"
@ -18,6 +19,7 @@
bool g_is_running = true;
bool g_exit_requested = false;
slang::IGlobalSession* g_slang_global_session = nullptr;
application::~application()
{
@ -26,6 +28,8 @@ application::~application()
void application::init(window_params in_window_params, int argc, char** argv)
{
slang::createGlobalSession(&g_slang_global_session);
try
{
auto async_file = spdlog::basic_logger_mt<spdlog::async_factory>("async_file_logger", "logs/log.txt");
@ -53,20 +57,24 @@ void application::init(window_params in_window_params, int argc, char** argv)
bool use_dx11 = false;
bool use_dx12 = false;
bool use_vulkan = false;
bool use_opengl = false;
#else
bool use_vulkan = true;
bool use_opengl = false;
#endif
#if WIN32
command_line::instance().get_arg("dx11", use_dx11);
command_line::instance().get_arg("dx12", use_dx12);
command_line::instance().get_arg("vulkan", use_vulkan);
command_line::instance().get_arg("opengl", use_opengl);
// only one renderer can be used at a time
const int renderer_count = use_dx11 + use_dx12 + use_vulkan;
const int renderer_count = use_dx11 + use_dx12 + use_vulkan + use_opengl;
assert(renderer_count <= 1);
// if no renderer is specified, use dx11
if (!(use_dx11 || use_dx12 || use_vulkan))
if (!(use_dx11 || use_dx12 || use_vulkan || use_opengl))
{
use_dx11 = true;
}
@ -75,6 +83,11 @@ void application::init(window_params in_window_params, int argc, char** argv)
{
renderer_ = new renderer_dx11();
}
else if (use_opengl)
{
renderer_ = new renderer_opengl();
window_flags |= SDL_WINDOW_OPENGL;
}
// if (use_dx12)
// {
// renderer_ = new renderer_dx12();
@ -90,11 +103,17 @@ void application::init(window_params in_window_params, int argc, char** argv)
renderer_ = new renderer_vulkan();
window_flags |= SDL_WINDOW_VULKAN;
}
else if (use_opengl)
{
renderer_ = new renderer_opengl();
window_flags |= SDL_WINDOW_OPENGL;
}
#endif
// if (!renderer_)
// renderer_ = new renderer_null();
renderer_->pre_init();
if (in_window_params.fullscreen)
window_flags |= SDL_WINDOW_FULLSCREEN;
if (in_window_params.borderless)
@ -121,6 +140,7 @@ void application::init(window_params in_window_params, int argc, char** argv)
SDL_ShowWindow(window_);
renderer_->init(window_);
renderer_->init_slang(R"(E:\Projects\AronaStudio\Arona\shaders\)");
renderer_->resize(in_window_params.width, in_window_params.height);
g_is_running = true;
}
@ -147,9 +167,9 @@ int application::run()
if (g_exit_requested)
break;
renderer_->new_frame();
renderer_->new_frame(window_);
draw_gui();
renderer_->end_frame();
renderer_->end_frame(window_);
}
return 0;
}
@ -162,7 +182,7 @@ void application::shutdown()
delete renderer_;
}
texture* application::load_texture(const std::string& path) const
std::shared_ptr<texture> application::load_texture(const std::string& path) const
{
int width = 0;
int height = 0;
@ -177,7 +197,7 @@ texture* application::load_texture(const std::string& path) const
return texture;
}
texture* application::create_texture(const unsigned char* data, const int width, const int height) const
std::shared_ptr<texture> application::create_texture(const unsigned char* data, const int width, const int height) const
{
return renderer_->create_texture(data, width, height);
}
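The texture helpers above now return std::shared_ptr<texture> instead of raw pointers, so callers no longer delete textures by hand. A minimal caller sketch follows; the helper name, the icon path, and calling it from inside draw_gui() are illustrative assumptions rather than code from this commit.

#include <memory>
#include "imgui.h"
#include "application/application.h"
#include "rhi/texture.h"

// Hypothetical helper, meant to run inside draw_gui(): load an icon once and show it.
void draw_icon_example()
{
    // Shared ownership: the texture is released when the last reference goes away.
    static std::shared_ptr<texture> icon =
        application::get()->load_texture("icons/play.png"); // placeholder path
    if (icon && icon->is_valid())
        ImGui::Image(icon->get_texture_id(), ImVec2(32.0f, 32.0f));
}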

View File

@ -2,12 +2,16 @@
#include <string>
#include "SDL.h"
#include "imgui.h"
#include "slang.h"
class renderer;
class texture;
class application;
extern bool g_is_running;
extern bool g_exit_requested;
extern slang::IGlobalSession* g_slang_global_session;
static application* g_app_instance = nullptr;
struct window_params
{
@ -26,18 +30,25 @@ struct window_params
class CORE_API application
{
public:
application() = default;
application()
{
g_app_instance = this;
}
virtual ~application();
application(const application&) = delete;
application(application&&) = delete;
static application* get()
{
return g_app_instance;
}
virtual void init(window_params in_window_params, int argc, char** argv);
virtual int run();
virtual void shutdown();
virtual void draw_gui() = 0;
virtual void init_imgui(ImGuiContext* in_context) = 0;
texture* load_texture(const std::string& path) const;
texture* create_texture(const unsigned char* data, const int width, const int height) const;
std::shared_ptr<texture> load_texture(const std::string& path) const;
std::shared_ptr<texture> create_texture(const unsigned char* data, const int width, const int height) const;
renderer* get_renderer() const { return renderer_; }
SDL_Window* get_window() const { return window_; }

View File

@ -1,2 +1 @@
#include "E:/Projects/Arona/build/core/CMakeFiles/core.dir/Debug/cmake_pch.hxx"
#include "ref_counting.h"
#include "ref_counting.h"

View File

@ -0,0 +1,51 @@
#pragma once
#define CHECK_GL_ERRORS \
{\
GLenum Error = glGetError();\
if (Error != 0)\
spdlog::critical("GL error: 0x{:x}", Error);\
}
#define GL_READ_FRAMEBUFFER 0x8CA8
#define GL_DRAW_FRAMEBUFFER 0x8CA9
#define GL_READ_FRAMEBUFFER_BINDING 0x8CAA
#define GL_MAX_COLOR_ATTACHMENTS 0x8CDF
#define GL_COLOR_ATTACHMENT0 0x8CE0
#define GL_COLOR_ATTACHMENT1 0x8CE1
#define GL_COLOR_ATTACHMENT2 0x8CE2
#define GL_COLOR_ATTACHMENT3 0x8CE3
#define GL_COLOR_ATTACHMENT4 0x8CE4
#define GL_COLOR_ATTACHMENT5 0x8CE5
#define GL_COLOR_ATTACHMENT6 0x8CE6
#define GL_COLOR_ATTACHMENT7 0x8CE7
#define GL_COLOR_ATTACHMENT8 0x8CE8
#define GL_COLOR_ATTACHMENT9 0x8CE9
#define GL_COLOR_ATTACHMENT10 0x8CEA
#define GL_COLOR_ATTACHMENT11 0x8CEB
#define GL_COLOR_ATTACHMENT12 0x8CEC
#define GL_COLOR_ATTACHMENT13 0x8CED
#define GL_COLOR_ATTACHMENT14 0x8CEE
#define GL_COLOR_ATTACHMENT15 0x8CEF
#define GL_COLOR_ATTACHMENT16 0x8CF0
#define GL_COLOR_ATTACHMENT17 0x8CF1
#define GL_COLOR_ATTACHMENT18 0x8CF2
#define GL_COLOR_ATTACHMENT19 0x8CF3
#define GL_COLOR_ATTACHMENT20 0x8CF4
#define GL_COLOR_ATTACHMENT21 0x8CF5
#define GL_COLOR_ATTACHMENT22 0x8CF6
#define GL_COLOR_ATTACHMENT23 0x8CF7
#define GL_COLOR_ATTACHMENT24 0x8CF8
#define GL_COLOR_ATTACHMENT25 0x8CF9
#define GL_COLOR_ATTACHMENT26 0x8CFA
#define GL_COLOR_ATTACHMENT27 0x8CFB
#define GL_COLOR_ATTACHMENT28 0x8CFC
#define GL_COLOR_ATTACHMENT29 0x8CFD
#define GL_COLOR_ATTACHMENT30 0x8CFE
#define GL_COLOR_ATTACHMENT31 0x8CFF
#define GL_DEPTH_ATTACHMENT 0x8D00
#define GL_STENCIL_ATTACHMENT 0x8D20
#define GL_COMPUTE_SHADER 0x91B9
#define GL_GEOMETRY_SHADER 0x8DD9

View File

@ -0,0 +1,19 @@
#pragma once
// The imgui GL loader supplies the GL typedefs, APIENTRYP and imgl3wGetProcAddress used below.
#include "imgui_impl_opengl3_loader.h"
typedef void (APIENTRYP PFNGLGENFRAMEBUFFERSPROC) (GLsizei n, GLuint *framebuffers);
typedef void (APIENTRYP PFNGLBINDFRAMEBUFFERPROC) (GLenum target, GLuint framebuffer);
typedef void (APIENTRYP PFNGLFRAMEBUFFERTEXTURE2DPROC) (GLenum target, GLenum attachment, GLenum textarget, GLuint texture, GLint level);
typedef void (APIENTRYP PFNGLGETTEXIMAGEPROC) (GLenum target, GLint level, GLenum format, GLenum type, void *pixels);
inline PFNGLGENFRAMEBUFFERSPROC glGenFramebuffers;
inline PFNGLBINDFRAMEBUFFERPROC glBindFramebuffer;
inline PFNGLFRAMEBUFFERTEXTURE2DPROC glFramebufferTexture2D;
inline PFNGLGETTEXIMAGEPROC glGetTexImage;
inline void load_opengl_func()
{
glGenFramebuffers = (PFNGLGENFRAMEBUFFERSPROC)imgl3wGetProcAddress("glGenFramebuffers");
glBindFramebuffer = (PFNGLBINDFRAMEBUFFERPROC)imgl3wGetProcAddress("glBindFramebuffer");
glFramebufferTexture2D = (PFNGLFRAMEBUFFERTEXTURE2DPROC)imgl3wGetProcAddress("glFramebufferTexture2D");
glGetTexImage = (PFNGLGETTEXIMAGEPROC)imgl3wGetProcAddress("glGetTexImage");
}

View File

@ -0,0 +1,97 @@
#include "render_target_opengl.h"
#include "opengl_def.h"
#include "opengl_func.h"
void render_target_opengl::init(int width, int height, texture_format format)
{
glGenFramebuffers(1, &fbo_);
CHECK_GL_ERRORS
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo_);
CHECK_GL_ERRORS
#if defined(__APPLE__)
LockGLContext([NSOpenGLContext currentContext]);
#endif
// Create a new OpenGL texture
glGenTextures(1, &texture_);
CHECK_GL_ERRORS
glBindTexture(GL_TEXTURE_2D, texture_);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
CHECK_GL_ERRORS
#if defined(__APPLE__)
UnlockGLContext([NSOpenGLContext currentContext]);
#endif
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture_, 0);
CHECK_GL_ERRORS
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
}
void* render_target_opengl::lock(lock_state state)
{
locked_texture_ = malloc(width_ * height_ * 4);
switch (state)
{
case lock_state::READ:
case lock_state::READ_WRITE:
{
glBindTexture(GL_TEXTURE_2D, texture_); // bind the backing texture before reading it back
glGetTexImage(GL_TEXTURE_2D, 0, GL_RGBA, GL_UNSIGNED_BYTE, locked_texture_);
}
break;
case lock_state::WRITE:
break;
case lock_state::NONE:
break;
}
return locked_texture_;
}
void render_target_opengl::unlock()
{
#if defined(__APPLE__)
LockGLContext([NSOpenGLContext currentContext]);
#endif
// Bind the texture before updating its filtering state and pixel data
glBindTexture(GL_TEXTURE_2D, texture_);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width_, height_, 0, GL_RGBA, GL_UNSIGNED_BYTE, locked_texture_);
#if defined(__APPLE__)
UnlockGLContext([NSOpenGLContext currentContext]);
#endif
free(locked_texture_);
locked_texture_ = nullptr;
}
void render_target_opengl::on_resize(int width, int height)
{
width_ = width;
height_ = height;
glDeleteTextures(1, &texture_);
// Recreate the texture object: binding a deleted name is invalid in a core-profile context.
glGenTextures(1, &texture_);
#if defined(__APPLE__)
LockGLContext([NSOpenGLContext currentContext]);
#endif
glBindTexture(GL_TEXTURE_2D, texture_);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width_, height_, 0, GL_RGBA, GL_UNSIGNED_BYTE, nullptr);
#if defined(__APPLE__)
UnlockGLContext([NSOpenGLContext currentContext]);
#endif
// Re-attach the new texture so the framebuffer does not keep a dangling color attachment.
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, fbo_);
glFramebufferTexture2D(GL_DRAW_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, texture_, 0);
glBindFramebuffer(GL_DRAW_FRAMEBUFFER, 0);
}
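lock() and unlock() above round-trip the render target through a CPU staging buffer: lock(READ/READ_WRITE) copies the texture into a malloc'd buffer, and unlock() re-uploads that buffer and frees it. A hedged usage sketch follows; the helper name and the explicit width/height parameters are assumptions, and lock_state is taken to be visible through rhi/render_target.h as in the code above.

#include "rhi/render_target.h"

// Hypothetical CPU-side fill: writes opaque red into a locked render target.
void fill_red_example(render_target& target, int width, int height)
{
    auto* pixels = static_cast<unsigned char*>(target.lock(lock_state::WRITE));
    if (!pixels)
        return;
    for (int i = 0; i < width * height; ++i)
    {
        pixels[i * 4 + 0] = 0xFF; // R
        pixels[i * 4 + 1] = 0x00; // G
        pixels[i * 4 + 2] = 0x00; // B
        pixels[i * 4 + 3] = 0xFF; // A
    }
    target.unlock(); // uploads the staging buffer into the texture and frees it
}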

View File

@ -0,0 +1,20 @@
#pragma once
#include "imgui_impl_opengl3_loader.h"
#include "rhi/render_target.h"
class render_target_opengl : public render_target
{
public:
void init(int width, int height, texture_format format) override;
ImTextureID get_texture_id() override { return (void*)static_cast<intptr_t>(texture_); } // expose the color texture, not the FBO name, to ImGui
void* lock(lock_state state) override;
void unlock() override;
protected:
void on_resize(int width, int height) override;
private:
GLuint fbo_ = 0;
GLuint texture_ = 0;
void* locked_texture_ = nullptr;
};

View File

@ -0,0 +1,165 @@
#include "renderer_opengl.h"
#include <SDL_hints.h>
#include "imgui_impl_opengl3.h"
#include "imgui_impl_opengl3_loader.h"
#include "imgui_impl_sdl3.h"
#include "opengl_func.h"
#include "render_target_opengl.h"
#include "texture_opengl.h"
#include "application/application.h"
#include "rhi/shader.h"
#include "shader/shader_cs_opengl.h"
#include "shader/shader_gs_opengl.h"
#include "shader/shader_ps_opengl.h"
#include "shader/shader_vs_opengl.h"
SDL_GLContext g_gl_context = nullptr;
void renderer_opengl::pre_init()
{
#if defined(__APPLE__)
SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, SDL_GL_CONTEXT_FORWARD_COMPATIBLE_FLAG); // Always required on Mac
#else
SDL_GL_SetAttribute(SDL_GL_CONTEXT_FLAGS, 0);
#endif
SDL_GL_SetAttribute(SDL_GL_CONTEXT_PROFILE_MASK, SDL_GL_CONTEXT_PROFILE_CORE);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MAJOR_VERSION, 4);
SDL_GL_SetAttribute(SDL_GL_CONTEXT_MINOR_VERSION, 6);
// set sdl using graphics card
// Enable native IME.
SDL_SetHint(SDL_HINT_IME_SHOW_UI, "1");
// Create window with graphics context
SDL_GL_SetAttribute(SDL_GL_DOUBLEBUFFER, 1);
SDL_GL_SetAttribute(SDL_GL_DEPTH_SIZE, 24);
SDL_GL_SetAttribute(SDL_GL_STENCIL_SIZE, 8);
}
bool renderer_opengl::init(SDL_Window* window_handle)
{
if (has_initialized_)
return true;
const auto glsl_version = "#version 460";
g_gl_context = SDL_GL_CreateContext(window_handle);
SDL_GL_MakeCurrent(window_handle, g_gl_context);
SDL_GL_SetSwapInterval(1); // Enable vsync
SDL_ShowWindow(window_handle);
// Setup Platform/Renderer backends
ImGui_ImplSDL3_InitForOpenGL(window_handle, g_gl_context);
ImGui_ImplOpenGL3_Init(glsl_version);
load_opengl_func();
has_initialized_ = true; // mark done so a second init() call is a no-op
return true;
}
void renderer_opengl::shutdown()
{
ImGui_ImplOpenGL3_Shutdown();
ImGui_ImplSDL3_Shutdown();
SDL_GL_DeleteContext(g_gl_context);
}
Slang::ComPtr<slang::ISession> renderer_opengl::create_slang_session(const std::string& shader_path)
{
slang::TargetDesc target_desc;
target_desc.format = SLANG_GLSL;
target_desc.profile = g_slang_global_session->findProfile("glsl_460");
const char* search_paths[] = { shader_path.c_str() };
slang::SessionDesc session_desc;
session_desc.searchPaths = search_paths;
session_desc.searchPathCount = 1;
session_desc.targets = &target_desc;
session_desc.targetCount = 1;
Slang::ComPtr<slang::ISession> out;
g_slang_global_session->createSession(session_desc, out.writeRef());
return out;
}
std::shared_ptr<shader> renderer_opengl::load_shader(const std::string& module_name, const std::string& entry_name)
{
auto handle = std::make_shared<slang_handle>();
if (!handle->init_slang_module(module_name, entry_name))
return nullptr;
const auto shader_type = handle->get_shader_type();
std::shared_ptr<shader> out;
switch (shader_type)
{
case SLANG_STAGE_VERTEX:
{
out = std::make_shared<shader_vs_opengl>(handle);
}
break;
case SLANG_STAGE_GEOMETRY:
{
out = std::make_shared<shader_gs_opengl>(handle);
}
break;
case SLANG_STAGE_PIXEL:
{
out = std::make_shared<shader_ps_opengl>(handle);
}
break;
case SLANG_STAGE_COMPUTE:
{
out = std::make_shared<shader_cs_opengl>(handle);
}
break;
default:
spdlog::error("slang: unsupported shader type");
return nullptr;
}
if (!out->init())
return nullptr;
return out;
}
void renderer_opengl::new_frame(SDL_Window* window_handle)
{
ImGui_ImplOpenGL3_NewFrame();
ImGui_ImplSDL3_NewFrame();
ImGui::NewFrame();
}
void renderer_opengl::end_frame(SDL_Window* window_handle)
{
// Rendering
ImGui::Render();
const auto& io = ImGui::GetIO();
glViewport(0, 0, (int)io.DisplaySize.x, (int)io.DisplaySize.y);
glClearColor(clear_color.x * clear_color.w, clear_color.y * clear_color.w, clear_color.z * clear_color.w, clear_color.w);
glClear(GL_COLOR_BUFFER_BIT);
ImGui_ImplOpenGL3_RenderDrawData(ImGui::GetDrawData());
SDL_GL_SwapWindow(window_handle);
}
void renderer_opengl::resize(int width, int height)
{
}
std::shared_ptr<texture> renderer_opengl::create_texture(const unsigned char* data, int width, int height)
{
auto out = std::make_shared<texture_opengl>();
if (!out->init_data(data, width, height))
{
out = nullptr;
}
return out;
}
std::shared_ptr<render_target> renderer_opengl::create_render_target(int width, int height, texture_format format)
{
const auto target = std::make_shared<render_target_opengl>();
target->init(width, height, format);
return target;
}
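Putting the pieces above together, a caller compiles a Slang entry point into a GL shader object entirely through the renderer interface: the renderer owns the slang::ISession, load_shader() builds a slang_handle, and the concrete shader_*_opengl wraps the resulting GL shader. A hedged sketch follows; the "blit" module and "ps_main" entry point are placeholder names, not shaders shipped in this commit.

#include <memory>
#include "application/application.h"
#include "rhi/renderer.h"
#include "rhi/shader.h"

// Hypothetical loader: returns nullptr on failure (details are reported through spdlog).
std::shared_ptr<shader> load_blit_pixel_shader_example()
{
    renderer* r = application::get()->get_renderer();
    // init_slang() must already have run so the renderer holds a live slang session.
    std::shared_ptr<shader> ps = r->load_shader("blit", "ps_main"); // placeholder module/entry names
    if (ps && ps->is_valid())
        return ps;
    return nullptr;
}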

View File

@ -0,0 +1,23 @@
#pragma once
#include "rhi/renderer.h"
extern SDL_GLContext g_gl_context;
class renderer_opengl : public renderer
{
public:
void pre_init() override;
bool init(SDL_Window* window_handle) override;
void shutdown() override;
Slang::ComPtr<slang::ISession> create_slang_session(const std::string& shader_path) override;
std::shared_ptr<shader> load_shader(const std::string& module_name, const std::string& entry_name) override;
void new_frame(SDL_Window* window_handle) override;
void end_frame(SDL_Window* window_handle) override;
void resize(int width, int height) override;
std::shared_ptr<texture> create_texture(const unsigned char* data, int width, int height) override;
std::shared_ptr<render_target> create_render_target(int width, int height, texture_format format) override;
private:
bool has_initialized_ = false;
};

View File

@ -0,0 +1 @@
#include "shader_cs_opengl.h"

View File

@ -0,0 +1,10 @@
#pragma once
#include "shader_opengl.h"
#include "rhi/opengl/opengl_def.h"
class shader_cs_opengl : public shader_opengl
{
public:
shader_cs_opengl(const std::shared_ptr<slang_handle>& handle) : shader_opengl(handle) {}
GLenum get_shader_type() const override { return GL_COMPUTE_SHADER; }
};

View File

@ -0,0 +1 @@
#include "shader_gs_opengl.h"

View File

@ -0,0 +1,11 @@
#pragma once
#include "shader_opengl.h"
#include "rhi/opengl/opengl_def.h"
class shader_gs_opengl : public shader_opengl
{
public:
shader_gs_opengl(const std::shared_ptr<slang_handle>& handle) : shader_opengl(handle) {}
GLenum get_shader_type() const override { return GL_GEOMETRY_SHADER; }
};

View File

@ -0,0 +1,35 @@
#include "shader_opengl.h"
#include "imgui_impl_opengl3_loader.h"
#include "rhi/slang_handle.h"
bool shader_opengl::init()
{
shader_id_ = glCreateShader(get_shader_type());
if (shader_id_ == 0)
{
spdlog::error("Failed to create shader");
return false;
}
const auto code_blob = handle_->get_entry_point_code();
const auto code_array = static_cast<const char*>(code_blob->getBufferPointer());
const GLint code_size = static_cast<GLint>(code_blob->getBufferSize());
glShaderSource(shader_id_, 1, &code_array, &code_size);
// Actually compile; without this call the GL_COMPILE_STATUS query below is meaningless.
glCompileShader(shader_id_);
GLint compile_status = GL_FALSE;
glGetShaderiv(shader_id_, GL_COMPILE_STATUS, &compile_status);
if (compile_status == GL_FALSE)
{
GLint log_length = 0;
glGetShaderiv(shader_id_, GL_INFO_LOG_LENGTH, &log_length);
std::vector<GLchar> log(log_length);
glGetShaderInfoLog(shader_id_, log_length, nullptr, log.data());
spdlog::error("Failed to compile shader: {}", log.data());
glDeleteShader(shader_id_);
shader_id_ = 0;
return false;
}
return true;
}

View File

@ -0,0 +1,15 @@
#pragma once
#include "imgui_impl_opengl3_loader.h"
#include "rhi/shader.h"
class shader_opengl : public shader
{
public:
shader_opengl(const std::shared_ptr<slang_handle>& handle) : shader(handle) {}
bool init() override;
[[nodiscard]] virtual GLenum get_shader_type() const = 0;
[[nodiscard]] bool is_initialized() const override { return shader_id_ != 0; }
protected:
GLuint shader_id_ = 0; // zero-initialized so is_initialized() is well-defined before init()
};

View File

@ -0,0 +1 @@
#include "shader_ps_opengl.h"

View File

@ -0,0 +1,9 @@
#pragma once
#include "shader_opengl.h"
class shader_ps_opengl : public shader_opengl
{
public:
shader_ps_opengl(const std::shared_ptr<slang_handle>& handle) : shader_opengl(handle) {}
GLenum get_shader_type() const override { return GL_FRAGMENT_SHADER; }
};

View File

@ -0,0 +1 @@
#include "shader_vs_opengl.h"

View File

@ -0,0 +1,10 @@
#pragma once
#include "shader_opengl.h"
class shader_vs_opengl : public shader_opengl
{
public:
shader_vs_opengl(const std::shared_ptr<slang_handle>& handle) : shader_opengl(handle) {}
GLenum get_shader_type() const override { return GL_VERTEX_SHADER; }
};

View File

@ -0,0 +1,29 @@
#include "texture_opengl.h"
#include "opengl_def.h"
bool texture_opengl::init_data(const unsigned char* data, int width, int height)
{
width_ = width;
height_ = height;
#if defined(__APPLE__)
LockGLContext([NSOpenGLContext currentContext]);
#endif
// Create a new OpenGL texture
glGenTextures(1, &texture_id_);
CHECK_GL_ERRORS
glBindTexture(GL_TEXTURE_2D, texture_id_);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, data);
CHECK_GL_ERRORS
#if defined(__APPLE__)
UnlockGLContext([NSOpenGLContext currentContext]);
#endif
return true;
}

View File

@ -0,0 +1,13 @@
#pragma once
#include "imgui_impl_opengl3_loader.h"
#include "rhi/texture.h"
class texture_opengl : public texture
{
public:
ImTextureID get_texture_id() override { return (void*)static_cast<intptr_t>(texture_id_); }
bool init_data(const unsigned char* data, int width, int height) override;
[[nodiscard]] bool is_valid() const override { return texture_id_ != 0; }
private:
GLuint texture_id_ = 0;
};

View File

@ -0,0 +1,6 @@
#include "renderer.h"
void renderer::init_slang(const std::string& shader_path)
{
session_ = create_slang_session(shader_path);
}

View File

@ -1,25 +1,37 @@
#pragma once
#include <SDL_video.h>
#include "imgui.h"
#include "slang_handle.h"
class shader;
class render_target;
class texture;
constexpr ImVec4 clear_color = ImVec4(0.45f, 0.55f, 0.60f, 1.00f);
constexpr float clear_color_with_alpha[4] = { clear_color.x * clear_color.w, clear_color.y * clear_color.w, clear_color.z * clear_color.w, clear_color.w };
class renderer
{
public:
virtual ~renderer() = default;
virtual void pre_init() {}
virtual bool init(SDL_Window* window_handle) = 0;
virtual void shutdown() = 0;
void init_slang(const std::string& shader_path);
virtual Slang::ComPtr<slang::ISession> create_slang_session(const std::string& shader_path) = 0;
virtual Slang::ComPtr<slang::ISession> get_slang_session() { return session_; }
virtual std::shared_ptr<shader> load_shader(const std::string& module_name, const std::string& entry_name) = 0;
virtual void new_frame() = 0;
virtual void end_frame() = 0;
virtual void new_frame(SDL_Window* window_handle) = 0;
virtual void end_frame(SDL_Window* window_handle) = 0;
virtual void resize(int width, int height) = 0;
virtual texture* create_texture(const unsigned char* data, int width, int height) = 0;
virtual render_target* create_render_target(int width, int height, texture_format format) = 0;
virtual bool compile_shader() = 0;
virtual std::shared_ptr<texture> create_texture(const unsigned char* data, int width, int height) = 0;
virtual std::shared_ptr<render_target> create_render_target(int width, int height, texture_format format) = 0;
void set_vsync(const bool vsync) { vsync_ = vsync; }
protected:
Slang::ComPtr<slang::ISession> session_;
bool vsync_ = true;
};
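Together with the application changes earlier in this commit, the renderer contract is: pre_init() before the window exists, then init(), init_slang() and resize() once the window is up, new_frame(window)/end_frame(window) each frame, and shutdown() on exit. A condensed, hypothetical sketch of that sequence follows; window creation, event handling and the shader search path are omitted or placeholders.

#include "rhi/opengl/renderer_opengl.h"

// Hypothetical driver; in the real code application::init()/run()/shutdown() owns this sequence.
void render_one_frame_example(SDL_Window* window)
{
    renderer* r = new renderer_opengl();
    r->pre_init();             // sets GL attributes (normally before the window is created)
    r->init(window);           // creates the GL context and the ImGui backends
    r->init_slang("shaders/"); // placeholder shader search path
    r->resize(1280, 720);
    r->new_frame(window);
    // ... ImGui draw calls from draw_gui() would go here ...
    r->end_frame(window);      // renders the ImGui draw data and swaps the window
    r->shutdown();
    delete r;
}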

core/rhi/shader.cpp Normal file
View File

@ -0,0 +1 @@
#include "shader.h"

core/rhi/shader.h Normal file
View File

@ -0,0 +1,19 @@
#pragma once
class slang_handle;
class shader
{
public:
virtual ~shader() = default;
explicit shader(const std::shared_ptr<slang_handle>& handle) : handle_(handle) {}
virtual bool init() { return false; }
[[nodiscard]] virtual bool is_initialized() const = 0;
[[nodiscard]] virtual bool is_valid() const { return handle_ != nullptr && is_initialized(); }
// param setters
// virtual void set_int(const char* name, int value) = 0;
protected:
std::shared_ptr<slang_handle> handle_;
};

core/rhi/slang_handle.cpp Normal file
View File

@ -0,0 +1,85 @@
#include "slang_handle.h"
#include "renderer.h"
#include "application/application.h"
bool slang_handle::init_slang_module(const std::string& module_name, const std::string& entry_name)
{
spdlog::info("slang: init slang handle: module \"{}\", entry point \"{}\"", module_name.c_str(), entry_name.c_str());
const auto session = application::get()->get_renderer()->get_slang_session();
Slang::ComPtr<slang::IBlob> diagnostics;
*module.writeRef() = session->loadModule(module_name.c_str(), diagnostics.writeRef());
if (diagnostics)
{
spdlog::error("slang: load module \"{}\" with diagnostics: {}", module_name.c_str(), (const char*)diagnostics->getBufferPointer());
return false;
}
auto r = module->findEntryPointByName(entry_name.c_str(), entry_point.writeRef());
if (r != SLANG_OK)
{
spdlog::error("slang: can't find entry point \"{}\"", entry_name.c_str());
return false;
}
slang::IComponentType* components[] = { module, entry_point };
r = session->createCompositeComponentType(components, 2, program.writeRef());
if (r != SLANG_OK)
{
spdlog::error("slang: create composite component type failed");
return false;
}
// get entry point index
for (int i = 0; i < module->getDefinedEntryPointCount(); ++i)
{
Slang::ComPtr<slang::IEntryPoint> temp_entry_point;
module->getDefinedEntryPoint(i, temp_entry_point.writeRef());
if (temp_entry_point == entry_point)
{
entry_point_index_ = i;
break;
}
}
slang::ProgramLayout* layout = program->getLayout(target_index);
const auto entry_reflection = layout->getEntryPointByIndex(entry_point_index_);
shader_type_ = entry_reflection->getStage();
#if _DEBUG
spdlog::info("slang: shader type: {}", shader_type_);
spdlog::info("================parameters================");
for (int i = 0; i < layout->getParameterCount(); ++i)
{
slang::VariableLayoutReflection* reflection = layout->getParameterByIndex(i);
const auto type_reflection = reflection->getTypeLayout()->getType();
spdlog::info("{} {} {}; ", i, type_reflection->getName(), reflection->getName());
}
spdlog::info("================parameters================");
#endif
spdlog::info("slang: init slang handle successfully");
return true;
}
Slang::ComPtr<slang::IBlob> slang_handle::get_entry_point_code() const
{
Slang::ComPtr<slang::IBlob> diagnostics;
Slang::ComPtr<slang::IBlob> code_blob;
program->getEntryPointCode(
entry_point_index_,
target_index,
code_blob.writeRef(),
diagnostics.writeRef());
if (diagnostics)
{
spdlog::error("slang: get entry point code failed: {}", (const char*)diagnostics->getBufferPointer());
return nullptr;
}
return code_blob;
}

core/rhi/slang_handle.h Normal file
View File

@ -0,0 +1,25 @@
#pragma once
#include "slang-com-ptr.h"
#include <string>
class slang_handle
{
public:
Slang::ComPtr<slang::IModule> module;
Slang::ComPtr<slang::IComponentType> program;
Slang::ComPtr<slang::IEntryPoint> entry_point;
static constexpr int target_index = 0; // only one target
bool init_slang_module(const std::string& module_name, const std::string& entry_name);
[[nodiscard]] Slang::ComPtr<slang::IBlob> get_entry_point_code() const;
[[nodiscard]] const char* get_entry_point_name() const
{
slang::ProgramLayout* layout = program->getLayout(0);
const auto entry_reflection = layout->getEntryPointByIndex(entry_point_index_);
return entry_reflection->getName();
}
[[nodiscard]] SlangStage get_shader_type() const { return shader_type_; }
private:
int entry_point_index_ = -1;
SlangStage shader_type_ = SLANG_STAGE_NONE;
};
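slang_handle can also be used on its own to inspect what Slang generates for the current renderer's target (HLSL under DX11, GLSL under OpenGL), which is handy when a downstream D3DCompile or glCompileShader step fails. A minimal debugging sketch, assuming the renderer and its session are already initialized; the module and entry-point names are placeholders.

#include <cstdio>
#include "rhi/slang_handle.h"

// Hypothetical debug helper: prints the generated target source for one entry point.
void dump_entry_point_source_example()
{
    slang_handle handle;
    if (!handle.init_slang_module("blit", "ps_main")) // placeholder module/entry names
        return;
    const auto code = handle.get_entry_point_code();
    if (code)
        std::printf("%.*s\n", static_cast<int>(code->getBufferSize()),
                    static_cast<const char*>(code->getBufferPointer()));
}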

View File

@ -0,0 +1,27 @@
#pragma once
#include <d3dcompiler.h>
inline HMODULE get_compiler_module()
{
static HMODULE compiler_dll = nullptr;
if (compiler_dll == nullptr)
{
// load the system one as the last resort
compiler_dll = LoadLibrary(TEXT("d3dcompiler_47.dll"));
}
return compiler_dll;
}
// @return pointer to the D3DCompile function
inline pD3DCompile get_d3d_compile_func()
{
static HMODULE compiler_dll = get_compiler_module();
if (compiler_dll)
{
return static_cast<pD3DCompile>((void*)GetProcAddress(compiler_dll, "D3DCompile"));
}
return nullptr;
}

View File

@ -3,7 +3,7 @@
#include "rhi/rhi_defintion.h"
DXGI_FORMAT ToDXFormat(texture_format format)
inline DXGI_FORMAT to_dx_format(texture_format format)
{
switch (format)
{

View File

@ -2,8 +2,8 @@
#include <assert.h>
#include "dx_format.h"
#include "renderer_dx11.h"
#include "rhi/windows/dx_format.h"
render_target_dx11::render_target_dx11() : lock_state_(lock_state::NONE)
{
@ -22,7 +22,7 @@ void render_target_dx11::init(int width, int height, texture_format format)
texture_desc.Height = height;
texture_desc.MipLevels = 1;
texture_desc.ArraySize = 1;
texture_desc.Format = ToDXFormat(format);
texture_desc.Format = to_dx_format(format);
texture_desc.SampleDesc.Count = 1;
texture_desc.SampleDesc.Quality = 0;
texture_desc.Usage = D3D11_USAGE_DEFAULT;

View File

@ -8,14 +8,20 @@
#include "imgui_impl_sdl3.h"
#include "render_target_dx11.h"
#include "texture_dx11.h"
#include "CompilerHlsl/compileHlsl.hpp"
#include "ShaderWriter/VertexWriter.hpp"
#include "application/application.h"
#include "shader/shader_cs_dx11.h"
#include "shader/shader_ds_dx11.h"
#include "shader/shader_gs_dx11.h"
#include "shader/shader_hs_dx11.h"
#include "shader/shader_ps_dx11.h"
#include "shader/shader_vs_dx11.h"
ref_count_ptr<ID3D11Device> g_d3d11_device;
ref_count_ptr<ID3D11DeviceContext> g_d3d11_device_context;
ref_count_ptr<IDXGISwapChain> g_d3d11_swap_chain;
ref_count_ptr<ID3D11RenderTargetView> g_main_render_target_view;
renderer_dx11::renderer_dx11()
{
@ -50,19 +56,83 @@ void renderer_dx11::shutdown()
g_d3d11_swap_chain.safe_release();
}
void renderer_dx11::new_frame()
Slang::ComPtr<slang::ISession> renderer_dx11::create_slang_session(const std::string& shader_path)
{
slang::TargetDesc target_desc;
target_desc.format = SLANG_HLSL;
target_desc.profile = g_slang_global_session->findProfile("sm_5_1");
const char* search_paths[] = { shader_path.c_str() };
slang::SessionDesc session_desc;
session_desc.searchPaths = search_paths;
session_desc.searchPathCount = 1;
session_desc.targets = &target_desc;
session_desc.targetCount = 1;
Slang::ComPtr<slang::ISession> out;
g_slang_global_session->createSession(session_desc, out.writeRef());
return out;
}
std::shared_ptr<shader> renderer_dx11::load_shader(const std::string& module_name, const std::string& entry_name)
{
auto handle = std::make_shared<slang_handle>();
if (!handle->init_slang_module(module_name, entry_name))
return nullptr;
const auto shader_type = handle->get_shader_type();
std::shared_ptr<shader> out;
switch (shader_type)
{
case SLANG_STAGE_VERTEX:
{
out = std::make_shared<shader_vs_dx11>(handle);
}
break;
case SLANG_STAGE_HULL:
{
out = std::make_shared<shader_hs_dx11>(handle);
}
break;
case SLANG_STAGE_DOMAIN:
{
out = std::make_shared<shader_ds_dx11>(handle);
}
break;
case SLANG_STAGE_GEOMETRY:
{
out = std::make_shared<shader_gs_dx11>(handle);
}
break;
case SLANG_STAGE_PIXEL:
{
out = std::make_shared<shader_ps_dx11>(handle);
}
break;
case SLANG_STAGE_COMPUTE:
{
out = std::make_shared<shader_cs_dx11>(handle);
}
break;
default:
spdlog::error("slang: unsupported shader type");
return nullptr;
}
if (!out->init())
return nullptr;
return out;
}
void renderer_dx11::new_frame(SDL_Window* window_handle)
{
// Start the Dear ImGui frame
ImGui_ImplDX11_NewFrame();
ImGui_ImplSDL3_NewFrame();
ImGui::NewFrame();
}
void renderer_dx11::end_frame()
void renderer_dx11::end_frame(SDL_Window* window_handle)
{
constexpr ImVec4 clear_color = ImVec4(0.45f, 0.55f, 0.60f, 1.00f);
constexpr float clear_color_with_alpha[4] = { clear_color.x * clear_color.w, clear_color.y * clear_color.w, clear_color.z * clear_color.w, clear_color.w };
ImGui::Render();
ID3D11RenderTargetView* target_view = g_main_render_target_view.get_reference();
@ -80,29 +150,23 @@ void renderer_dx11::resize(int width, int height)
create_render_target();
}
texture* renderer_dx11::create_texture(const unsigned char* data, const int width, const int height)
std::shared_ptr<texture> renderer_dx11::create_texture(const unsigned char* data, const int width, const int height)
{
auto out = new texture_dx11();
auto out = std::make_shared<texture_dx11>();
if (!out->init_data(data, width, height))
{
delete out;
out = nullptr;
}
return out;
}
render_target* renderer_dx11::create_render_target(int width, int height, texture_format format)
std::shared_ptr<render_target> renderer_dx11::create_render_target(int width, int height, texture_format format)
{
const auto target_dx11 = new render_target_dx11();
const auto target_dx11 = std::make_shared<render_target_dx11>();
target_dx11->init(width, height, format);
return target_dx11;
}
bool renderer_dx11::compile_shader()
{
return true;
}
void renderer_dx11::create_render_target()
{
ref_count_ptr<ID3D11Texture2D> p_back_buffer;

View File

@ -4,6 +4,9 @@
#include "misc/ref_counting.h"
#include "rhi/renderer.h"
#include "slang-com-ptr.h"
#include "slang.h"
extern ref_count_ptr<ID3D11Device> g_d3d11_device;
extern ref_count_ptr<ID3D11DeviceContext> g_d3d11_device_context;
extern ref_count_ptr<IDXGISwapChain> g_d3d11_swap_chain;
@ -16,13 +19,15 @@ public:
bool init(SDL_Window* window_handle) override;
void shutdown() override;
void new_frame() override;
void end_frame() override;
Slang::ComPtr<slang::ISession> create_slang_session(const std::string& shader_path) override;
std::shared_ptr<shader> load_shader(const std::string& module_name, const std::string& entry_name) override;
void new_frame(SDL_Window* window_handle) override;
void end_frame(SDL_Window* window_handle) override;
void resize(int width, int height) override;
texture* create_texture(const unsigned char* data, int width, int height) override;
render_target* create_render_target(int width, int height, texture_format format) override;
bool compile_shader() override;
std::shared_ptr<texture> create_texture(const unsigned char* data, int width, int height) override;
std::shared_ptr<render_target> create_render_target(int width, int height, texture_format format) override;
protected:
void create_render_target();
bool create_device(HWND in_hwnd);

View File

@ -0,0 +1,10 @@
#include "shader_cs_dx11.h"
#include "rhi/slang_handle.h"
#include "rhi/windows/dx11/dx11_func.h"
#include "rhi/windows/dx11/renderer_dx11.h"
HRESULT shader_cs_dx11::create_shader(ID3DBlob* blob, ID3D11Device* device)
{
return device->CreateComputeShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, compute_shader_.writeRef());
}

View File

@ -0,0 +1,20 @@
#pragma once
#include "shader_dx11.h"
#include "slang-com-ptr.h"
class shader_cs_dx11 : public shader_dx11
{
public:
explicit shader_cs_dx11(const std::shared_ptr<slang_handle>& handle)
: shader_dx11(handle)
{
}
HRESULT create_shader(ID3DBlob* blob, ID3D11Device* device) override;
[[nodiscard]] ID3D11DeviceChild* get_shader() override { return compute_shader_; }
[[nodiscard]] const char* get_shader_model() const override { return "cs_5_0"; }
[[nodiscard]] bool is_initialized() const override { return compute_shader_ != nullptr; }
private:
Slang::ComPtr<ID3D11ComputeShader> compute_shader_;
};

View File

@ -0,0 +1,6 @@
#include "shader_ds_dx11.h"
HRESULT shader_ds_dx11::create_shader(ID3DBlob* blob, ID3D11Device* device)
{
return device->CreateDomainShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, domain_shader_.writeRef());
}

View File

@ -0,0 +1,20 @@
#pragma once
#include "shader_dx11.h"
#include "slang-com-ptr.h"
class shader_ds_dx11 : public shader_dx11
{
public:
explicit shader_ds_dx11(const std::shared_ptr<slang_handle>& handle)
: shader_dx11(handle)
{
}
HRESULT create_shader(ID3DBlob* blob, ID3D11Device* device) override;
[[nodiscard]] ID3D11DeviceChild* get_shader() override { return domain_shader_; }
[[nodiscard]] const char* get_shader_model() const override { return "ds_5_0"; }
[[nodiscard]] bool is_initialized() const override { return domain_shader_ != nullptr; }
private:
Slang::ComPtr<ID3D11DomainShader> domain_shader_;
};

View File

@ -0,0 +1,62 @@
#include "shader_dx11.h"
#include <d3dcommon.h>
#include "slang-com-ptr.h"
#include "rhi/windows/dx11/dx11_func.h"
#include "rhi/windows/dx11/renderer_dx11.h"
bool shader_dx11::init()
{
Slang::ComPtr<ID3DBlob> kernel_blob;
Slang::ComPtr<ID3DBlob> error_blob;
const auto code_blob = handle_->get_entry_point_code();
if (!code_blob)
{
spdlog::error("slang: get entry point code failed");
return false;
}
const auto compile_func = get_d3d_compile_func();
if (!compile_func)
{
spdlog::critical("slang: get D3DCompile function failed");
return false;
}
unsigned int shader_flags = D3DCOMPILE_ENABLE_STRICTNESS;
#if _DEBUG
shader_flags |= D3DCOMPILE_DEBUG;
#else
shader_flags |= D3DCOMPILE_OPTIMIZATION_LEVEL3;
#endif
// Use the shader model that matches the concrete stage (vs_5_0, ps_5_0, cs_5_0, ...).
const auto target = get_shader_model();
auto hr = compile_func(
code_blob->getBufferPointer(),
code_blob->getBufferSize(),
nullptr,
nullptr,
nullptr,
handle_->get_entry_point_name(),
target,
shader_flags,
0,
kernel_blob.writeRef(),
error_blob.writeRef());
if (FAILED(hr))
{
spdlog::error("slang: compile shader failed: {}", (const char*)error_blob->GetBufferPointer());
return false;
}
hr = create_shader(kernel_blob, g_d3d11_device);
if (FAILED(hr))
{
spdlog::error("slang: create compute shader failed: {:x}", hr);
return false;
}
return true;
}

View File

@ -0,0 +1,18 @@
#pragma once
#include "rhi/shader.h"
#include <d3d11.h>
class shader_dx11 : public shader
{
public:
explicit shader_dx11(const std::shared_ptr<slang_handle>& handle)
: shader(handle)
{
}
bool init() override;
virtual HRESULT create_shader(ID3DBlob* blob, ID3D11Device* device) = 0;
[[nodiscard]] virtual ID3D11DeviceChild* get_shader() = 0;
[[nodiscard]] virtual const char* get_shader_model() const = 0;
};

View File

@ -0,0 +1,6 @@
#include "shader_gs_dx11.h"
HRESULT shader_gs_dx11::create_shader(ID3DBlob* blob, ID3D11Device* device)
{
return device->CreateGeometryShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, geometry_shader_.writeRef());
}

View File

@ -0,0 +1,20 @@
#pragma once
#include "shader_dx11.h"
#include "slang-com-ptr.h"
class shader_gs_dx11 : public shader_dx11
{
public:
explicit shader_gs_dx11(const std::shared_ptr<slang_handle>& handle)
: shader_dx11(handle)
{
}
HRESULT create_shader(ID3DBlob* blob, ID3D11Device* device) override;
[[nodiscard]] ID3D11DeviceChild* get_shader() override { return geometry_shader_; }
[[nodiscard]] const char* get_shader_model() const override { return "gs_5_0"; }
[[nodiscard]] bool is_initialized() const override { return geometry_shader_ != nullptr; }
private:
Slang::ComPtr<ID3D11GeometryShader> geometry_shader_;
};

View File

@ -0,0 +1,6 @@
#include "shader_hs_dx11.h"
HRESULT shader_hs_dx11::create_shader(ID3DBlob* blob, ID3D11Device* device)
{
return device->CreateHullShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, hull_shader_.writeRef());
}

View File

@ -0,0 +1,20 @@
#pragma once
#include "shader_dx11.h"
#include "slang-com-ptr.h"
class shader_hs_dx11 : public shader_dx11
{
public:
explicit shader_hs_dx11(const std::shared_ptr<slang_handle>& handle)
: shader_dx11(handle)
{
}
HRESULT create_shader(ID3DBlob* blob, ID3D11Device* device) override;
[[nodiscard]] ID3D11DeviceChild* get_shader() override { return hull_shader_; }
[[nodiscard]] const char* get_shader_model() const override { return "hs_5_0"; }
[[nodiscard]] bool is_initialized() const override { return hull_shader_ != nullptr; }
private:
Slang::ComPtr<ID3D11HullShader> hull_shader_;
};

View File

@ -0,0 +1,6 @@
#include "shader_ps_dx11.h"
HRESULT shader_ps_dx11::create_shader(ID3DBlob* blob, ID3D11Device* device)
{
return device->CreatePixelShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, pixel_shader_.writeRef());
}

View File

@ -0,0 +1,20 @@
#pragma once
#include "shader_dx11.h"
#include "slang-com-ptr.h"
class shader_ps_dx11 : public shader_dx11
{
public:
explicit shader_ps_dx11(const std::shared_ptr<slang_handle>& handle)
: shader_dx11(handle)
{
}
HRESULT create_shader(ID3DBlob* blob, ID3D11Device* device) override;
[[nodiscard]] ID3D11DeviceChild* get_shader() override { return pixel_shader_; }
[[nodiscard]] const char* get_shader_model() const override { return "ps_5_0"; }
[[nodiscard]] bool is_initialized() const override { return pixel_shader_ != nullptr; }
private:
Slang::ComPtr<ID3D11PixelShader> pixel_shader_;
};

View File

@ -0,0 +1,6 @@
#include "shader_vs_dx11.h"
HRESULT shader_vs_dx11::create_shader(ID3DBlob* blob, ID3D11Device* device)
{
return device->CreateVertexShader(blob->GetBufferPointer(), blob->GetBufferSize(), nullptr, vertex_shader_.writeRef());
}

View File

@ -0,0 +1,20 @@
#pragma once
#include "shader_dx11.h"
#include "slang-com-ptr.h"
class shader_vs_dx11 : public shader_dx11
{
public:
explicit shader_vs_dx11(const std::shared_ptr<slang_handle>& handle)
: shader_dx11(handle)
{
}
HRESULT create_shader(ID3DBlob* blob, ID3D11Device* device) override;
[[nodiscard]] ID3D11DeviceChild* get_shader() override { return vertex_shader_; }
[[nodiscard]] const char* get_shader_model() const override { return "vs_5_0"; }
[[nodiscard]] bool is_initialized() const override { return vertex_shader_ != nullptr; }
private:
Slang::ComPtr<ID3D11VertexShader> vertex_shader_;
};

View File

@ -1,73 +0,0 @@
# see http://editorconfig.org/ for docs on this file
root = true
[*]
# help with sharing files across os's (i.e. network share or through local vm)
end_of_line = lf
#charset temporarily disabled due to bug in VS2017 changing to UTF-8 with BOM (https://favro.com/card/c564ede4ed3337f7b17986b6/Uni-17877)
#charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = true
# formattable file extensions (keep in sync with format.ini from unity-meta repo)
#
# Note: We need to split the formattable files configs into shorter duplicate entries (logically grouped)
# due to known issue in VS editorconfig extension where there is a limit of 51 characters (empirically determined).
# see: https://github.com/editorconfig/editorconfig-visualstudio/issues/21
#
## uncrustify
[*.{c,h,cpp,hpp,m,mm,cc,cs}]
indent_style = space
indent_size = 4
## generic formatter (shaders)
[*.{cg,cginc,glslinc,hlsl,shader,y,ypp,yy}]
indent_style = space
indent_size = 4
## generic formatter (misc)
[*.{asm,s,S,pch,pchmm,java,sh,uss}]
indent_style = space
indent_size = 4
## perltidy
[*.{pl,pm,t,it}]
indent_style = space
indent_size = 4
## unity special
[*.{bindings,mem.xml}]
indent_style = space
indent_size = 4
# other filetypes we want to overwrite default configuration to preserve the standard
[{Makefile,makefile}]
# TAB characters are part of the Makefile format
indent_style = tab
[*.{md,markdown}]
# trailing whitespace is significant in markdown (bad choice, bad!)
trim_trailing_whitespace = false
# keep these and the VS stuff below in sync with .hgeol's CRLF extensions
[*.{vcproj,bat,cmd,xaml,tt,t4,ttinclude}]
end_of_line = crlf
# this VS-specific stuff is based on experiments to see how VS will modify a file after it has been manually edited.
# the settings are meant to closely match what VS does to minimize unnecessary diffs. this duplicates some settings in *
# but let's be explicit here to be safe (in case someone wants to copy-paste this out to another .editorconfig).
[*.{vcxproj,vcxproj.filters,csproj,props,targets}]
indent_style = space
indent_size = 2
end_of_line = crlf
charset = utf-8-bom
trim_trailing_whitespace = true
insert_final_newline = false
[*.{sln,sln.template}]
indent_style = tab
indent_size = 4
end_of_line = crlf
charset = utf-8
trim_trailing_whitespace = true
insert_final_newline = false

View File

@ -1,29 +0,0 @@
# Compiled Object files
*.slo
*.lo
*.o
*.obj
# Precompiled Headers
*.gch
*.pch
# Compiled Dynamic libraries
*.so
*.dylib
*.dll
# Fortran module files
*.mod
*.smod
# Compiled Static libraries
*.lai
*.la
*.a
*.lib
# Executables
*.exe
*.out
*.app

View File

@ -1,51 +0,0 @@
cmake_minimum_required(VERSION 3.15)
project(HLSLcc)
set(CMAKE_CXX_STANDARD 11)
option(HLSLCC_LIBRARY_SHARED "Build shared library instead of static." ON)
file(GLOB HLSLCC_HDRS "${CMAKE_CURRENT_SOURCE_DIR}/include/*")
set(HLSLCC_SRC
src/ControlFlowGraph.cpp
src/ControlFlowGraphUtils.cpp
src/DataTypeAnalysis.cpp
src/Declaration.cpp
src/decode.cpp
src/HLSLcc.cpp
src/HLSLccToolkit.cpp
src/HLSLCrossCompilerContext.cpp
src/Instruction.cpp
src/LoopTransform.cpp
src/Operand.cpp
src/reflect.cpp
src/Shader.cpp
src/ShaderInfo.cpp
src/toGLSL.cpp
src/toGLSLDeclaration.cpp
src/toGLSLInstruction.cpp
src/toGLSLOperand.cpp
src/toMetal.cpp
src/toMetalDeclaration.cpp
src/toMetalInstruction.cpp
src/toMetalOperand.cpp
src/UseDefineChains.cpp
src/cbstring/bsafe.c
src/cbstring/bstraux.c
src/cbstring/bstrlib.c)
if(HLSLCC_LIBRARY_SHARED)
add_library(${PROJECT_NAME} SHARED ${HLSLCC_SRC})
else()
add_library(${PROJECT_NAME} STATIC ${HLSLCC_SRC})
endif()
target_include_directories(${PROJECT_NAME}
PUBLIC
${CMAKE_CURRENT_SOURCE_DIR}
${CMAKE_CURRENT_SOURCE_DIR}/include
PRIVATE
${CMAKE_CURRENT_SOURCE_DIR}/src
${CMAKE_CURRENT_SOURCE_DIR}/src/cbstring
${CMAKE_CURRENT_SOURCE_DIR}/src/internal_includes)

View File

@ -1,54 +0,0 @@
# HLSLcc
DirectX shader bytecode cross compiler.
Originally based on https://github.com/James-Jones/HLSLCrossCompiler.
This library takes DirectX bytecode as input, and translates it into the following languages:
- GLSL (OpenGL 3.2 and later)
- GLSL ES (OpenGL ES 2.0 and later)
- GLSL for Vulkan consumption (as input for Glslang to generate SPIR-V)
- Metal Shading Language
This library is used to generate all shaders in Unity for OpenGL, OpenGL ES 3.0+, Metal and Vulkan.
Changes from original HLSLCrossCompiler:
- Codebase changed to C++11, with major code reorganizations.
- Support for multiple language output backends (currently ToGLSL and ToMetal)
- Metal language output support
- Temp register type analysis: In DX bytecode the registers are typeless 32-bit 4-vectors. We do code analysis to infer the actual data types (to prevent the need for tons of bitcasts).
- Loop transformation: Detect constructs that look like for-loops and transform them back to their original form
- Support for partial precision variables in HLSL (min16float etc). Do extra analysis pass to infer the intended precision of samplers.
- Reflection interface to retrieve the shader inputs and their types.
- Lots of workarounds for various driver/shader compiler bugs.
- Lots of minor fixes and improvements for correctness
- Lots of Unity-specific tweaks to allow extending HLSL without having to change the D3D compiler itself.
## Note
This project is originally integrated into the Unity build systems. However, building this library should be fairly straightforward: just compile `src/*.cpp` (in C++11 mode!) and `src/cbstring/*.c` with the following include paths:
- include
- src/internal_includes
- src/cbstring
- src
Alternatively, a CMakeLists.txt is provided to build the project using cmake.
The main entry point is TranslateHLSLFromMem() function in HLSLcc.cpp (taking DX bytecode as input).
## Contributors
- Mikko Strandborg
- Juho Oravainen
- David Rogers
- Marton Ekler
- Antti Tapaninen
- Florian Penzkofer
- Alexey Orlov
- Povilas Kanapickas
- Aleksandr Kirillov
- Kay Chang
## License
MIT license for HLSLcc itself, BSD license for the bstring library. See license.txt.

View File

@ -1,510 +0,0 @@
#pragma once
#include <vector>
#include <set>
#include <map>
#include <string>
#include "growing_array.h"
#include <stdint.h>
//Reflection
#define MAX_RESOURCE_BINDINGS 256
typedef enum _SHADER_VARIABLE_TYPE
{
SVT_VOID = 0,
SVT_BOOL = 1,
SVT_INT = 2,
SVT_FLOAT = 3,
SVT_STRING = 4,
SVT_TEXTURE = 5,
SVT_TEXTURE1D = 6,
SVT_TEXTURE2D = 7,
SVT_TEXTURE3D = 8,
SVT_TEXTURECUBE = 9,
SVT_SAMPLER = 10,
SVT_PIXELSHADER = 15,
SVT_VERTEXSHADER = 16,
SVT_UINT = 19,
SVT_UINT8 = 20,
SVT_GEOMETRYSHADER = 21,
SVT_RASTERIZER = 22,
SVT_DEPTHSTENCIL = 23,
SVT_BLEND = 24,
SVT_BUFFER = 25,
SVT_CBUFFER = 26,
SVT_TBUFFER = 27,
SVT_TEXTURE1DARRAY = 28,
SVT_TEXTURE2DARRAY = 29,
SVT_RENDERTARGETVIEW = 30,
SVT_DEPTHSTENCILVIEW = 31,
SVT_TEXTURE2DMS = 32,
SVT_TEXTURE2DMSARRAY = 33,
SVT_TEXTURECUBEARRAY = 34,
SVT_HULLSHADER = 35,
SVT_DOMAINSHADER = 36,
SVT_INTERFACE_POINTER = 37,
SVT_COMPUTESHADER = 38,
SVT_DOUBLE = 39,
SVT_RWTEXTURE1D = 40,
SVT_RWTEXTURE1DARRAY = 41,
SVT_RWTEXTURE2D = 42,
SVT_RWTEXTURE2DARRAY = 43,
SVT_RWTEXTURE3D = 44,
SVT_RWBUFFER = 45,
SVT_BYTEADDRESS_BUFFER = 46,
SVT_RWBYTEADDRESS_BUFFER = 47,
SVT_STRUCTURED_BUFFER = 48,
SVT_RWSTRUCTURED_BUFFER = 49,
SVT_APPEND_STRUCTURED_BUFFER = 50,
SVT_CONSUME_STRUCTURED_BUFFER = 51,
// Only used as a marker when analyzing register types
SVT_FORCED_INT = 152,
// Integer that can be either signed or unsigned. Only used as an intermediate step when doing data type analysis
SVT_INT_AMBIGUOUS = 153,
// Partial precision types. Used when doing type analysis
SVT_FLOAT10 = 53, // Seems to be used in constant buffers
SVT_FLOAT16 = 54,
SVT_INT16 = 156,
SVT_INT12 = 157,
SVT_UINT16 = 158,
SVT_FORCE_DWORD = 0x7fffffff
} SHADER_VARIABLE_TYPE;
typedef enum _SHADER_VARIABLE_CLASS
{
SVC_SCALAR = 0,
SVC_VECTOR = (SVC_SCALAR + 1),
SVC_MATRIX_ROWS = (SVC_VECTOR + 1),
SVC_MATRIX_COLUMNS = (SVC_MATRIX_ROWS + 1),
SVC_OBJECT = (SVC_MATRIX_COLUMNS + 1),
SVC_STRUCT = (SVC_OBJECT + 1),
SVC_INTERFACE_CLASS = (SVC_STRUCT + 1),
SVC_INTERFACE_POINTER = (SVC_INTERFACE_CLASS + 1),
SVC_FORCE_DWORD = 0x7fffffff
} SHADER_VARIABLE_CLASS;
///////////////////////////////////////
// Types
enum TESSELLATOR_PARTITIONING
{
TESSELLATOR_PARTITIONING_UNDEFINED = 0,
TESSELLATOR_PARTITIONING_INTEGER = 1,
TESSELLATOR_PARTITIONING_POW2 = 2,
TESSELLATOR_PARTITIONING_FRACTIONAL_ODD = 3,
TESSELLATOR_PARTITIONING_FRACTIONAL_EVEN = 4
};
enum TESSELLATOR_OUTPUT_PRIMITIVE
{
TESSELLATOR_OUTPUT_UNDEFINED = 0,
TESSELLATOR_OUTPUT_POINT = 1,
TESSELLATOR_OUTPUT_LINE = 2,
TESSELLATOR_OUTPUT_TRIANGLE_CW = 3,
TESSELLATOR_OUTPUT_TRIANGLE_CCW = 4
};
typedef enum TESSELLATOR_DOMAIN
{
TESSELLATOR_DOMAIN_UNDEFINED = 0,
TESSELLATOR_DOMAIN_ISOLINE = 1,
TESSELLATOR_DOMAIN_TRI = 2,
TESSELLATOR_DOMAIN_QUAD = 3
} TESSELLATOR_DOMAIN;
enum SPECIAL_NAME
{
NAME_UNDEFINED = 0,
NAME_POSITION = 1,
NAME_CLIP_DISTANCE = 2,
NAME_CULL_DISTANCE = 3,
NAME_RENDER_TARGET_ARRAY_INDEX = 4,
NAME_VIEWPORT_ARRAY_INDEX = 5,
NAME_VERTEX_ID = 6,
NAME_PRIMITIVE_ID = 7,
NAME_INSTANCE_ID = 8,
NAME_IS_FRONT_FACE = 9,
NAME_SAMPLE_INDEX = 10,
// The following are added for D3D11
NAME_FINAL_QUAD_U_EQ_0_EDGE_TESSFACTOR = 11,
NAME_FINAL_QUAD_V_EQ_0_EDGE_TESSFACTOR = 12,
NAME_FINAL_QUAD_U_EQ_1_EDGE_TESSFACTOR = 13,
NAME_FINAL_QUAD_V_EQ_1_EDGE_TESSFACTOR = 14,
NAME_FINAL_QUAD_U_INSIDE_TESSFACTOR = 15,
NAME_FINAL_QUAD_V_INSIDE_TESSFACTOR = 16,
NAME_FINAL_TRI_U_EQ_0_EDGE_TESSFACTOR = 17,
NAME_FINAL_TRI_V_EQ_0_EDGE_TESSFACTOR = 18,
NAME_FINAL_TRI_W_EQ_0_EDGE_TESSFACTOR = 19,
NAME_FINAL_TRI_INSIDE_TESSFACTOR = 20,
NAME_FINAL_LINE_DETAIL_TESSFACTOR = 21,
NAME_FINAL_LINE_DENSITY_TESSFACTOR = 22,
};
enum INOUT_COMPONENT_TYPE
{
INOUT_COMPONENT_UNKNOWN = 0,
INOUT_COMPONENT_UINT32 = 1,
INOUT_COMPONENT_SINT32 = 2,
INOUT_COMPONENT_FLOAT32 = 3
};
enum MIN_PRECISION
{
MIN_PRECISION_DEFAULT = 0,
MIN_PRECISION_FLOAT_16 = 1,
MIN_PRECISION_FLOAT_2_8 = 2,
MIN_PRECISION_RESERVED = 3,
MIN_PRECISION_SINT_16 = 4,
MIN_PRECISION_UINT_16 = 5,
MIN_PRECISION_ANY_16 = 0xf0,
MIN_PRECISION_ANY_10 = 0xf1
};
enum ResourceType
{
RTYPE_CBUFFER,//0
RTYPE_TBUFFER,//1
RTYPE_TEXTURE,//2
RTYPE_SAMPLER,//3
RTYPE_UAV_RWTYPED,//4
RTYPE_STRUCTURED,//5
RTYPE_UAV_RWSTRUCTURED,//6
RTYPE_BYTEADDRESS,//7
RTYPE_UAV_RWBYTEADDRESS,//8
RTYPE_UAV_APPEND_STRUCTURED,//9
RTYPE_UAV_CONSUME_STRUCTURED,//10
RTYPE_UAV_RWSTRUCTURED_WITH_COUNTER,//11
RTYPE_COUNT,
};
enum ResourceGroup
{
RGROUP_CBUFFER,
RGROUP_TEXTURE,
RGROUP_SAMPLER,
RGROUP_UAV,
RGROUP_COUNT,
};
enum REFLECT_RESOURCE_DIMENSION
{
REFLECT_RESOURCE_DIMENSION_UNKNOWN = 0,
REFLECT_RESOURCE_DIMENSION_BUFFER = 1,
REFLECT_RESOURCE_DIMENSION_TEXTURE1D = 2,
REFLECT_RESOURCE_DIMENSION_TEXTURE1DARRAY = 3,
REFLECT_RESOURCE_DIMENSION_TEXTURE2D = 4,
REFLECT_RESOURCE_DIMENSION_TEXTURE2DARRAY = 5,
REFLECT_RESOURCE_DIMENSION_TEXTURE2DMS = 6,
REFLECT_RESOURCE_DIMENSION_TEXTURE2DMSARRAY = 7,
REFLECT_RESOURCE_DIMENSION_TEXTURE3D = 8,
REFLECT_RESOURCE_DIMENSION_TEXTURECUBE = 9,
REFLECT_RESOURCE_DIMENSION_TEXTURECUBEARRAY = 10,
REFLECT_RESOURCE_DIMENSION_BUFFEREX = 11,
};
enum REFLECT_RESOURCE_PRECISION
{
REFLECT_RESOURCE_PRECISION_UNKNOWN = 0,
REFLECT_RESOURCE_PRECISION_LOWP = 1,
REFLECT_RESOURCE_PRECISION_MEDIUMP = 2,
REFLECT_RESOURCE_PRECISION_HIGHP = 3,
};
enum RESOURCE_RETURN_TYPE
{
RETURN_TYPE_UNORM = 1,
RETURN_TYPE_SNORM = 2,
RETURN_TYPE_SINT = 3,
RETURN_TYPE_UINT = 4,
RETURN_TYPE_FLOAT = 5,
RETURN_TYPE_MIXED = 6,
RETURN_TYPE_DOUBLE = 7,
RETURN_TYPE_CONTINUED = 8,
RETURN_TYPE_UNUSED = 9,
};
typedef std::map<std::string, REFLECT_RESOURCE_PRECISION> HLSLccSamplerPrecisionInfo;
struct ResourceBinding
{
std::string name;
ResourceType eType;
uint32_t ui32BindPoint;
uint32_t ui32BindCount;
uint32_t ui32Flags;
uint32_t ui32Space;
uint32_t ui32RangeID;
REFLECT_RESOURCE_DIMENSION eDimension;
RESOURCE_RETURN_TYPE ui32ReturnType;
uint32_t ui32NumSamples;
REFLECT_RESOURCE_PRECISION ePrecision;
int m_SamplerMode; // (SB_SAMPLER_MODE) For samplers, this is the sampler mode this sampler is declared with
SHADER_VARIABLE_TYPE GetDataType() const
{
switch (ePrecision)
{
case REFLECT_RESOURCE_PRECISION_LOWP:
switch (ui32ReturnType)
{
case RETURN_TYPE_UNORM:
case RETURN_TYPE_SNORM:
case RETURN_TYPE_FLOAT:
return SVT_FLOAT10;
case RETURN_TYPE_SINT:
return SVT_INT16;
case RETURN_TYPE_UINT:
return SVT_UINT16;
default:
// ASSERT(0);
return SVT_FLOAT10;
}
case REFLECT_RESOURCE_PRECISION_MEDIUMP:
switch (ui32ReturnType)
{
case RETURN_TYPE_UNORM:
case RETURN_TYPE_SNORM:
case RETURN_TYPE_FLOAT:
return SVT_FLOAT16;
case RETURN_TYPE_SINT:
return SVT_INT16;
case RETURN_TYPE_UINT:
return SVT_UINT16;
default:
// ASSERT(0);
return SVT_FLOAT16;
}
default:
switch (ui32ReturnType)
{
case RETURN_TYPE_UNORM:
case RETURN_TYPE_SNORM:
case RETURN_TYPE_FLOAT:
return SVT_FLOAT;
case RETURN_TYPE_SINT:
return SVT_INT;
case RETURN_TYPE_UINT:
return SVT_UINT;
case RETURN_TYPE_DOUBLE:
return SVT_DOUBLE;
default:
// ASSERT(0);
return SVT_FLOAT;
}
}
}
};
struct ShaderVarType
{
ShaderVarType() :
Class(),
Type(),
Rows(),
Columns(),
Elements(),
MemberCount(),
Offset(),
ParentCount(),
Parent(),
m_IsUsed(false)
{}
SHADER_VARIABLE_CLASS Class;
SHADER_VARIABLE_TYPE Type;
uint32_t Rows;
uint32_t Columns;
uint32_t Elements;
uint32_t MemberCount;
uint32_t Offset;
std::string name;
uint32_t ParentCount;
struct ShaderVarType * Parent;
//Includes all parent names.
std::string fullName;
std::vector<struct ShaderVarType> Members;
bool m_IsUsed; // If not set, is not used in the shader code
uint32_t GetMemberCount() const
{
if (Class == SVC_STRUCT)
{
uint32_t res = 0;
std::vector<struct ShaderVarType>::const_iterator itr;
for (itr = Members.begin(); itr != Members.end(); itr++)
{
res += itr->GetMemberCount();
}
return res;
}
else
return 1;
}
};
struct ShaderVar
{
std::string name;
int haveDefaultValue;
std::vector<uint32_t> pui32DefaultValues;
//Offset/Size in bytes.
uint32_t ui32StartOffset;
uint32_t ui32Size;
ShaderVarType sType;
};
struct ConstantBuffer
{
std::string name;
std::vector<ShaderVar> asVars;
uint32_t ui32TotalSizeInBytes;
uint32_t GetMemberCount(bool stripUnused) const
{
uint32_t res = 0;
std::vector<ShaderVar>::const_iterator itr;
for (itr = asVars.begin(); itr != asVars.end(); itr++)
{
if (stripUnused && !itr->sType.m_IsUsed)
continue;
res += itr->sType.GetMemberCount();
}
return res;
}
};
struct ClassType
{
std::string name;
uint16_t ui16ID;
uint16_t ui16ConstBufStride;
uint16_t ui16Texture;
uint16_t ui16Sampler;
};
struct ClassInstance
{
std::string name;
uint16_t ui16ID;
uint16_t ui16ConstBuf;
uint16_t ui16ConstBufOffset;
uint16_t ui16Texture;
uint16_t ui16Sampler;
};
class Operand;
class ShaderInfo
{
public:
struct InOutSignature
{
std::string semanticName;
uint32_t ui32SemanticIndex;
SPECIAL_NAME eSystemValueType;
INOUT_COMPONENT_TYPE eComponentType;
uint32_t ui32Register;
uint32_t ui32Mask;
uint32_t ui32ReadWriteMask;
int iRebase; // If mask does not start from zero, this indicates the offset that needs to be subtracted from each swizzle
uint32_t ui32Stream;
MIN_PRECISION eMinPrec;
std::set<uint32_t> isIndexed; // Set of phases where this input/output is part of a index range.
std::map<uint32_t, uint32_t> indexStart; // If indexed, contains the start index for the range
std::map<uint32_t, uint32_t> index; // If indexed, contains the current index relative to the index start.
};
ShaderInfo() :
ui32MajorVersion(),
ui32MinorVersion(),
psResourceBindings(),
psConstantBuffers(),
psThisPointerConstBuffer(),
psClassTypes(),
psClassInstances()
{}
SHADER_VARIABLE_TYPE GetTextureDataType(uint32_t regNo);
int GetResourceFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ResourceBinding** ppsOutBinding) const;
void GetConstantBufferFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ConstantBuffer** ppsConstBuf) const;
int GetInterfaceVarFromOffset(uint32_t ui32Offset, ShaderVar** ppsShaderVar) const;
int GetInputSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull = false) const;
int GetPatchConstantSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull = false) const;
int GetOutputSignatureFromRegister(const uint32_t ui32Register,
const uint32_t ui32CompMask,
const uint32_t ui32Stream,
const InOutSignature** ppsOut,
bool allowNull = false) const;
int GetOutputSignatureFromSystemValue(SPECIAL_NAME eSystemValueType, uint32_t ui32SemanticIndex, const InOutSignature** ppsOut) const;
static ResourceGroup ResourceTypeToResourceGroup(ResourceType);
static uint32_t GetCBVarSize(const ShaderVarType* psType, bool matrixAsVectors, bool wholeArraySize = false);
static int GetShaderVarFromOffset(const uint32_t ui32Vec4Offset,
const uint32_t(&pui32Swizzle)[4],
const ConstantBuffer* psCBuf,
const ShaderVarType** ppsShaderVar,
bool* isArray,
std::vector<uint32_t>* arrayIndices,
int32_t* pi32Rebase,
uint32_t flags);
static std::string GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, const std::vector<uint32_t>& indices, const std::string& dynamicIndex, bool revertDynamicIndexCalc, bool matrixAsVectors);
// Apply shader precision information to resource bindings
void AddSamplerPrecisions(HLSLccSamplerPrecisionInfo &info);
uint32_t ui32MajorVersion;
uint32_t ui32MinorVersion;
std::vector<InOutSignature> psInputSignatures;
std::vector<InOutSignature> psOutputSignatures;
std::vector<InOutSignature> psPatchConstantSignatures;
std::vector<ResourceBinding> psResourceBindings;
std::vector<ConstantBuffer> psConstantBuffers;
ConstantBuffer* psThisPointerConstBuffer;
std::vector<ClassType> psClassTypes;
std::vector<ClassInstance> psClassInstances;
//Func table ID to class name ID.
HLSLcc::growing_vector<uint32_t> aui32TableIDToTypeID;
HLSLcc::growing_vector<uint32_t> aui32ResourceMap[RGROUP_COUNT];
HLSLcc::growing_vector<ShaderVarType> sGroupSharedVarType;
TESSELLATOR_PARTITIONING eTessPartitioning;
TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim;
uint32_t ui32TessInputControlPointCount;
uint32_t ui32TessOutputControlPointCount;
TESSELLATOR_DOMAIN eTessDomain;
bool bEarlyFragmentTests;
};
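// A minimal usage sketch of the reflection data above (illustrative, not part of the
// original header): count the members that are actually used across all constant
// buffers of an already-populated ShaderInfo. The function name is an assumption.
static inline uint32_t CountUsedConstantBufferMembers(const ShaderInfo& info)
{
    uint32_t total = 0;
    std::vector<ConstantBuffer>::const_iterator itr;
    for (itr = info.psConstantBuffers.begin(); itr != info.psConstantBuffers.end(); ++itr)
    {
        // GetMemberCount(true) skips vars whose sType.m_IsUsed flag is false
        total += itr->GetMemberCount(true);
    }
    return total;
}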

View File

@ -1,23 +0,0 @@
#pragma once
#include <string>
#include <string.h> // for strncmp
// In Unity, instancing array sizes should be able to be dynamically patched at runtime by defining the macro below.
#define UNITY_RUNTIME_INSTANCING_ARRAY_SIZE_MACRO "UNITY_RUNTIME_INSTANCING_ARRAY_SIZE"
#define UNITY_PRETRANSFORM_CONSTANT_NAME "UnityDisplayOrientationPreTransform"
const unsigned int kArraySizeConstantID = 0;
const unsigned int kPreTransformConstantID = 1;
// TODO: share with Runtime/GfxDevice/InstancingUtilities.h
inline bool IsUnityInstancingConstantBufferName(const char* cbName)
{
static const char kInstancedCbNamePrefix[] = "UnityInstancing";
return strncmp(cbName, kInstancedCbNamePrefix, sizeof(kInstancedCbNamePrefix) - 1) == 0;
}
inline bool IsPreTransformConstantBufferName(const char* cbName)
{
static const char kPreTransformCbNamePrefix[] = "UnityDisplayOrientationPreTransformData";
return strncmp(cbName, kPreTransformCbNamePrefix, sizeof(kPreTransformCbNamePrefix) - 1) == 0;
}
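// A minimal combined-check sketch (the helper name below is an assumption, not part
// of the original header): a reflection pass could use this to detect either of the
// Unity-reserved constant buffer names handled above.
inline bool IsUnityReservedConstantBufferName(const char* cbName)
{
    // e.g. "UnityInstancingPerDraw" matches the instancing prefix, while
    // "UnityDisplayOrientationPreTransformData" matches the pre-transform name.
    return IsUnityInstancingConstantBufferName(cbName) || IsPreTransformConstantBufferName(cbName);
}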

View File

@ -1,45 +0,0 @@
#pragma once
namespace HLSLcc
{
// A vector that automatically grows when written to, filling the intermediate elements with default values.
// Reading from an index returns the default value if attempting to access out of bounds.
template<class T> class growing_vector
{
public:
growing_vector() : data() {}
std::vector<T> data;
T & operator[](std::size_t idx)
{
if (idx >= data.size())
data.resize((idx + 1) * 2);
return data[idx];
}
const T & operator[](std::size_t idx) const
{
static T defaultValue = T();
if (idx >= data.size())
return defaultValue;
return data[idx];
}
};
// Same but with bool specialization
template<> class growing_vector<bool>
{
public:
growing_vector() : data() {}
std::vector<bool> data;
std::vector<bool>::reference operator[](std::size_t idx)
{
if (idx >= data.size())
data.resize((idx + 1) * 2, false);
return data[idx];
}
};
}
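// A minimal usage sketch (illustrative, not part of the original header): non-const
// writes grow the vector, while out-of-bounds const reads return a default value.
inline void growing_vector_usage_sketch()
{
    HLSLcc::growing_vector<int> v;
    v[5] = 42;                                      // grows the underlying storage
    const HLSLcc::growing_vector<int>& cv = v;
    int a = cv[5];                                  // 42
    int b = cv[100];                                // 0, no resize on const access
    (void)a; (void)b;
}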

View File

@ -1,816 +0,0 @@
#ifndef HLSLCC_H_
#define HLSLCC_H_
#include <string>
#include <vector>
#include <map>
#include <algorithm>
#if defined(_WIN32) && defined(HLSLCC_DYNLIB)
#define HLSLCC_APIENTRY __stdcall
#if defined(libHLSLcc_EXPORTS)
#define HLSLCC_API __declspec(dllexport)
#else
#define HLSLCC_API __declspec(dllimport)
#endif
#else
#define HLSLCC_APIENTRY
#define HLSLCC_API
#endif
#include <stdint.h>
#include <string.h>
typedef enum
{
LANG_DEFAULT,// Depends on the HLSL shader model.
LANG_ES_100, LANG_ES_FIRST = LANG_ES_100,
LANG_ES_300,
LANG_ES_310, LANG_ES_LAST = LANG_ES_310,
LANG_120, LANG_GL_FIRST = LANG_120,
LANG_130,
LANG_140,
LANG_150,
LANG_330,
LANG_400,
LANG_410,
LANG_420,
LANG_430,
LANG_440, LANG_GL_LAST = LANG_440,
LANG_METAL,
} GLLang;
typedef struct GlExtensions
{
uint32_t ARB_explicit_attrib_location : 1;
uint32_t ARB_explicit_uniform_location : 1;
uint32_t ARB_shading_language_420pack : 1;
uint32_t OVR_multiview : 1;
uint32_t EXT_shader_framebuffer_fetch : 1;
} GlExtensions;
#include "ShaderInfo.h"
#include "UnityInstancingFlexibleArraySize.h"
typedef std::vector<std::string> TextureSamplerPairs;
typedef enum INTERPOLATION_MODE
{
INTERPOLATION_UNDEFINED = 0,
INTERPOLATION_CONSTANT = 1,
INTERPOLATION_LINEAR = 2,
INTERPOLATION_LINEAR_CENTROID = 3,
INTERPOLATION_LINEAR_NOPERSPECTIVE = 4,
INTERPOLATION_LINEAR_NOPERSPECTIVE_CENTROID = 5,
INTERPOLATION_LINEAR_SAMPLE = 6,
INTERPOLATION_LINEAR_NOPERSPECTIVE_SAMPLE = 7,
} INTERPOLATION_MODE;
#define PS_FLAG_VERTEX_SHADER 0x1
#define PS_FLAG_HULL_SHADER 0x2
#define PS_FLAG_DOMAIN_SHADER 0x4
#define PS_FLAG_GEOMETRY_SHADER 0x8
#define PS_FLAG_PIXEL_SHADER 0x10
#define TO_FLAG_NONE 0x0
#define TO_FLAG_INTEGER 0x1
#define TO_FLAG_NAME_ONLY 0x2
#define TO_FLAG_DECLARATION_NAME 0x4
#define TO_FLAG_DESTINATION 0x8 //Operand is being written to by assignment.
#define TO_FLAG_UNSIGNED_INTEGER 0x10
#define TO_FLAG_DOUBLE 0x20
// --- TO_AUTO_BITCAST_TO_FLOAT ---
//If the operand is an integer temp variable then this flag
//indicates that the temp has a valid floating point encoding
//and that the current expression expects the operand to be floating point
//and therefore intBitsToFloat must be applied to that variable.
#define TO_AUTO_BITCAST_TO_FLOAT 0x40
#define TO_AUTO_BITCAST_TO_INT 0x80
#define TO_AUTO_BITCAST_TO_UINT 0x100
// AUTO_EXPAND flags automatically expand the operand to at least (i/u)vecX
// to match HLSL functionality.
#define TO_AUTO_EXPAND_TO_VEC2 0x200
#define TO_AUTO_EXPAND_TO_VEC3 0x400
#define TO_AUTO_EXPAND_TO_VEC4 0x800
#define TO_FLAG_BOOL 0x1000
// These flags are only used for Metal:
// Force downscaling of the operand to match
// the other operand (Metal doesn't like mixing halfs with floats)
#define TO_FLAG_FORCE_HALF 0x2000
typedef enum
{
INVALID_SHADER = -1,
PIXEL_SHADER,
VERTEX_SHADER,
GEOMETRY_SHADER,
HULL_SHADER,
DOMAIN_SHADER,
COMPUTE_SHADER,
} SHADER_TYPE;
// Enum for texture dimension reflection data
typedef enum
{
TD_FLOAT = 0,
TD_INT,
TD_2D,
TD_3D,
TD_CUBE,
TD_2DSHADOW,
TD_2DARRAY,
TD_CUBEARRAY
} HLSLCC_TEX_DIMENSION;
// The prefix for all temporary variables used by the generated code.
// Using a texture or uniform name like this will cause conflicts
#define HLSLCC_TEMP_PREFIX "u_xlat"
typedef std::vector<std::pair<std::string, std::string> > MemberDefinitions;
// We store struct definition contents inside a vector of strings
struct StructDefinition
{
StructDefinition() : m_Members(), m_Dependencies(), m_IsPrinted(false) {}
MemberDefinitions m_Members; // A vector of strings with the struct members
std::vector<std::string> m_Dependencies; // A vector of struct names this struct depends on.
bool m_IsPrinted; // Has this struct been printed out yet?
};
typedef std::map<std::string, StructDefinition> StructDefinitions;
// Map of extra function definitions we need to add before the shader body but after the declarations.
typedef std::map<std::string, std::string> FunctionDefinitions;
// A helper class for allocating binding slots
// (because both UAVs and textures use the same slots in Metal, also constant buffers and other buffers etc)
class BindingSlotAllocator
{
typedef std::map<uint32_t, uint32_t> SlotMap;
SlotMap m_Allocations;
uint32_t m_ShaderStageAllocations;
public:
BindingSlotAllocator() : m_Allocations(), m_ShaderStageAllocations(0)
{
for (int i = MAX_RESOURCE_BINDINGS - 1; i >= 0; i--)
m_FreeSlots.push_back(i);
}
enum BindType
{
ConstantBuffer = 0,
RWBuffer,
Texture,
UAV
};
uint32_t GetBindingSlot(uint32_t regNo, BindType type)
{
// The key is the register number with the bind type stored in the highest 16 bits
uint32_t key = (m_ShaderStageAllocations + regNo) | (uint32_t(type) << 16);
SlotMap::iterator itr = m_Allocations.find(key);
if (itr == m_Allocations.end())
{
uint32_t slot = m_FreeSlots.back();
m_FreeSlots.pop_back();
m_Allocations.insert(std::make_pair(key, slot));
return slot;
}
return itr->second;
}
// Func for reserving binding slots with the original reg number.
// Used for fragment shader UAVs (SetRandomWriteTarget etc).
void ReserveBindingSlot(uint32_t regNo, BindType type)
{
uint32_t key = regNo | (uint32_t(type) << 16);
m_Allocations.insert(std::make_pair(key, regNo));
// Remove regNo from free slots
for (int i = m_FreeSlots.size() - 1; i >= 0; i--)
{
if (m_FreeSlots[i] == regNo)
{
m_FreeSlots.erase(m_FreeSlots.begin() + i);
return;
}
}
}
uint32_t PeekFirstFreeSlot() const
{
return m_FreeSlots.back();
}
uint32_t SaveTotalShaderStageAllocationsCount()
{
m_ShaderStageAllocations = m_Allocations.size();
return m_ShaderStageAllocations;
}
private:
std::vector<uint32_t> m_FreeSlots;
};
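// A minimal usage sketch (illustrative, not part of the original header): textures
// and constant buffers drawing from the same slot pool, as they do on Metal.
inline void BindingSlotAllocatorSketch()
{
    BindingSlotAllocator slots;
    uint32_t tex0 = slots.GetBindingSlot(0, BindingSlotAllocator::Texture);         // first free slot
    uint32_t cb0  = slots.GetBindingSlot(0, BindingSlotAllocator::ConstantBuffer);  // next free slot
    uint32_t texAgain = slots.GetBindingSlot(0, BindingSlotAllocator::Texture);     // cached, equals tex0
    (void)tex0; (void)cb0; (void)texAgain;
}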
//The shader stages (Vertex, Pixel et al) do not depend on each other
//in HLSL. GLSL is a different story. HLSLCrossCompiler requires
//that hull shaders must be compiled before domain shaders, and
//the pixel shader must be compiled before all of the others.
//During compilation the GLSLCrossDependencyData struct will
//carry over any information needed about a different shader stage
//in order to construct valid GLSL shader combinations.
//Using GLSLCrossDependencyData is optional; however, without it some shader
//combinations may fail to link or produce runtime errors.
class GLSLCrossDependencyData
{
public:
struct GLSLBufferBindPointInfo
{
uint32_t slot;
bool known;
};
// A container for a single Vulkan resource binding (<set, binding> pair)
struct VulkanResourceBinding
{
uint32_t set;
uint32_t binding;
};
enum GLSLBufferType
{
BufferType_ReadWrite,
BufferType_Constant,
BufferType_SSBO,
BufferType_Texture,
BufferType_UBO,
BufferType_Count,
BufferType_Generic = BufferType_ReadWrite
};
private:
//Required if PixelInterpDependency is true
std::vector<INTERPOLATION_MODE> pixelInterpolation;
// Map of varying locations, indexed by varying names.
typedef std::map<std::string, uint32_t> VaryingLocations;
static const int MAX_NAMESPACES = 6; // Max namespaces: vert input, hull input, domain input, geom input, ps input, (ps output)
VaryingLocations varyingLocationsMap[MAX_NAMESPACES];
uint32_t nextAvailableVaryingLocation[MAX_NAMESPACES];
typedef std::map<std::string, VulkanResourceBinding> VulkanResourceBindings;
VulkanResourceBindings m_VulkanResourceBindings;
uint32_t m_NextAvailableVulkanResourceBinding[8]; // one per set.
typedef std::map<std::string, uint32_t> GLSLResouceBindings;
public:
GLSLResouceBindings m_GLSLResourceBindings;
uint32_t m_NextAvailableGLSLResourceBinding[BufferType_Count]; // UAVs, constant buffers and other buffers have separate binding ranges
uint32_t m_StructuredBufferBindPoints[MAX_RESOURCE_BINDINGS]; // for the old style bindings
inline int GetVaryingNamespace(SHADER_TYPE eShaderType, bool isInput)
{
switch (eShaderType)
{
case VERTEX_SHADER:
return isInput ? 0 : 1;
case HULL_SHADER:
return isInput ? 1 : 2;
case DOMAIN_SHADER:
return isInput ? 2 : 3;
case GEOMETRY_SHADER:
// The input depends on whether there's a tessellation shader before us
if (isInput)
{
return ui32ProgramStages & PS_FLAG_DOMAIN_SHADER ? 3 : 1;
}
return 4;
case PIXEL_SHADER:
// The inputs can come from geom shader, domain shader or directly from vertex shader
if (isInput)
{
if (ui32ProgramStages & PS_FLAG_GEOMETRY_SHADER)
{
return 4;
}
else if (ui32ProgramStages & PS_FLAG_DOMAIN_SHADER)
{
return 3;
}
else
{
return 1;
}
}
return 5; // This value is never really used
default:
return 0;
}
}
public:
GLSLCrossDependencyData()
: eTessPartitioning(),
eTessOutPrim(),
fMaxTessFactor(64.0),
numPatchesInThreadGroup(0),
hasControlPoint(false),
hasPatchConstant(false),
ui32ProgramStages(0),
m_ExtBlendModes()
{
memset(nextAvailableVaryingLocation, 0, sizeof(nextAvailableVaryingLocation));
memset(m_NextAvailableVulkanResourceBinding, 0, sizeof(m_NextAvailableVulkanResourceBinding));
memset(m_NextAvailableGLSLResourceBinding, 0, sizeof(m_NextAvailableGLSLResourceBinding));
}
// Retrieve the location for a varying with a given name.
// If the name doesn't already have an allocated location, allocate one
// and store it into the map.
inline uint32_t GetVaryingLocation(const std::string &name, SHADER_TYPE eShaderType, bool isInput, bool keepLocation, uint32_t maxSemanticIndex)
{
int nspace = GetVaryingNamespace(eShaderType, isInput);
VaryingLocations::iterator itr = varyingLocationsMap[nspace].find(name);
if (itr != varyingLocationsMap[nspace].end())
return itr->second;
if (keepLocation)
{
// Try to generate consistent varying locations based on the semantic indices in the hlsl source, i.e "TEXCOORD11" gets assigned to layout(location = 11)
// Inspect last 2 characters in name
size_t len = name.length();
if (len > 1)
{
if (isdigit(name[len - 1]))
{
uint32_t index = 0;
if (isdigit(name[len - 2]))
index = atoi(&name[len - 2]); // 2-digit index
else
index = atoi(&name[len - 1]); // 1-digit index
if (index < 32) // Some platforms only allow 32 varying locations
{
// Check that index is not already used
bool canUseIndex = true;
for (VaryingLocations::iterator it = varyingLocationsMap[nspace].begin(); it != varyingLocationsMap[nspace].end(); ++it)
{
if (it->second == index)
{
canUseIndex = false;
break;
}
}
if (canUseIndex)
{
varyingLocationsMap[nspace].insert(std::make_pair(name, index));
return index;
}
}
}
}
// Fallback: pick an unused index (max of already allocated AND of semanticIndices found by SignatureAnalysis).
uint32_t maxIndexAlreadyAssigned = 0;
for (VaryingLocations::iterator it = varyingLocationsMap[nspace].begin(); it != varyingLocationsMap[nspace].end(); ++it)
maxIndexAlreadyAssigned = std::max(maxIndexAlreadyAssigned, it->second);
uint32_t fallbackIndex = std::max(maxIndexAlreadyAssigned + 1, maxSemanticIndex + 1);
varyingLocationsMap[nspace].insert(std::make_pair(name, fallbackIndex));
return fallbackIndex;
}
else
{
uint32_t newKey = nextAvailableVaryingLocation[nspace];
nextAvailableVaryingLocation[nspace]++;
varyingLocationsMap[nspace].insert(std::make_pair(name, newKey));
return newKey;
}
}
// Retrieve the binding for a resource (texture, constant buffer, image) with a given name
// If not found, allocate a new one (in set 0) and return that
// The returned value is a pair of <set, binding>
// If the name contains "hlslcc_set_X_bind_Y", those values (from the first occurrence found in the name)
// will be used instead, and all occurrences of that marker will be removed from the name, so the name parameter may be modified.
// If allocRoomForCounter is true, the following binding number in the same set will be allocated with name + '_counter'.
inline VulkanResourceBinding GetVulkanResourceBinding(std::string &name, bool allocRoomForCounter = false, uint32_t preferredSet = 0)
{
// scan for the special marker
const char *marker = "Xhlslcc_set_%d_bind_%dX";
uint32_t Set = 0, Binding = 0;
size_t startLoc = name.find("Xhlslcc");
if ((startLoc != std::string::npos) && (sscanf(name.c_str() + startLoc, marker, &Set, &Binding) == 2))
{
// Get rid of all markers
while ((startLoc = name.find("Xhlslcc")) != std::string::npos)
{
size_t endLoc = name.find('X', startLoc + 1);
if (endLoc == std::string::npos)
break;
name.erase(startLoc, endLoc - startLoc + 1);
}
// Add to map
VulkanResourceBinding newBind = { Set, Binding };
m_VulkanResourceBindings.insert(std::make_pair(name, newBind));
if (allocRoomForCounter)
{
VulkanResourceBinding counterBind = { Set, Binding + 1 };
m_VulkanResourceBindings.insert(std::make_pair(name + "_counter", counterBind));
}
return newBind;
}
VulkanResourceBindings::iterator itr = m_VulkanResourceBindings.find(name);
if (itr != m_VulkanResourceBindings.end())
return itr->second;
// Allocate a new one
VulkanResourceBinding newBind = { preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet] };
m_NextAvailableVulkanResourceBinding[preferredSet]++;
m_VulkanResourceBindings.insert(std::make_pair(name, newBind));
if (allocRoomForCounter)
{
VulkanResourceBinding counterBind = { preferredSet, m_NextAvailableVulkanResourceBinding[preferredSet] };
m_NextAvailableVulkanResourceBinding[preferredSet]++;
m_VulkanResourceBindings.insert(std::make_pair(name + "_counter", counterBind));
}
return newBind;
}
// GLSL Bind point handling logic
// Handles both 'old style' fill around fixed UAV and new style partitioned offsets with fixed UAV locations
// HLSL has separate register spaces for UAV and structured buffers. GLSL has shared register space for all buffers.
// The aim here is to preserve the UAV buffer bindings as they are and use remaining binding points for structured buffers.
// In this step, make m_StructuredBufferBindPoints contain increasingly ordered uints starting from zero.
// This is only used when we are doing old style binding setup
void SetupGLSLResourceBindingSlotsIndices()
{
for (uint32_t i = 0; i < MAX_RESOURCE_BINDINGS; i++)
{
m_StructuredBufferBindPoints[i] = i;
}
}
void RemoveBindPointFromAvailableList(uint32_t bindPoint)
{
for (uint32_t i = 0; i < MAX_RESOURCE_BINDINGS - 1 && m_StructuredBufferBindPoints[i] <= bindPoint; i++)
{
if (m_StructuredBufferBindPoints[i] == bindPoint) // Remove uav binding point from the list by copying array remainder here
{
memcpy(&m_StructuredBufferBindPoints[i], &m_StructuredBufferBindPoints[i + 1], (MAX_RESOURCE_BINDINGS - 1 - i) * sizeof(uint32_t));
break;
}
}
}
void ReserveNamedBindPoint(const std::string &name, uint32_t bindPoint, GLSLBufferType type)
{
m_GLSLResourceBindings.insert(std::make_pair(name, bindPoint));
RemoveBindPointFromAvailableList(bindPoint);
}
bool ShouldUseBufferSpecificBinding(GLSLBufferType bufferType)
{
return bufferType == BufferType_Constant || bufferType == BufferType_Texture || bufferType == BufferType_UBO;
}
uint32_t GetGLSLBufferBindPointIndex(GLSLBufferType bufferType)
{
uint32_t binding = -1;
if (ShouldUseBufferSpecificBinding(bufferType))
{
binding = m_NextAvailableGLSLResourceBinding[bufferType];
}
else
{
binding = m_StructuredBufferBindPoints[m_NextAvailableGLSLResourceBinding[BufferType_Generic]];
}
return binding;
}
void UpdateResourceBindingIndex(GLSLBufferType bufferType)
{
if (ShouldUseBufferSpecificBinding(bufferType))
{
m_NextAvailableGLSLResourceBinding[bufferType]++;
}
else
{
m_NextAvailableGLSLResourceBinding[BufferType_Generic]++;
}
}
inline GLSLBufferBindPointInfo GetGLSLResourceBinding(const std::string &name, GLSLBufferType bufferType)
{
GLSLResouceBindings::iterator itr = m_GLSLResourceBindings.find(name);
if (itr != m_GLSLResourceBindings.end())
{
return GLSLBufferBindPointInfo{ itr->second, true };
}
uint32_t binding = GetGLSLBufferBindPointIndex(bufferType);
UpdateResourceBindingIndex(bufferType);
m_GLSLResourceBindings.insert(std::make_pair(name, binding));
return GLSLBufferBindPointInfo{ binding, false };
}
//dcl_tessellator_partitioning and dcl_tessellator_output_primitive appear in hull shader for D3D,
//but they appear on inputs inside domain shaders for GL.
//Hull shaders must be compiled before domain shaders so that the correct
//partitioning and primitive type information can be saved when compiling
//the hull shader and passed on to domain shader compilation.
TESSELLATOR_PARTITIONING eTessPartitioning;
TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim;
float fMaxTessFactor;
int numPatchesInThreadGroup;
bool hasControlPoint;
bool hasPatchConstant;
// Bitfield for the shader stages this program is going to include (see PS_FLAG_*).
// Needed so we can construct proper shader input and output names
uint32_t ui32ProgramStages;
std::vector<std::string> m_ExtBlendModes; // The blend modes (from KHR_blend_equation_advanced) requested for this shader. See ext spec for list.
inline INTERPOLATION_MODE GetInterpolationMode(uint32_t regNo)
{
if (regNo >= pixelInterpolation.size())
return INTERPOLATION_UNDEFINED;
else
return pixelInterpolation[regNo];
}
inline void SetInterpolationMode(uint32_t regNo, INTERPOLATION_MODE mode)
{
if (regNo >= pixelInterpolation.size())
pixelInterpolation.resize((regNo + 1) * 2, INTERPOLATION_UNDEFINED);
pixelInterpolation[regNo] = mode;
}
struct CompareFirst
{
CompareFirst(std::string val) : m_Val(val) {}
bool operator()(const std::pair<std::string, std::string>& elem) const
{
return m_Val == elem.first;
}
private:
std::string m_Val;
};
inline bool IsMemberDeclared(const std::string &name)
{
if (std::find_if(m_SharedFunctionMembers.begin(), m_SharedFunctionMembers.end(), CompareFirst(name)) != m_SharedFunctionMembers.end())
return true;
return false;
}
MemberDefinitions m_SharedFunctionMembers;
std::vector<std::string> m_SharedDependencies;
BindingSlotAllocator m_SharedTextureSlots, m_SharedSamplerSlots;
BindingSlotAllocator m_SharedBufferSlots;
inline void ClearCrossDependencyData()
{
pixelInterpolation.clear();
for (int i = 0; i < MAX_NAMESPACES; i++)
{
varyingLocationsMap[i].clear();
nextAvailableVaryingLocation[i] = 0;
}
m_SharedFunctionMembers.clear();
m_SharedDependencies.clear();
}
bool IsHullShaderInputAlreadyDeclared(const std::string& name)
{
bool isKnown = false;
for (size_t idx = 0, end = m_hullShaderInputs.size(); idx < end; ++idx)
{
if (m_hullShaderInputs[idx] == name)
{
isKnown = true;
break;
}
}
return isKnown;
}
void RecordHullShaderInput(const std::string& name)
{
m_hullShaderInputs.push_back(name);
}
std::vector<std::string> m_hullShaderInputs;
};
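// A minimal usage sketch (illustrative, not part of the original header): a vertex
// shader output and the matching pixel shader input resolve to the same varying
// namespace, so both queries below return the same location.
inline void CrossDependencySketch()
{
    GLSLCrossDependencyData deps;
    deps.ui32ProgramStages = PS_FLAG_VERTEX_SHADER | PS_FLAG_PIXEL_SHADER;
    uint32_t vsOut = deps.GetVaryingLocation("TEXCOORD0", VERTEX_SHADER, false, false, 0);
    uint32_t psIn  = deps.GetVaryingLocation("TEXCOORD0", PIXEL_SHADER, true, false, 0);
    (void)vsOut; (void)psIn; // both resolve to the same location
}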
struct GLSLShader
{
int shaderType; //One of the GL enums.
std::string sourceCode;
ShaderInfo reflection;
GLLang GLSLLanguage;
TextureSamplerPairs textureSamplers; // HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS fills this out
};
// Interface for retrieving reflection and diagnostics data
class HLSLccReflection
{
public:
HLSLccReflection() {}
virtual ~HLSLccReflection() {}
// Called on errors or diagnostic messages
virtual void OnDiagnostics(const std::string &error, int line, bool isError) {}
virtual void OnInputBinding(const std::string &name, int bindIndex) {}
// Returns false if this constant buffer is not needed for this shader. This info can be used for pruning unused
// constant buffers and vars from compute shaders where we need broader context than a single kernel to know
// if something can be dropped, as the constant buffers are shared between all kernels in a .compute file.
virtual bool OnConstantBuffer(const std::string &name, size_t bufferSize, size_t memberCount) { return true; }
// Returns false if this constant var is not needed for this shader. See above.
virtual bool OnConstant(const std::string &name, int bindIndex, SHADER_VARIABLE_TYPE cType, int rows, int cols, bool isMatrix, int arraySize, bool isUsed) { return true; }
virtual void OnConstantBufferBinding(const std::string &name, int bindIndex) {}
virtual void OnTextureBinding(const std::string &name, int bindIndex, int samplerIndex, bool multisampled, HLSLCC_TEX_DIMENSION dim, bool isUAV) {}
virtual void OnBufferBinding(const std::string &name, int bindIndex, bool isUAV) {}
virtual void OnThreadGroupSize(unsigned int xSize, unsigned int ySize, unsigned int zSize) {}
virtual void OnTessellationInfo(uint32_t tessPartitionMode, uint32_t tessOutputWindingOrder, uint32_t tessMaxFactor, uint32_t tessNumPatchesInThreadGroup) {}
virtual void OnTessellationKernelInfo(uint32_t patchKernelBufferCount) {}
// These are Metal-only for now (but can be trivially added for other backends if needed).
// They are useful mostly for diagnostics, as the interim values are hidden from the user.
virtual void OnVertexProgramOutput(const std::string& name, const std::string& semantic, int semanticIndex) {}
virtual void OnBuiltinOutput(SPECIAL_NAME name) {}
virtual void OnFragmentOutputDeclaration(int numComponents, int outputIndex) {}
enum AccessType
{
ReadAccess = 1 << 0,
WriteAccess = 1 << 1
};
virtual void OnStorageImage(int bindIndex, unsigned int access) {}
};
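// A minimal reflection-callback sketch (illustrative, not part of the original
// header): collect the non-UAV texture bindings reported during translation.
class TextureBindingCollector : public HLSLccReflection
{
public:
    std::vector<std::pair<std::string, int> > textures;
    virtual void OnTextureBinding(const std::string &name, int bindIndex, int samplerIndex,
        bool multisampled, HLSLCC_TEX_DIMENSION dim, bool isUAV)
    {
        if (!isUAV)
            textures.push_back(std::make_pair(name, bindIndex));
    }
};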
/*HLSL constant buffers are treated as default-block uniform arrays by default. This is done
to support versions of GLSL which lack ARB_uniform_buffer_object functionality.
Setting this flag causes each one to have its own uniform block.
Note: Currently the nth const buffer will be named UnformBufferN. This is likely to change to the original HLSL name in the future.*/
static const unsigned int HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT = 0x1;
static const unsigned int HLSLCC_FLAG_ORIGIN_UPPER_LEFT = 0x2;
static const unsigned int HLSLCC_FLAG_PIXEL_CENTER_INTEGER = 0x4;
static const unsigned int HLSLCC_FLAG_GLOBAL_CONSTS_NEVER_IN_UBO = 0x8;
//GS enabled?
//Affects vertex shader (i.e. need to compile vertex shader again to use with/without GS).
//This flag is needed in order for the interfaces between stages to match when GS is in use.
//PS inputs VtxGeoOutput
//GS outputs VtxGeoOutput
//Vs outputs VtxOutput if GS enabled. VtxGeoOutput otherwise.
static const unsigned int HLSLCC_FLAG_GS_ENABLED = 0x10;
static const unsigned int HLSLCC_FLAG_TESS_ENABLED = 0x20;
//Either use this flag or glBindFragDataLocationIndexed.
//When set the first pixel shader output is the first input to blend
//equation, the others go to the second input.
static const unsigned int HLSLCC_FLAG_DUAL_SOURCE_BLENDING = 0x40;
//If set, shader inputs and outputs are declared with their semantic name.
static const unsigned int HLSLCC_FLAG_INOUT_SEMANTIC_NAMES = 0x80;
//If set, shader inputs and outputs are declared with their semantic name appended.
static const unsigned int HLSLCC_FLAG_INOUT_APPEND_SEMANTIC_NAMES = 0x100;
//If set, combines texture/sampler pairs used together into samplers named "texturename_X_samplername".
static const unsigned int HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS = 0x200;
//If set, attribute and uniform explicit location qualifiers are disabled (even if the language version supports that)
static const unsigned int HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS = 0x400;
//If set, global uniforms are not stored in a struct.
static const unsigned int HLSLCC_FLAG_DISABLE_GLOBALS_STRUCT = 0x800;
//If set, image declarations will always have binding and format qualifiers.
static const unsigned int HLSLCC_FLAG_GLES31_IMAGE_QUALIFIERS = 0x1000;
// If set, treats sampler names ending with _highp, _mediump, and _lowp as sampler precision qualifiers
// Also removes that suffix from the generated output
static const unsigned int HLSLCC_FLAG_SAMPLER_PRECISION_ENCODED_IN_NAME = 0x2000;
// If set, adds location qualifiers to intra-shader varyings.
static const unsigned int HLSLCC_FLAG_SEPARABLE_SHADER_OBJECTS = 0x4000; // NOTE: obsolete flag (the behavior enabled by this flag became the default in 83a16a1829cf)
// If set, wraps all uniform buffer declarations in a preprocessor macro #ifdef HLSLCC_ENABLE_UNIFORM_BUFFERS
// so that if that macro is undefined, all UBO declarations will become normal uniforms
static const unsigned int HLSLCC_FLAG_WRAP_UBO = 0x8000;
// If set, skips all members of the $Globals constant buffer struct that are not referenced in the shader code
static const unsigned int HLSLCC_FLAG_REMOVE_UNUSED_GLOBALS = 0x10000;
#define HLSLCC_TRANSLATE_MATRIX_FORMAT_STRING "hlslcc_mtx%dx%d"
// If set, translates all matrix declarations into vec4 arrays (as the DX bytecode treats them), and prefixes the name with 'hlslcc_mtx<rows>x<cols>'
static const unsigned int HLSLCC_FLAG_TRANSLATE_MATRICES = 0x20000;
// If set, emits Vulkan-style (set, binding) bindings, also captures that info from any declaration named "<Name>_hlslcc_set_X_bind_Y"
// Unless bindings are given explicitly, they are allocated into set 0 (map stored in GLSLCrossDependencyData)
static const unsigned int HLSLCC_FLAG_VULKAN_BINDINGS = 0x40000;
// If set, metal output will use linear sampler for shadow compares, otherwise point sampler.
static const unsigned int HLSLCC_FLAG_METAL_SHADOW_SAMPLER_LINEAR = 0x80000;
// If set, avoid emit atomic counter (ARB_shader_atomic_counters) and use atomic functions provided by ARB_shader_storage_buffer_object instead.
static const unsigned int HLSLCC_FLAG_AVOID_SHADER_ATOMIC_COUNTERS = 0x100000;
// Unused 0x200000;
// If set, this shader uses the GLSL extension EXT_shader_framebuffer_fetch
static const unsigned int HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH = 0x400000;
// Build for Switch.
static const unsigned int HLSLCC_FLAG_NVN_TARGET = 0x800000;
// If set, generate an instance name for constant buffers. The GLSL 4.5 spec disallows uniform variables from different constant buffers sharing the same name
// when they are part of the same final linked program. Uniform buffer instance names solve this cross-shader symbol conflict issue.
static const unsigned int HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT_WITH_INSTANCE_NAME = 0x1000000;
// Massage shader steps into Metal compute kernel from vertex/hull shaders + post-tessellation vertex shader from domain shader
static const unsigned int HLSLCC_FLAG_METAL_TESSELLATION = 0x2000000;
// Disable fastmath
static const unsigned int HLSLCC_FLAG_DISABLE_FASTMATH = 0x4000000;
//If set, uniform explicit location qualifiers are enabled (even if the language version doesn't support that)
static const unsigned int HLSLCC_FLAG_FORCE_EXPLICIT_LOCATIONS = 0x8000000;
// If set, each line of the generated source will be preceded by a comment specifying which DirectX bytecode instruction it maps to
static const unsigned int HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS = 0x10000000;
// If set, try to generate consistent varying locations based on the semantic indices in the hlsl source, i.e "TEXCOORD11" gets assigned to layout(location = 11)
static const unsigned int HLSLCC_FLAG_KEEP_VARYING_LOCATIONS = 0x20000000;
// Code generation might vary for mobile targets, or using lower sampler precision than full by default
static const unsigned int HLSLCC_FLAG_MOBILE_TARGET = 0x40000000;
#ifdef __cplusplus
extern "C" {
#endif
HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromFile(const char* filename,
unsigned int flags,
GLLang language,
const GlExtensions *extensions,
GLSLCrossDependencyData* dependencies,
HLSLccSamplerPrecisionInfo& samplerPrecisions,
HLSLccReflection& reflectionCallbacks,
GLSLShader* result
);
HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader,
unsigned int flags,
GLLang language,
const GlExtensions *extensions,
GLSLCrossDependencyData* dependencies,
HLSLccSamplerPrecisionInfo& samplerPrecisions,
HLSLccReflection& reflectionCallbacks,
GLSLShader* result);
#ifdef __cplusplus
}
#endif
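// A minimal end-to-end sketch (illustrative, not part of the original header).
// Assumptions: HLSLccSamplerPrecisionInfo comes from "HLSLccSamplerPrecisionInfo.h"
// and is default-constructible, `dxbcBlob` stands in for compiled DXBC bytecode,
// NULL extensions are acceptable, and a nonzero return value is treated as success.
#include "HLSLccSamplerPrecisionInfo.h"
inline bool TranslateShaderSketch(const char* dxbcBlob, GLSLShader& out)
{
    GLSLCrossDependencyData deps;
    HLSLccSamplerPrecisionInfo samplerPrecisions;
    HLSLccReflection reflection; // default, no-op diagnostic callbacks
    unsigned int flags = HLSLCC_FLAG_UNIFORM_BUFFER_OBJECT
                       | HLSLCC_FLAG_TRANSLATE_MATRICES
                       | HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS;
    return TranslateHLSLFromMem(dxbcBlob, flags, LANG_ES_300, NULL, &deps,
                                samplerPrecisions, reflection, &out) != 0;
}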
#endif

View File

@ -1,3 +0,0 @@
extern "C" {
#include "hlslcc.h"
}

View File

@ -1,799 +0,0 @@
/* A portable stdint.h
****************************************************************************
* BSD License:
****************************************************************************
*
* Copyright (c) 2005-2011 Paul Hsieh
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
*
* 1. Redistributions of source code must retain the above copyright
* notice, this list of conditions and the following disclaimer.
* 2. Redistributions in binary form must reproduce the above copyright
* notice, this list of conditions and the following disclaimer in the
* documentation and/or other materials provided with the distribution.
* 3. The name of the author may not be used to endorse or promote products
* derived from this software without specific prior written permission.
*
* THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
* IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
* OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
* IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
* INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
* NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
* THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*
****************************************************************************
*
* Version 0.1.12
*
* The ANSI C standard committee, for the C99 standard, specified the
* inclusion of a new standard include file called stdint.h. This is
* a very useful and long desired include file which contains several
* very precise definitions for integer scalar types that is
* critically important for making portable several classes of
* applications including cryptography, hashing, variable length
* integer libraries and so on. But for most developers it's likely
* useful just for programming sanity.
*
* The problem is that most compiler vendors have decided not to
* implement the C99 standard, and the next C++ language standard
* (which has a lot more mindshare these days) will be a long time in
* coming and it's unknown whether or not it will include stdint.h or
* how much adoption it will have. Either way, it will be a long time
* before all compilers come with a stdint.h and it also does nothing
* for the extremely large number of compilers available today which
* do not include this file, or anything comparable to it.
*
* So that's what this file is all about. It's an attempt to build a
* single universal include file that works on as many platforms as
* possible to deliver what stdint.h is supposed to. A few things
* that should be noted about this file:
*
* 1) It is not guaranteed to be portable and/or present an identical
* interface on all platforms. The extreme variability of the
* ANSI C standard makes this an impossibility right from the
* very get go. It's really only meant to be useful for the vast
* majority of platforms that possess the capability of
* implementing usefully and precisely defined, standard sized
* integer scalars. Systems which are not intrinsically 2s
* complement may produce invalid constants.
*
* 2) There is an unavoidable use of non-reserved symbols.
*
* 3) Other standard include files are invoked.
*
* 4) This file may come in conflict with future platforms that do
* include stdint.h. The hope is that one or the other can be
* used with no real difference.
*
* 5) In the current version, if your platform can't represent
* int32_t, int16_t and int8_t, it just dumps out with a compiler
* error.
*
* 6) 64 bit integers may or may not be defined. Test for their
* presence with the test: #ifdef INT64_MAX or #ifdef UINT64_MAX.
* Note that this is different from the C99 specification which
* requires the existence of 64 bit support in the compiler. If
* this is not defined for your platform, yet it is capable of
* dealing with 64 bits then it is because this file has not yet
* been extended to cover all of your system's capabilities.
*
* 7) (u)intptr_t may or may not be defined. Test for its presence
* with the test: #ifdef PTRDIFF_MAX. If this is not defined
* for your platform, then it is because this file has not yet
* been extended to cover all of your system's capabilities, not
* because it's optional.
*
* 8) The following might not been defined even if your platform is
* capable of defining it:
*
* WCHAR_MIN
* WCHAR_MAX
* (u)int64_t
* PTRDIFF_MIN
* PTRDIFF_MAX
* (u)intptr_t
*
* 9) The following have not been defined:
*
* WINT_MIN
* WINT_MAX
*
* 10) The criteria for defining (u)int_least(*)_t isn't clear,
* except for systems which don't have a type that precisely
* defined 8, 16, or 32 bit types (which this include file does
* not support anyways). Default definitions have been given.
*
* 11) The criteria for defining (u)int_fast(*)_t isn't something I
* would trust to any particular compiler vendor or the ANSI C
* committee. It is well known that "compatible systems" are
* commonly created that have very different performance
* characteristics from the systems they are compatible with,
* especially those whose vendors make both the compiler and the
* system. Default definitions have been given, but its strongly
* recommended that users never use these definitions for any
* reason (they do *NOT* deliver any serious guarantee of
* improved performance -- not in this file, nor any vendor's
* stdint.h).
*
* 12) The following macros:
*
* PRINTF_INTMAX_MODIFIER
* PRINTF_INT64_MODIFIER
* PRINTF_INT32_MODIFIER
* PRINTF_INT16_MODIFIER
* PRINTF_LEAST64_MODIFIER
* PRINTF_LEAST32_MODIFIER
* PRINTF_LEAST16_MODIFIER
* PRINTF_INTPTR_MODIFIER
*
* are strings which have been defined as the modifiers required
* for the "d", "u" and "x" printf formats to correctly output
* (u)intmax_t, (u)int64_t, (u)int32_t, (u)int16_t, (u)least64_t,
* (u)least32_t, (u)least16_t and (u)intptr_t types respectively.
* PRINTF_INTPTR_MODIFIER is not defined for some systems which
* provide their own stdint.h. PRINTF_INT64_MODIFIER is not
* defined if INT64_MAX is not defined. These are an extension
* beyond what C99 specifies must be in stdint.h.
*
* In addition, the following macros are defined:
*
* PRINTF_INTMAX_HEX_WIDTH
* PRINTF_INT64_HEX_WIDTH
* PRINTF_INT32_HEX_WIDTH
* PRINTF_INT16_HEX_WIDTH
* PRINTF_INT8_HEX_WIDTH
* PRINTF_INTMAX_DEC_WIDTH
* PRINTF_INT64_DEC_WIDTH
* PRINTF_INT32_DEC_WIDTH
* PRINTF_INT16_DEC_WIDTH
* PRINTF_INT8_DEC_WIDTH
*
* Which specifies the maximum number of characters required to
* print the number of that type in either hexadecimal or decimal.
* These are an extension beyond what C99 specifies must be in
* stdint.h.
*
* Compilers tested (all with 0 warnings at their highest respective
* settings): Borland Turbo C 2.0, WATCOM C/C++ 11.0 (16 bits and 32
* bits), Microsoft Visual C++ 6.0 (32 bit), Microsoft Visual Studio
* .net (VC7), Intel C++ 4.0, GNU gcc v3.3.3
*
* This file should be considered a work in progress. Suggestions for
* improvements, especially those which increase coverage are strongly
* encouraged.
*
* Acknowledgements
*
* The following people have made significant contributions to the
* development and testing of this file:
*
* Chris Howie
* John Steele Scott
* Dave Thorup
* John Dill
*
*/
#include <stddef.h>
#include <limits.h>
#include <signal.h>
/*
* For gcc with _STDINT_H, fill in the PRINTF_INT*_MODIFIER macros, and
* do nothing else. On the Mac OS X version of gcc this is _STDINT_H_.
*/
#if ((defined(__STDC__) && __STDC__ && __STDC_VERSION__ >= 199901L) || (defined(__WATCOMC__) && (defined(_STDINT_H_INCLUDED) || __WATCOMC__ >= 1250)) || (defined(__GNUC__) && (defined(_STDINT_H) || defined(_STDINT_H_) || defined(__UINT_FAST64_TYPE__)))) && !defined(_PSTDINT_H_INCLUDED)
#include <stdint.h>
#define _PSTDINT_H_INCLUDED
# ifndef PRINTF_INT64_MODIFIER
# define PRINTF_INT64_MODIFIER "ll"
# endif
# ifndef PRINTF_INT32_MODIFIER
# define PRINTF_INT32_MODIFIER "l"
# endif
# ifndef PRINTF_INT16_MODIFIER
# define PRINTF_INT16_MODIFIER "h"
# endif
# ifndef PRINTF_INTMAX_MODIFIER
# define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
# endif
# ifndef PRINTF_INT64_HEX_WIDTH
# define PRINTF_INT64_HEX_WIDTH "16"
# endif
# ifndef PRINTF_INT32_HEX_WIDTH
# define PRINTF_INT32_HEX_WIDTH "8"
# endif
# ifndef PRINTF_INT16_HEX_WIDTH
# define PRINTF_INT16_HEX_WIDTH "4"
# endif
# ifndef PRINTF_INT8_HEX_WIDTH
# define PRINTF_INT8_HEX_WIDTH "2"
# endif
# ifndef PRINTF_INT64_DEC_WIDTH
# define PRINTF_INT64_DEC_WIDTH "20"
# endif
# ifndef PRINTF_INT32_DEC_WIDTH
# define PRINTF_INT32_DEC_WIDTH "10"
# endif
# ifndef PRINTF_INT16_DEC_WIDTH
# define PRINTF_INT16_DEC_WIDTH "5"
# endif
# ifndef PRINTF_INT8_DEC_WIDTH
# define PRINTF_INT8_DEC_WIDTH "3"
# endif
# ifndef PRINTF_INTMAX_HEX_WIDTH
# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
# endif
# ifndef PRINTF_INTMAX_DEC_WIDTH
# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
# endif
/*
* Something really weird is going on with Open Watcom. Just pull some of
* these duplicated definitions from Open Watcom's stdint.h file for now.
*/
# if defined(__WATCOMC__) && __WATCOMC__ >= 1250
# if !defined(INT64_C)
# define INT64_C(x) (x + (INT64_MAX - INT64_MAX))
# endif
# if !defined(UINT64_C)
# define UINT64_C(x) (x + (UINT64_MAX - UINT64_MAX))
# endif
# if !defined(INT32_C)
# define INT32_C(x) (x + (INT32_MAX - INT32_MAX))
# endif
# if !defined(UINT32_C)
# define UINT32_C(x) (x + (UINT32_MAX - UINT32_MAX))
# endif
# if !defined(INT16_C)
# define INT16_C(x) (x)
# endif
# if !defined(UINT16_C)
# define UINT16_C(x) (x)
# endif
# if !defined(INT8_C)
# define INT8_C(x) (x)
# endif
# if !defined(UINT8_C)
# define UINT8_C(x) (x)
# endif
# if !defined(UINT64_MAX)
# define UINT64_MAX 18446744073709551615ULL
# endif
# if !defined(INT64_MAX)
# define INT64_MAX 9223372036854775807LL
# endif
# if !defined(UINT32_MAX)
# define UINT32_MAX 4294967295UL
# endif
# if !defined(INT32_MAX)
# define INT32_MAX 2147483647L
# endif
# if !defined(INTMAX_MAX)
# define INTMAX_MAX INT64_MAX
# endif
# if !defined(INTMAX_MIN)
# define INTMAX_MIN INT64_MIN
# endif
# endif
#endif
#ifndef _PSTDINT_H_INCLUDED
#define _PSTDINT_H_INCLUDED
#ifndef SIZE_MAX
# define SIZE_MAX (~(size_t)0)
#endif
/*
* Deduce the type assignments from limits.h under the assumption that
* integer sizes in bits are powers of 2, and follow the ANSI
* definitions.
*/
#ifndef UINT8_MAX
# define UINT8_MAX 0xff
#endif
#ifndef uint8_t
# if (UCHAR_MAX == UINT8_MAX) || defined(S_SPLINT_S)
typedef unsigned char uint8_t;
# define UINT8_C(v) ((uint8_t) v)
# else
# error "Platform not supported"
# endif
#endif
#ifndef INT8_MAX
# define INT8_MAX 0x7f
#endif
#ifndef INT8_MIN
# define INT8_MIN INT8_C(0x80)
#endif
#ifndef int8_t
# if (SCHAR_MAX == INT8_MAX) || defined(S_SPLINT_S)
typedef signed char int8_t;
# define INT8_C(v) ((int8_t) v)
# else
# error "Platform not supported"
# endif
#endif
#ifndef UINT16_MAX
# define UINT16_MAX 0xffff
#endif
#ifndef uint16_t
#if (UINT_MAX == UINT16_MAX) || defined(S_SPLINT_S)
typedef unsigned int uint16_t;
# ifndef PRINTF_INT16_MODIFIER
# define PRINTF_INT16_MODIFIER ""
# endif
# define UINT16_C(v) ((uint16_t) (v))
#elif (USHRT_MAX == UINT16_MAX)
typedef unsigned short uint16_t;
# define UINT16_C(v) ((uint16_t) (v))
# ifndef PRINTF_INT16_MODIFIER
# define PRINTF_INT16_MODIFIER "h"
# endif
#else
#error "Platform not supported"
#endif
#endif
#ifndef INT16_MAX
# define INT16_MAX 0x7fff
#endif
#ifndef INT16_MIN
# define INT16_MIN INT16_C(0x8000)
#endif
#ifndef int16_t
#if (INT_MAX == INT16_MAX) || defined(S_SPLINT_S)
typedef signed int int16_t;
# define INT16_C(v) ((int16_t) (v))
# ifndef PRINTF_INT16_MODIFIER
# define PRINTF_INT16_MODIFIER ""
# endif
#elif (SHRT_MAX == INT16_MAX)
typedef signed short int16_t;
# define INT16_C(v) ((int16_t) (v))
# ifndef PRINTF_INT16_MODIFIER
# define PRINTF_INT16_MODIFIER "h"
# endif
#else
#error "Platform not supported"
#endif
#endif
#ifndef UINT32_MAX
# define UINT32_MAX (0xffffffffUL)
#endif
#ifndef uint32_t
#if (ULONG_MAX == UINT32_MAX) || defined(S_SPLINT_S)
typedef unsigned long uint32_t;
# define UINT32_C(v) v ## UL
# ifndef PRINTF_INT32_MODIFIER
# define PRINTF_INT32_MODIFIER "l"
# endif
#elif (UINT_MAX == UINT32_MAX)
typedef unsigned int uint32_t;
# ifndef PRINTF_INT32_MODIFIER
# define PRINTF_INT32_MODIFIER ""
# endif
# define UINT32_C(v) v ## U
#elif (USHRT_MAX == UINT32_MAX)
typedef unsigned short uint32_t;
# define UINT32_C(v) ((unsigned short) (v))
# ifndef PRINTF_INT32_MODIFIER
# define PRINTF_INT32_MODIFIER ""
# endif
#else
#error "Platform not supported"
#endif
#endif
#ifndef INT32_MAX
# define INT32_MAX (0x7fffffffL)
#endif
#ifndef INT32_MIN
# define INT32_MIN INT32_C(0x80000000)
#endif
#ifndef int32_t
#if (LONG_MAX == INT32_MAX) || defined(S_SPLINT_S)
typedef signed long int32_t;
# define INT32_C(v) v ## L
# ifndef PRINTF_INT32_MODIFIER
# define PRINTF_INT32_MODIFIER "l"
# endif
#elif (INT_MAX == INT32_MAX)
typedef signed int int32_t;
# define INT32_C(v) v
# ifndef PRINTF_INT32_MODIFIER
# define PRINTF_INT32_MODIFIER ""
# endif
#elif (SHRT_MAX == INT32_MAX)
typedef signed short int32_t;
# define INT32_C(v) ((short) (v))
# ifndef PRINTF_INT32_MODIFIER
# define PRINTF_INT32_MODIFIER ""
# endif
#else
#error "Platform not supported"
#endif
#endif
/*
* The macro stdint_int64_defined is temporarily used to record
* whether or not 64 bit integer support is available. It must be
* defined for any 64 bit integer extensions for new platforms that are
* added.
*/
#undef stdint_int64_defined
#if (defined(__STDC__) && defined(__STDC_VERSION__)) || defined(S_SPLINT_S)
# if (__STDC__ && __STDC_VERSION__ >= 199901L) || defined(S_SPLINT_S)
# define stdint_int64_defined
typedef long long int64_t;
typedef unsigned long long uint64_t;
# define UINT64_C(v) v ## ULL
# define INT64_C(v) v ## LL
# ifndef PRINTF_INT64_MODIFIER
# define PRINTF_INT64_MODIFIER "ll"
# endif
# endif
#endif
#if !defined(stdint_int64_defined)
# if defined(__GNUC__)
# define stdint_int64_defined
__extension__ typedef long long int64_t;
__extension__ typedef unsigned long long uint64_t;
# define UINT64_C(v) v ## ULL
# define INT64_C(v) v ## LL
# ifndef PRINTF_INT64_MODIFIER
# define PRINTF_INT64_MODIFIER "ll"
# endif
# elif defined(__MWERKS__) || defined(__SUNPRO_C) || defined(__SUNPRO_CC) || defined(__APPLE_CC__) || defined(_LONG_LONG) || defined(_CRAYC) || defined(S_SPLINT_S)
# define stdint_int64_defined
typedef long long int64_t;
typedef unsigned long long uint64_t;
# define UINT64_C(v) v ## ULL
# define INT64_C(v) v ## LL
# ifndef PRINTF_INT64_MODIFIER
# define PRINTF_INT64_MODIFIER "ll"
# endif
# elif (defined(__WATCOMC__) && defined(__WATCOM_INT64__)) || (defined(_MSC_VER) && _INTEGRAL_MAX_BITS >= 64) || (defined(__BORLANDC__) && __BORLANDC__ > 0x460) || defined(__alpha) || defined(__DECC)
# define stdint_int64_defined
typedef __int64 int64_t;
typedef unsigned __int64 uint64_t;
# define UINT64_C(v) v ## UI64
# define INT64_C(v) v ## I64
# ifndef PRINTF_INT64_MODIFIER
# define PRINTF_INT64_MODIFIER "I64"
# endif
# endif
#endif
#if !defined(LONG_LONG_MAX) && defined(INT64_C)
# define LONG_LONG_MAX INT64_C (9223372036854775807)
#endif
#ifndef ULONG_LONG_MAX
# define ULONG_LONG_MAX UINT64_C (18446744073709551615)
#endif
#if !defined(INT64_MAX) && defined(INT64_C)
# define INT64_MAX INT64_C (9223372036854775807)
#endif
#if !defined(INT64_MIN) && defined(INT64_C)
# define INT64_MIN INT64_C (-9223372036854775808)
#endif
#if !defined(UINT64_MAX) && defined(INT64_C)
# define UINT64_MAX UINT64_C (18446744073709551615)
#endif
/*
* Width of hexadecimal for number field.
*/
#ifndef PRINTF_INT64_HEX_WIDTH
# define PRINTF_INT64_HEX_WIDTH "16"
#endif
#ifndef PRINTF_INT32_HEX_WIDTH
# define PRINTF_INT32_HEX_WIDTH "8"
#endif
#ifndef PRINTF_INT16_HEX_WIDTH
# define PRINTF_INT16_HEX_WIDTH "4"
#endif
#ifndef PRINTF_INT8_HEX_WIDTH
# define PRINTF_INT8_HEX_WIDTH "2"
#endif
#ifndef PRINTF_INT64_DEC_WIDTH
# define PRINTF_INT64_DEC_WIDTH "20"
#endif
#ifndef PRINTF_INT32_DEC_WIDTH
# define PRINTF_INT32_DEC_WIDTH "10"
#endif
#ifndef PRINTF_INT16_DEC_WIDTH
# define PRINTF_INT16_DEC_WIDTH "5"
#endif
#ifndef PRINTF_INT8_DEC_WIDTH
# define PRINTF_INT8_DEC_WIDTH "3"
#endif
/*
* OK, let's not worry about 128 bit integers for now. Moore's law says
* we don't need to worry about that until about 2040 at which point
* we'll have bigger things to worry about.
*/
#ifdef stdint_int64_defined
typedef int64_t intmax_t;
typedef uint64_t uintmax_t;
# define INTMAX_MAX INT64_MAX
# define INTMAX_MIN INT64_MIN
# define UINTMAX_MAX UINT64_MAX
# define UINTMAX_C(v) UINT64_C(v)
# define INTMAX_C(v) INT64_C(v)
# ifndef PRINTF_INTMAX_MODIFIER
# define PRINTF_INTMAX_MODIFIER PRINTF_INT64_MODIFIER
# endif
# ifndef PRINTF_INTMAX_HEX_WIDTH
# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT64_HEX_WIDTH
# endif
# ifndef PRINTF_INTMAX_DEC_WIDTH
# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT64_DEC_WIDTH
# endif
#else
typedef int32_t intmax_t;
typedef uint32_t uintmax_t;
# define INTMAX_MAX INT32_MAX
# define UINTMAX_MAX UINT32_MAX
# define UINTMAX_C(v) UINT32_C(v)
# define INTMAX_C(v) INT32_C(v)
# ifndef PRINTF_INTMAX_MODIFIER
# define PRINTF_INTMAX_MODIFIER PRINTF_INT32_MODIFIER
# endif
# ifndef PRINTF_INTMAX_HEX_WIDTH
# define PRINTF_INTMAX_HEX_WIDTH PRINTF_INT32_HEX_WIDTH
# endif
# ifndef PRINTF_INTMAX_DEC_WIDTH
# define PRINTF_INTMAX_DEC_WIDTH PRINTF_INT32_DEC_WIDTH
# endif
#endif
/*
* Because this file currently only supports platforms which have
* precise powers of 2 as bit sizes for the default integers, the
* least definitions are all trivial. It's possible that a future
* version of this file could have different definitions.
*/
#ifndef stdint_least_defined
typedef int8_t int_least8_t;
typedef uint8_t uint_least8_t;
typedef int16_t int_least16_t;
typedef uint16_t uint_least16_t;
typedef int32_t int_least32_t;
typedef uint32_t uint_least32_t;
# define PRINTF_LEAST32_MODIFIER PRINTF_INT32_MODIFIER
# define PRINTF_LEAST16_MODIFIER PRINTF_INT16_MODIFIER
# define UINT_LEAST8_MAX UINT8_MAX
# define INT_LEAST8_MAX INT8_MAX
# define UINT_LEAST16_MAX UINT16_MAX
# define INT_LEAST16_MAX INT16_MAX
# define UINT_LEAST32_MAX UINT32_MAX
# define INT_LEAST32_MAX INT32_MAX
# define INT_LEAST8_MIN INT8_MIN
# define INT_LEAST16_MIN INT16_MIN
# define INT_LEAST32_MIN INT32_MIN
# ifdef stdint_int64_defined
typedef int64_t int_least64_t;
typedef uint64_t uint_least64_t;
# define PRINTF_LEAST64_MODIFIER PRINTF_INT64_MODIFIER
# define UINT_LEAST64_MAX UINT64_MAX
# define INT_LEAST64_MAX INT64_MAX
# define INT_LEAST64_MIN INT64_MIN
# endif
#endif
#undef stdint_least_defined
/*
* The ANSI C committee pretending to know or specify anything about
* performance is the epitome of misguided arrogance. The mandate of
* this file is to *ONLY* ever support that absolute minimum
* definition of the fast integer types, for compatibility purposes.
* No extensions, and no attempt to suggest what may or may not be a
* faster integer type will ever be made in this file. Developers are
* warned to stay away from these types when using this or any other
* stdint.h.
*/
typedef int_least8_t int_fast8_t;
typedef uint_least8_t uint_fast8_t;
typedef int_least16_t int_fast16_t;
typedef uint_least16_t uint_fast16_t;
typedef int_least32_t int_fast32_t;
typedef uint_least32_t uint_fast32_t;
#define UINT_FAST8_MAX UINT_LEAST8_MAX
#define INT_FAST8_MAX INT_LEAST8_MAX
#define UINT_FAST16_MAX UINT_LEAST16_MAX
#define INT_FAST16_MAX INT_LEAST16_MAX
#define UINT_FAST32_MAX UINT_LEAST32_MAX
#define INT_FAST32_MAX INT_LEAST32_MAX
#define INT_FAST8_MIN INT_LEAST8_MIN
#define INT_FAST16_MIN INT_LEAST16_MIN
#define INT_FAST32_MIN INT_LEAST32_MIN
#ifdef stdint_int64_defined
typedef int_least64_t int_fast64_t;
typedef uint_least64_t uint_fast64_t;
# define UINT_FAST64_MAX UINT_LEAST64_MAX
# define INT_FAST64_MAX INT_LEAST64_MAX
# define INT_FAST64_MIN INT_LEAST64_MIN
#endif
#undef stdint_int64_defined
/*
* Whatever piecemeal, per compiler thing we can do about the wchar_t
* type limits.
*/
#if defined(__WATCOMC__) || defined(_MSC_VER) || defined(__GNUC__)
# include <wchar.h>
# ifndef WCHAR_MIN
# define WCHAR_MIN 0
# endif
# ifndef WCHAR_MAX
# define WCHAR_MAX ((wchar_t)-1)
# endif
#endif
/*
* Whatever piecemeal, per compiler/platform thing we can do about the
* (u)intptr_t types and limits.
*/
#if defined(_MSC_VER) && defined(_UINTPTR_T_DEFINED)
# define STDINT_H_UINTPTR_T_DEFINED
#endif
#ifndef STDINT_H_UINTPTR_T_DEFINED
# if defined(__alpha__) || defined(__ia64__) || defined(__x86_64__) || defined(_WIN64)
# define stdint_intptr_bits 64
# elif defined(__WATCOMC__) || defined(__TURBOC__)
# if defined(__TINY__) || defined(__SMALL__) || defined(__MEDIUM__)
# define stdint_intptr_bits 16
# else
# define stdint_intptr_bits 32
# endif
# elif defined(__i386__) || defined(_WIN32) || defined(WIN32)
# define stdint_intptr_bits 32
# elif defined(__INTEL_COMPILER)
#error Unknown compiler
# endif
# ifdef stdint_intptr_bits
# define stdint_intptr_glue3_i(a, b, c) a##b##c
# define stdint_intptr_glue3(a, b, c) stdint_intptr_glue3_i(a,b,c)
# ifndef PRINTF_INTPTR_MODIFIER
# define PRINTF_INTPTR_MODIFIER stdint_intptr_glue3(PRINTF_INT,stdint_intptr_bits,_MODIFIER)
# endif
# ifndef PTRDIFF_MAX
# define PTRDIFF_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
# endif
# ifndef PTRDIFF_MIN
# define PTRDIFF_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
# endif
# ifndef UINTPTR_MAX
# define UINTPTR_MAX stdint_intptr_glue3(UINT,stdint_intptr_bits,_MAX)
# endif
# ifndef INTPTR_MAX
# define INTPTR_MAX stdint_intptr_glue3(INT,stdint_intptr_bits,_MAX)
# endif
# ifndef INTPTR_MIN
# define INTPTR_MIN stdint_intptr_glue3(INT,stdint_intptr_bits,_MIN)
# endif
# ifndef INTPTR_C
# define INTPTR_C(x) stdint_intptr_glue3(INT,stdint_intptr_bits,_C)(x)
# endif
# ifndef UINTPTR_C
# define UINTPTR_C(x) stdint_intptr_glue3(UINT,stdint_intptr_bits,_C)(x)
# endif
typedef stdint_intptr_glue3 (uint, stdint_intptr_bits, _t) uintptr_t;
typedef stdint_intptr_glue3 (int, stdint_intptr_bits, _t) intptr_t;
# else
#error Unknown compiler
# endif
# define STDINT_H_UINTPTR_T_DEFINED
#endif
/*
* Assumes sig_atomic_t is signed and we have a 2s complement machine.
*/
#ifndef SIG_ATOMIC_MAX
# define SIG_ATOMIC_MAX ((((sig_atomic_t) 1) << (sizeof (sig_atomic_t)*CHAR_BIT-1)) - 1)
#endif
#endif
#if defined(__TEST_PSTDINT_FOR_CORRECTNESS)
/*
* Please compile with the maximum warning settings to make sure macros are not
* defined more than once.
*/
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#define glue3_aux(x, y, z) x ## y ## z
#define glue3(x, y, z) glue3_aux(x,y,z)
#define DECLU(bits) glue3(uint,bits,_t) glue3(u,bits,=) glue3(UINT,bits,_C) (0);
#define DECLI(bits) glue3(int,bits,_t) glue3(i,bits,=) glue3(INT,bits,_C) (0);
#define DECL(us, bits) glue3(DECL,us,) (bits)
#define TESTUMAX(bits) glue3(u,bits,=) glue3(~,u,bits); if (glue3(UINT,bits,_MAX) glue3(!=,u,bits)) printf ("Something wrong with UINT%d_MAX\n", bits)
int main()
{
DECL(I, 8)
DECL(U, 8)
DECL(I, 16)
DECL(U, 16)
DECL(I, 32)
DECL(U, 32)
#ifdef INT64_MAX
DECL(I, 64)
DECL(U, 64)
#endif
intmax_t imax = INTMAX_C(0);
uintmax_t umax = UINTMAX_C(0);
char str0[256], str1[256];
sprintf(str0, "%d %x\n", 0, ~0);
sprintf(str1, "%d %x\n", i8, ~0);
if (0 != strcmp(str0, str1)) printf("Something wrong with i8 : %s\n", str1);
sprintf(str1, "%u %x\n", u8, ~0);
if (0 != strcmp(str0, str1)) printf("Something wrong with u8 : %s\n", str1);
sprintf(str1, "%d %x\n", i16, ~0);
if (0 != strcmp(str0, str1)) printf("Something wrong with i16 : %s\n", str1);
sprintf(str1, "%u %x\n", u16, ~0);
if (0 != strcmp(str0, str1)) printf("Something wrong with u16 : %s\n", str1);
sprintf(str1, "%" PRINTF_INT32_MODIFIER "d %x\n", i32, ~0);
if (0 != strcmp(str0, str1)) printf("Something wrong with i32 : %s\n", str1);
sprintf(str1, "%" PRINTF_INT32_MODIFIER "u %x\n", u32, ~0);
if (0 != strcmp(str0, str1)) printf("Something wrong with u32 : %s\n", str1);
#ifdef INT64_MAX
sprintf(str1, "%" PRINTF_INT64_MODIFIER "d %x\n", i64, ~0);
if (0 != strcmp(str0, str1)) printf("Something wrong with i64 : %s\n", str1);
#endif
sprintf(str1, "%" PRINTF_INTMAX_MODIFIER "d %x\n", imax, ~0);
if (0 != strcmp(str0, str1)) printf("Something wrong with imax : %s\n", str1);
sprintf(str1, "%" PRINTF_INTMAX_MODIFIER "u %x\n", umax, ~0);
if (0 != strcmp(str0, str1)) printf("Something wrong with umax : %s\n", str1);
TESTUMAX(8);
TESTUMAX(16);
TESTUMAX(32);
#ifdef INT64_MAX
TESTUMAX(64);
#endif
return EXIT_SUCCESS;
}
#endif

View File

@ -1,53 +0,0 @@
Original HLSLcc source code Copyright (c) 2012 James Jones
Further improvements Copyright (c) 2014-2016 Unity Technologies
All Rights Reserved.
Permission is hereby granted, free of charge, to any person obtaining a
copy of this software and associated documentation files (the "Software"),
to deal in the Software without restriction, including without limitation
the rights to use, copy, modify, merge, publish, distribute, sublicense,
and/or sell copies of the Software, and to permit persons to whom the
Software is furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included
in all copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
THE SOFTWARE.
This software makes use of the bstring library which is provided under the following license:
Copyright (c) 2002-2008 Paul Hsieh
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
Neither the name of bstrlib nor the names of its contributors may be used
to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

View File

@ -1,815 +0,0 @@
#include "internal_includes/debug.h"
#include "internal_includes/ControlFlowGraph.h"
#include "internal_includes/ControlFlowGraphUtils.h"
#include "internal_includes/Instruction.h"
#include "internal_includes/Operand.h"
#include "internal_includes/HLSLccToolkit.h"
#include <algorithm>
using namespace HLSLcc::ControlFlow;
using HLSLcc::ForEachOperand;
const BasicBlock &ControlFlowGraph::Build(const Instruction* firstInstruction, const Instruction* endInstruction)
{
using std::for_each;
m_BlockMap.clear();
m_BlockStorage.clear();
// Self-registering into m_BlockStorage so it goes out of scope when the ControlFlowGraph does
BasicBlock *root = new BasicBlock(Utils::GetNextNonLabelInstruction(firstInstruction), *this, NULL, endInstruction);
// Build the reachable set for each block
bool hadChanges;
do
{
hadChanges = false;
for_each(m_BlockStorage.begin(), m_BlockStorage.end(), [&](const shared_ptr<BasicBlock> &bb)
{
BasicBlock &b = *bb.get();
if (b.RebuildReachable())
{
hadChanges = true;
}
});
}
while (hadChanges == true);
return *root;
}
const BasicBlock *ControlFlowGraph::GetBasicBlockForInstruction(const Instruction *instruction) const
{
BasicBlockMap::const_iterator itr = m_BlockMap.find(Utils::GetNextNonLabelInstruction(instruction));
if (itr == m_BlockMap.end())
return NULL;
return itr->second;
}
BasicBlock *ControlFlowGraph::GetBasicBlockForInstruction(const Instruction *instruction)
{
BasicBlockMap::iterator itr = m_BlockMap.find(Utils::GetNextNonLabelInstruction(instruction));
if (itr == m_BlockMap.end())
return NULL;
return itr->second;
}
// Generate a basic block. Private constructor, can only be constructed from ControlFlowGraph::Build().
// Auto-registers itself into ControlFlowGraph
BasicBlock::BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, const Instruction *psPrecedingBlockHead, const Instruction* endInstruction)
: m_Graph(graph)
, m_First(psFirst)
, m_Last(NULL)
, m_End(endInstruction)
{
m_UEVar.clear();
m_VarKill.clear();
m_Preceding.clear();
m_Succeeding.clear();
m_DEDef.clear();
m_Reachable.clear();
// Check that we've pruned the labels
ASSERT(psFirst == Utils::GetNextNonLabelInstruction(psFirst));
// Insert to block storage, block map and connect to previous block
m_Graph.m_BlockStorage.push_back(shared_ptr<BasicBlock>(this));
bool didInsert = m_Graph.m_BlockMap.insert(std::make_pair(psFirst, this)).second;
ASSERT(didInsert);
if (psPrecedingBlockHead != NULL)
{
m_Preceding.insert(psPrecedingBlockHead);
BasicBlock *prec = m_Graph.GetBasicBlockForInstruction(psPrecedingBlockHead);
ASSERT(prec != 0);
didInsert = prec->m_Succeeding.insert(psFirst).second;
ASSERT(didInsert);
}
Build();
}
void BasicBlock::Build()
{
const Instruction *inst = m_First;
while (inst != m_End)
{
// Process sources first
ForEachOperand(inst, inst + 1, FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND,
[this](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType)
{
if (psOperand->eType != OPERAND_TYPE_TEMP)
return;
uint32_t tempReg = psOperand->ui32RegisterNumber;
uint32_t accessMask = psOperand->GetAccessMask();
// Go through each component
for (int k = 0; k < 4; k++)
{
if (!(accessMask & (1 << k)))
continue;
uint32_t regIdx = tempReg * 4 + k;
// If this index is already in the kill set, it has already been re-defined in this basic block, so ignore it
if (m_VarKill.find(regIdx) != m_VarKill.end())
continue;
// Add to UEVars set. Doesn't matter if it's already there.
m_UEVar.insert(regIdx);
}
return;
});
// Then the destination operands
ForEachOperand(inst, inst + 1, FEO_FLAG_DEST_OPERAND,
[this](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType)
{
if (psOperand->eType != OPERAND_TYPE_TEMP)
return;
uint32_t tempReg = psOperand->ui32RegisterNumber;
uint32_t accessMask = psOperand->GetAccessMask();
// Go through each component
for (int k = 0; k < 4; k++)
{
if (!(accessMask & (1 << k)))
continue;
uint32_t regIdx = tempReg * 4 + k;
// Add to kill set. Dupes are fine, this is a set.
m_VarKill.insert(regIdx);
// Also into the downward definitions. Overwrite the previous definition in this basic block, if any
Definition d(psInst, psOperand);
m_DEDef[regIdx].clear();
m_DEDef[regIdx].insert(d);
}
return;
});
// Check for flow control instructions
bool blockDone = false;
switch (inst->eOpcode)
{
default:
break;
case OPCODE_RET:
// Continue processing; in the case of unreachable code we still need to translate it properly (case 1160309)
// blockDone = true;
break;
case OPCODE_RETC:
// Basic block is done, start a next one.
// There REALLY should be no existing blocks for this one
ASSERT(m_Graph.GetBasicBlockForInstruction(Utils::GetNextNonLabelInstruction(inst + 1)) == NULL);
AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst + 1));
blockDone = true;
break;
case OPCODE_LOOP:
case OPCODE_CASE:
case OPCODE_ENDIF:
case OPCODE_ENDSWITCH:
// Not a flow control branch, but need to start a new block anyway.
AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst + 1));
blockDone = true;
break;
// Branches
case OPCODE_IF:
case OPCODE_BREAKC:
case OPCODE_CONTINUEC:
{
const Instruction *jumpPoint = Utils::GetJumpPoint(inst);
ASSERT(jumpPoint != NULL);
// The control branches to the next instruction or jumps to jumpPoint
AddChildBasicBlock(Utils::GetNextNonLabelInstruction(inst + 1));
AddChildBasicBlock(jumpPoint);
blockDone = true;
break;
}
case OPCODE_SWITCH:
{
bool sawEndSwitch = false;
bool needConnectToParent = false;
const Instruction *jumpPoint = Utils::GetJumpPoint(inst, &sawEndSwitch, &needConnectToParent);
ASSERT(jumpPoint != NULL);
while (1)
{
if (!sawEndSwitch || needConnectToParent)
AddChildBasicBlock(jumpPoint);
if (sawEndSwitch)
break;
// The -1 is a bit of a hack: we always scroll past all labels, so rewind to the last one so that we know to search for the next label
ASSERT((jumpPoint - 1)->eOpcode == OPCODE_CASE || (jumpPoint - 1)->eOpcode == OPCODE_DEFAULT);
jumpPoint = Utils::GetJumpPoint(jumpPoint - 1, &sawEndSwitch, &needConnectToParent);
ASSERT(jumpPoint != NULL);
}
blockDone = true;
break;
}
// Non-conditional jumps
case OPCODE_BREAK:
case OPCODE_ELSE:
case OPCODE_CONTINUE:
case OPCODE_ENDLOOP:
{
const Instruction *jumpPoint = Utils::GetJumpPoint(inst);
ASSERT(jumpPoint != NULL);
AddChildBasicBlock(jumpPoint);
blockDone = true;
break;
}
}
if (blockDone)
break;
inst++;
}
// In the initial building phase, just make m_Reachable equal to m_DEDef
m_Reachable = m_DEDef;
// Tag the end of the basic block
m_Last = std::max(m_First, std::min(inst, m_End - 1));
// printf("Basic Block %d -> %d\n", (int)m_First->id, (int)m_Last->id);
}
BasicBlock * BasicBlock::AddChildBasicBlock(const Instruction *psFirst)
{
// First see if this already exists
BasicBlock *b = m_Graph.GetBasicBlockForInstruction(psFirst);
if (b)
{
// Just add dependency and we're done
b->m_Preceding.insert(m_First);
m_Succeeding.insert(psFirst);
return b;
}
// Otherwise create one. Self-registering and self-connecting
return new BasicBlock(psFirst, m_Graph, m_First, m_End);
}
bool BasicBlock::RebuildReachable()
{
// Building the Reachable set is an iterative process, where each block gets rebuilt until nothing changes.
// Formula: reachable = this.DEDef union ( each preceding.Reachable() minus this.VarKill())
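// Illustrative walk-through (added for clarity, not from the original source): suppose this
// block kills t0.x, downward-defines t0.x -> defD, and has two predecessors whose Reachable
// sets are {t0.x -> defA} and {t0.x -> defB, t1.y -> defC}. The incoming t0.x definitions are
// dropped by the VarKill subtraction, so the result is {t0.x -> defD, t1.y -> defC}.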
ReachableVariables newReachable = m_DEDef;
bool hasChanges = false;
// Loop each predecessor
std::for_each(Preceding().begin(), Preceding().end(), [&](const Instruction *instr)
{
const BasicBlock *prec = m_Graph.GetBasicBlockForInstruction(instr);
const ReachableVariables &precReachable = prec->Reachable();
// Loop each variable*component
std::for_each(precReachable.begin(), precReachable.end(), [&](const std::pair<uint32_t, BasicBlock::ReachableDefinitionsPerVariable> &itr2)
{
uint32_t regIdx = itr2.first;
const BasicBlock::ReachableDefinitionsPerVariable &defs = itr2.second;
// Already killed in this block?
if (VarKill().find(regIdx) != VarKill().end())
return;
// Only do comparisons against current definitions if we've yet to find any changes
BasicBlock::ReachableDefinitionsPerVariable *currReachablePerVar = 0;
if (!hasChanges)
currReachablePerVar = &m_Reachable[regIdx];
BasicBlock::ReachableDefinitionsPerVariable &newReachablePerVar = newReachable[regIdx];
// Loop each definition
std::for_each(defs.begin(), defs.end(), [&](const BasicBlock::Definition &d)
{
if (!hasChanges)
{
// Check if already there
if (currReachablePerVar->find(d) == currReachablePerVar->end())
hasChanges = true;
}
newReachablePerVar.insert(d);
}); // definition
}); // variable*component
}); // predecessor
if (hasChanges)
{
std::swap(m_Reachable, newReachable);
}
return hasChanges;
}
void BasicBlock::RVarUnion(ReachableVariables &a, const ReachableVariables &b)
{
std::for_each(b.begin(), b.end(), [&a](const std::pair<uint32_t, ReachableDefinitionsPerVariable> &rpvPair)
{
uint32_t regIdx = rpvPair.first;
const ReachableDefinitionsPerVariable &rpv = rpvPair.second;
// No previous definitions for this variable?
auto aRPVItr = a.find(regIdx);
if (aRPVItr == a.end())
{
// Just set the definitions and continue
a[regIdx] = rpv;
return;
}
ReachableDefinitionsPerVariable &aRPV = aRPVItr->second;
aRPV.insert(rpv.begin(), rpv.end());
});
}
#if ENABLE_UNIT_TESTS
#define UNITY_EXTERNAL_TOOL 1
#include "Projects/PrecompiledHeaders/UnityPrefix.h" // Needed for defines such as ENABLE_CPP_EXCEPTIONS
#include "Testing.h" // From Runtime/Testing
UNIT_TEST_SUITE(HLSLcc)
{
TEST(ControlFlowGraph_Build_Simple_Works)
{
Instruction inst[] =
{
// MOV t0.xyzw, I0.xyzw
Instruction(0, OPCODE_MOV, 0, 0xf, 0xffffffff, 0xf),
Instruction(1, OPCODE_RET)
};
ControlFlowGraph cfg;
const BasicBlock &root = cfg.Build(inst, inst + ARRAY_SIZE(inst));
CHECK_EQUAL(&inst[0], root.First());
CHECK_EQUAL(&inst[1], root.Last());
CHECK(root.Preceding().empty());
CHECK(root.Succeeding().empty());
CHECK_EQUAL(4, root.VarKill().size());
// Check that all components from t0 are killed
CHECK_EQUAL(1, root.VarKill().count(0));
CHECK_EQUAL(1, root.VarKill().count(1));
CHECK_EQUAL(1, root.VarKill().count(2));
CHECK_EQUAL(1, root.VarKill().count(3));
CHECK_EQUAL(&inst[0], root.DEDef().find(0)->second.begin()->m_Instruction);
CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(0)->second.begin()->m_Operand);
CHECK_EQUAL(&inst[0], root.DEDef().find(1)->second.begin()->m_Instruction);
CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(1)->second.begin()->m_Operand);
CHECK_EQUAL(&inst[0], root.DEDef().find(2)->second.begin()->m_Instruction);
CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(2)->second.begin()->m_Operand);
CHECK_EQUAL(&inst[0], root.DEDef().find(3)->second.begin()->m_Instruction);
CHECK_EQUAL(&inst[0].asOperands[0], root.DEDef().find(3)->second.begin()->m_Operand);
}
TEST(ControlFlowGraph_Build_If_Works)
{
Instruction inst[] =
{
// B0
// 0: MOV t1.xyzw, i0.xyzw
Instruction(0, OPCODE_MOV, 1, 0xf, 0xffffffff, 0xf),
// 1: MUL t0, t1, t1
Instruction(1, OPCODE_MUL, 0, 0xf, 1, 0xf, 1, 0xf),
// 2: IF t1.y
Instruction(2, OPCODE_IF, 1, 2),
// B1
// 3: MOV o0, t0
Instruction(3, OPCODE_MOV, 0xffffffff, 0xf, 0, 0xf),
// 4:
Instruction(4, OPCODE_ELSE),
// B2
// 5: MOV o0, t1
Instruction(5, OPCODE_MOV, 0xffffffff, 0xf, 1, 0xf),
// 6:
Instruction(6, OPCODE_ENDIF),
// B3
// 7:
Instruction(7, OPCODE_NOP),
// 8:
Instruction(8, OPCODE_RET)
};
ControlFlowGraph cfg;
const BasicBlock &root = cfg.Build(inst, inst + ARRAY_SIZE(inst));
CHECK_EQUAL(root.First(), &inst[0]);
CHECK_EQUAL(root.Last(), &inst[2]);
CHECK(root.Preceding().empty());
const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[3]);
const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[5]);
const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[7]);
CHECK(b1 != NULL);
CHECK(b2 != NULL);
CHECK(b3 != NULL);
CHECK_EQUAL(&inst[3], b1->First());
CHECK_EQUAL(&inst[5], b2->First());
CHECK_EQUAL(&inst[7], b3->First());
CHECK_EQUAL(&inst[4], b1->Last());
CHECK_EQUAL(&inst[6], b2->Last());
CHECK_EQUAL(&inst[8], b3->Last());
CHECK_EQUAL(1, root.Succeeding().count(&inst[3]));
CHECK_EQUAL(1, root.Succeeding().count(&inst[5]));
CHECK_EQUAL(2, root.Succeeding().size());
CHECK_EQUAL(1, b1->Preceding().size());
CHECK_EQUAL(1, b1->Preceding().count(&inst[0]));
CHECK_EQUAL(1, b2->Preceding().size());
CHECK_EQUAL(1, b2->Preceding().count(&inst[0]));
CHECK_EQUAL(2, b3->Preceding().size());
CHECK_EQUAL(0, b3->Preceding().count(&inst[0]));
CHECK_EQUAL(1, b3->Preceding().count(&inst[3]));
CHECK_EQUAL(1, b3->Preceding().count(&inst[5]));
// The if block must have upwards-exposed t0
CHECK_EQUAL(1, b1->UEVar().count(0));
CHECK_EQUAL(1, b1->UEVar().count(1));
CHECK_EQUAL(1, b1->UEVar().count(2));
CHECK_EQUAL(1, b1->UEVar().count(3));
// The else block must have upwards-exposed t1
CHECK_EQUAL(1, b2->UEVar().count(4));
CHECK_EQUAL(1, b2->UEVar().count(5));
CHECK_EQUAL(1, b2->UEVar().count(6));
CHECK_EQUAL(1, b2->UEVar().count(7));
CHECK_EQUAL(8, root.VarKill().size());
// Check that all components from t0 and t1 are killed
CHECK_EQUAL(1, root.VarKill().count(0));
CHECK_EQUAL(1, root.VarKill().count(1));
CHECK_EQUAL(1, root.VarKill().count(2));
CHECK_EQUAL(1, root.VarKill().count(3));
CHECK_EQUAL(1, root.VarKill().count(4));
CHECK_EQUAL(1, root.VarKill().count(5));
CHECK_EQUAL(1, root.VarKill().count(6));
CHECK_EQUAL(1, root.VarKill().count(7));
// The expected downwards-exposed definitions:
// B0: t0, t1
// B1-B3: none
CHECK_EQUAL(8, root.DEDef().size());
CHECK_EQUAL(0, b1->DEDef().size());
CHECK_EQUAL(0, b2->DEDef().size());
CHECK_EQUAL(0, b3->DEDef().size());
CHECK(root.DEDef() == root.Reachable());
CHECK(root.Reachable() == b1->Reachable());
CHECK(root.Reachable() == b2->Reachable());
CHECK(root.Reachable() == b3->Reachable());
}
TEST(ControlFlowGraph_Build_SwitchCase_Works)
{
Instruction inst[] =
{
// Start B0
// i0: MOV t0.x, I0.x
Instruction(0, OPCODE_MOV, 0, 1, 0xffffffff, 1),
// i1: MOV t1.xyz, I0.yzw
Instruction(1, OPCODE_MOV, 1, 7, 0xffffffff, 0xe),
// i2: MOV t1.w, t0.x
Instruction(2, OPCODE_MOV, 1, 8, 0xffffffff, 0x1),
// i3: MOV t2, I0
Instruction(3, OPCODE_MOV, 2, 0xf, 0xffffffff, 0xf),
// i4: SWITCH t0.y
Instruction(4, OPCODE_SWITCH, 1, 2),
// End B0
// i5: CASE
Instruction(5, OPCODE_CASE),
// i6: DEFAULT
Instruction(6, OPCODE_DEFAULT),
// Start B1
// i7: MOV t1.z, t0.x
Instruction(7, OPCODE_MOV, 1, 4, 0, 1),
// i8: CASE
Instruction(8, OPCODE_CASE),
// End B1
// Start B2
// i9: MOV t1.z, t2.x
Instruction(9, OPCODE_MOV, 1, 4, 2, 1),
// i10: BREAK
Instruction(10, OPCODE_BREAK),
// End B2
// i11: CASE
Instruction(11, OPCODE_CASE),
// Start B3
// i12: MOV t1.z, t2.y
Instruction(12, OPCODE_MOV, 1, 4, 2, 2),
// i13: BREAKC t0.x
Instruction(13, OPCODE_BREAKC, 0, 1),
// End B3
// i14: CASE
Instruction(14, OPCODE_CASE),
// Start B4
// i15: MOV t1.z, t2.z
Instruction(15, OPCODE_MOV, 1, 4, 2, 4),
// i16: ENDSWITCH
Instruction(16, OPCODE_ENDSWITCH),
// End B4
// Start B5
// i17: MOV o0, t1
Instruction(17, OPCODE_MOV, 0xffffffff, 0xf, 1, 0xf),
// i18: RET
Instruction(18, OPCODE_RET)
// End B5
};
ControlFlowGraph cfg;
const BasicBlock &root = cfg.Build(inst, inst + ARRAY_SIZE(inst));
CHECK_EQUAL(&inst[0], root.First());
CHECK_EQUAL(&inst[4], root.Last());
const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[7]);
const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[9]);
const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[12]);
const BasicBlock *b4 = cfg.GetBasicBlockForInstruction(&inst[15]);
const BasicBlock *b5 = cfg.GetBasicBlockForInstruction(&inst[17]);
CHECK(b1 != NULL);
CHECK(b2 != NULL);
CHECK(b3 != NULL);
CHECK(b4 != NULL);
CHECK(b5 != NULL);
// Check instruction ranges
CHECK_EQUAL(&inst[8], b1->Last());
CHECK_EQUAL(&inst[10], b2->Last());
CHECK_EQUAL(&inst[13], b3->Last());
CHECK_EQUAL(&inst[16], b4->Last());
CHECK_EQUAL(&inst[18], b5->Last());
// Nothing before the root, nothing after b5
CHECK(root.Preceding().empty());
CHECK(b5->Succeeding().empty());
// Check that all connections are there and no others.
// B0->B1
// B0->B2
// B0->B3
// B0->B4
CHECK_EQUAL(1, root.Succeeding().count(&inst[7]));
CHECK_EQUAL(1, root.Succeeding().count(&inst[9]));
CHECK_EQUAL(1, root.Succeeding().count(&inst[12]));
CHECK_EQUAL(1, root.Succeeding().count(&inst[15]));
CHECK_EQUAL(4, root.Succeeding().size());
// B1
// B1->B2
CHECK_EQUAL(1, b1->Succeeding().count(&inst[9]));
CHECK_EQUAL(1, b1->Succeeding().size());
// B0->B1, reverse
CHECK_EQUAL(1, b1->Preceding().count(&inst[0]));
CHECK_EQUAL(1, b1->Preceding().size());
// B2
// B2->B5
CHECK_EQUAL(1, b2->Succeeding().count(&inst[17]));
CHECK_EQUAL(1, b2->Succeeding().size());
CHECK_EQUAL(1, b2->Preceding().count(&inst[7]));
CHECK_EQUAL(1, b2->Preceding().count(&inst[0]));
CHECK_EQUAL(2, b2->Preceding().size());
// B3
// B3->B4
// B3->B5
CHECK_EQUAL(1, b3->Succeeding().count(&inst[15]));
CHECK_EQUAL(1, b3->Succeeding().count(&inst[17]));
CHECK_EQUAL(2, b3->Succeeding().size());
CHECK_EQUAL(1, b3->Preceding().count(&inst[0]));
CHECK_EQUAL(1, b3->Preceding().size());
// B4
CHECK_EQUAL(1, b4->Succeeding().count(&inst[17]));
CHECK_EQUAL(1, b4->Succeeding().size());
CHECK_EQUAL(1, b4->Preceding().count(&inst[0]));
CHECK_EQUAL(2, b4->Preceding().size());
// B5
CHECK_EQUAL(0, b5->Succeeding().size());
CHECK_EQUAL(3, b5->Preceding().size()); //b2, b3, b4
CHECK_EQUAL(1, b5->Preceding().count(&inst[9]));
CHECK_EQUAL(1, b5->Preceding().count(&inst[12]));
CHECK_EQUAL(1, b5->Preceding().count(&inst[15]));
// Verify reachable sets
CHECK(root.Reachable() == root.DEDef());
CHECK_EQUAL(9, root.Reachable().size());
// B5 should have these reachables:
// t0.x only from b0
// t1.xy from b0, i1
// t1.z from b2,i9 + b3,i12 + b4,i15 (the defs from b0 and b1 are killed by b2)
// t1.w from b0, i2
// t2.xyzw from b0, i3
// Cast away const so [] works.
BasicBlock::ReachableVariables &r = (BasicBlock::ReachableVariables &)b5->Reachable();
CHECK_EQUAL(9, r.size());
CHECK_EQUAL(1, r[0].size());
CHECK_EQUAL(0, r[1].size());
CHECK_EQUAL(0, r[2].size());
CHECK_EQUAL(0, r[3].size());
CHECK_EQUAL(&inst[0], r[0].begin()->m_Instruction);
CHECK_EQUAL(1, r[4].size());
CHECK_EQUAL(1, r[5].size());
CHECK_EQUAL(3, r[6].size());
CHECK_EQUAL(1, r[7].size());
const BasicBlock::ReachableDefinitionsPerVariable &d = r[6];
BasicBlock::ReachableDefinitionsPerVariable t;
t.insert(BasicBlock::Definition(&inst[9], &inst[9].asOperands[0]));
t.insert(BasicBlock::Definition(&inst[12], &inst[12].asOperands[0]));
t.insert(BasicBlock::Definition(&inst[15], &inst[15].asOperands[0]));
CHECK(t == d);
CHECK_EQUAL(1, r[8].size());
CHECK_EQUAL(1, r[9].size());
CHECK_EQUAL(1, r[10].size());
CHECK_EQUAL(1, r[11].size());
}
TEST(ControlFlowGraph_Build_Loop_Works)
{
Instruction inst[] =
{
// Start B0
// i0: MOV t0.x, I0.x
Instruction(0, OPCODE_MOV, 0, 1, 0xffffffff, 1),
// i1: MOV t1.xy, I0.zw // The .x definition should not make it past the loop, .y should.
Instruction(1, OPCODE_MOV, 1, 3, 0xffffffff, 0xc),
// i2: LOOP
Instruction(2, OPCODE_LOOP, 1, 2),
// End B0 -> B1
// Begin B1
// i3: MOV t1.x, t0.x
Instruction(3, OPCODE_MOV, 1, 1, 0, 1),
// i4: BREAKC t0.x
Instruction(4, OPCODE_BREAKC, 0, 1),
// End B1 -> B2, B3
// Begin B2
// i5: ADD t0.x, t0.y
Instruction(5, OPCODE_ADD, 0, 1, 0, 2),
// i6: MOV t1.x, t0.x // This should never show up as definition
Instruction(6, OPCODE_MOV, 1, 1, 0, 1),
// i7: ENDLOOP
Instruction(7, OPCODE_ENDLOOP),
// End B2 -> B1
// Start B3
// i8: MOV O0.x, t1.x
Instruction(8, OPCODE_MOV, 0xffffffff, 1, 1, 1),
// i9: RET
Instruction(9, OPCODE_RET),
// End B3
};
ControlFlowGraph cfg;
const BasicBlock &root = cfg.Build(inst, inst + ARRAY_SIZE(inst));
CHECK_EQUAL(&inst[0], root.First());
CHECK_EQUAL(&inst[2], root.Last());
const BasicBlock *b1 = cfg.GetBasicBlockForInstruction(&inst[3]);
const BasicBlock *b2 = cfg.GetBasicBlockForInstruction(&inst[5]);
const BasicBlock *b3 = cfg.GetBasicBlockForInstruction(&inst[8]);
CHECK(b1 != NULL);
CHECK(b2 != NULL);
CHECK(b3 != NULL);
// Check instruction ranges
CHECK_EQUAL(&inst[4], b1->Last());
CHECK_EQUAL(&inst[7], b2->Last());
CHECK_EQUAL(&inst[9], b3->Last());
// Nothing before the root, nothing after b3
CHECK(root.Preceding().empty());
CHECK(b3->Succeeding().empty());
// Check that all connections are there and no others.
// B0->B1
CHECK_EQUAL(1, root.Succeeding().count(&inst[3]));
CHECK_EQUAL(1, root.Succeeding().size());
// B1
// B1->B2
// B1->B3
CHECK_EQUAL(1, b1->Succeeding().count(&inst[5]));
CHECK_EQUAL(1, b1->Succeeding().count(&inst[8]));
CHECK_EQUAL(2, b1->Succeeding().size());
// B0->B1, reverse
CHECK_EQUAL(1, b1->Preceding().count(&inst[0]));
// We may also come from B2
CHECK_EQUAL(1, b1->Preceding().count(&inst[5]));
CHECK_EQUAL(2, b1->Preceding().size());
// B2
// B2->B1
CHECK_EQUAL(1, b2->Succeeding().count(&inst[3]));
CHECK_EQUAL(1, b2->Succeeding().size());
CHECK_EQUAL(1, b2->Preceding().count(&inst[3]));
CHECK_EQUAL(1, b2->Preceding().size());
// B3
CHECK_EQUAL(1, b3->Preceding().count(&inst[3]));
CHECK_EQUAL(1, b3->Preceding().size());
// Verify reachable sets
BasicBlock::ReachableVariables t;
// B0 DEDef and Reachable
t.clear();
t[0].insert(BasicBlock::Definition(&inst[0], &inst[0].asOperands[0]));
t[4].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0]));
t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0]));
CHECK(root.DEDef() == t);
CHECK(root.Reachable() == root.DEDef());
// B1 DEDef and Reachable
t.clear();
t[4].insert(BasicBlock::Definition(&inst[3], &inst[3].asOperands[0]));
CHECK(b1->DEDef() == t);
t = b1->DEDef();
// t0.x from i0, t1.y (but not .x) from i1
t[0].insert(BasicBlock::Definition(&inst[0], &inst[0].asOperands[0]));
t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0]));
// t0.x from i5, but nothing from i6
t[0].insert(BasicBlock::Definition(&inst[5], &inst[5].asOperands[0]));
CHECK(b1->Reachable() == t);
// B2
t.clear();
t[0].insert(BasicBlock::Definition(&inst[5], &inst[5].asOperands[0]));
t[4].insert(BasicBlock::Definition(&inst[6], &inst[6].asOperands[0]));
CHECK(b2->DEDef() == t);
t = b2->DEDef();
t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0]));
CHECK(b2->Reachable() == t);
// B3
t.clear();
CHECK(b3->DEDef() == t);
// t0.x from i0, t1.y from i1
t[0].insert(BasicBlock::Definition(&inst[0], &inst[0].asOperands[0]));
t[5].insert(BasicBlock::Definition(&inst[1], &inst[1].asOperands[0]));
// t1.x from i3
t[4].insert(BasicBlock::Definition(&inst[3], &inst[3].asOperands[0]));
// t0.x from i5
t[0].insert(BasicBlock::Definition(&inst[5], &inst[5].asOperands[0]));
CHECK(b3->Reachable() == t);
}
}
#endif

View File

@ -1,116 +0,0 @@
#include "ControlFlowGraphUtils.h"
#include "internal_includes/debug.h"
#include "internal_includes/Instruction.h"
#include "internal_includes/Operand.h"
// Get the next instruction that's not one of CASE, DEFAULT, LOOP, ENDSWITCH
const Instruction *HLSLcc::ControlFlow::Utils::GetNextNonLabelInstruction(const Instruction *psStart, bool *sawEndSwitch /*= 0*/)
{
const Instruction *inst = psStart;
// Skip CASE/DEFAULT/ENDSWITCH/LOOP labels
while (inst->eOpcode == OPCODE_CASE || inst->eOpcode == OPCODE_DEFAULT || inst->eOpcode == OPCODE_ENDSWITCH || inst->eOpcode == OPCODE_LOOP)
{
// We really shouldn't be seeing ENDSWITCH without sawEndSwitch being set (as in, we're expecting it)
ASSERT(inst->eOpcode != OPCODE_ENDSWITCH || sawEndSwitch != NULL);
if (inst->eOpcode == OPCODE_ENDSWITCH && sawEndSwitch != NULL)
*sawEndSwitch = true;
inst++;
}
return inst;
}
// For a given flow-control instruction, find the corresponding jump location:
// If the input is OPCODE_IF, then find the next same-level ELSE or ENDIF +1
// For ELSE, find same level ENDIF + 1
// For BREAK/BREAKC, find next ENDLOOP or ENDSWITCH + 1
// For SWITCH, find next same-level CASE/DEFAULT (skip multiple consecutive case/default labels) or ENDSWITCH + 1
// For ENDLOOP, find previous same-level LOOP + 1
// For CASE/DEFAULT, find next same-level CASE/DEFAULT or ENDSWITCH + 1, skip multiple consecutive case/default labels
// For CONTINUE/CONTINUEC, find the previous same-level LOOP + 1
// Note that LOOP/ENDSWITCH itself is nothing but a label, but it still starts a new basic block.
// Note that CASE labels fall through.
// Always returns the beginning of the next block, so skip multiple CASE/DEFAULT labels etc.
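// Illustrative example (added for clarity, not from the original source): for the sequence
//   0: IF   1: MOV   2: ELSE   3: MOV   4: ENDIF   5: RET
// GetJumpPoint(&inst[0]) returns &inst[3] (the instruction after the same-level ELSE), and
// GetJumpPoint(&inst[2]) returns &inst[5] (the instruction after the same-level ENDIF).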
const Instruction * HLSLcc::ControlFlow::Utils::GetJumpPoint(const Instruction *psStart, bool *sawEndSwitch /*= 0*/, bool *needConnectToParent /* = 0*/)
{
const Instruction *inst = psStart;
int depth = 0;
OPCODE_TYPE op = psStart->eOpcode;
ASSERT(op == OPCODE_IF || op == OPCODE_ELSE || op == OPCODE_BREAK || op == OPCODE_BREAKC
|| op == OPCODE_SWITCH || op == OPCODE_CASE || op == OPCODE_DEFAULT
|| op == OPCODE_ENDLOOP || op == OPCODE_CONTINUE || op == OPCODE_CONTINUEC);
switch (op)
{
default:
ASSERT(0);
break;
case OPCODE_IF:
case OPCODE_ELSE:
while (1)
{
inst++;
if ((inst->eOpcode == OPCODE_ELSE || inst->eOpcode == OPCODE_ENDIF) && (depth == 0))
{
return GetNextNonLabelInstruction(inst + 1, sawEndSwitch);
}
if (inst->eOpcode == OPCODE_IF)
depth++;
if (inst->eOpcode == OPCODE_ENDIF)
depth--;
}
case OPCODE_BREAK:
case OPCODE_BREAKC:
while (1)
{
inst++;
if ((inst->eOpcode == OPCODE_ENDLOOP || inst->eOpcode == OPCODE_ENDSWITCH) && (depth == 0))
{
return GetNextNonLabelInstruction(inst + 1, sawEndSwitch);
}
if (inst->eOpcode == OPCODE_SWITCH || inst->eOpcode == OPCODE_LOOP)
depth++;
if (inst->eOpcode == OPCODE_ENDSWITCH || inst->eOpcode == OPCODE_ENDLOOP)
depth--;
}
case OPCODE_CONTINUE:
case OPCODE_CONTINUEC:
case OPCODE_ENDLOOP:
while (1)
{
inst--;
if ((inst->eOpcode == OPCODE_LOOP) && (depth == 0))
{
return GetNextNonLabelInstruction(inst + 1, sawEndSwitch);
}
if (inst->eOpcode == OPCODE_LOOP)
depth--;
if (inst->eOpcode == OPCODE_ENDLOOP)
depth++;
}
case OPCODE_SWITCH:
case OPCODE_CASE:
case OPCODE_DEFAULT:
while (1)
{
inst++;
if ((inst->eOpcode == OPCODE_CASE || inst->eOpcode == OPCODE_DEFAULT || inst->eOpcode == OPCODE_ENDSWITCH) && (depth == 0))
{
// Note that we'll skip setting sawEndSwitch if inst->eOpcode = OPCODE_ENDSWITCH
// so that BasicBlock::Build can distinguish between there being a direct route
// from SWITCH->ENDSWITCH (CASE followed directly by ENDSWITCH) and not.
if (inst->eOpcode == OPCODE_ENDSWITCH && sawEndSwitch != 0)
*sawEndSwitch = true;
return GetNextNonLabelInstruction(inst + 1, needConnectToParent);
}
if (inst->eOpcode == OPCODE_SWITCH)
depth++;
if (inst->eOpcode == OPCODE_ENDSWITCH)
depth--;
}
}
return 0;
}

View File

@ -1,777 +0,0 @@
#include "internal_includes/debug.h"
#include "internal_includes/tokens.h"
#include "internal_includes/HLSLccToolkit.h"
#include "internal_includes/DataTypeAnalysis.h"
#include "internal_includes/Shader.h"
#include "internal_includes/HLSLCrossCompilerContext.h"
#include "internal_includes/Instruction.h"
#include <algorithm>
// Helper function to set the vector type of 1 or more components in a vector
// If the existing values (in the vector we're writing to) are all SVT_VOID, just upgrade the value and we're done
// Otherwise, set all the components in the vector that currently are set to that same value OR are now being written to
// to the "highest" type value (ordering int->uint->float)
static void SetVectorType(std::vector<SHADER_VARIABLE_TYPE> &aeTempVecType, uint32_t regBaseIndex, uint32_t componentMask, SHADER_VARIABLE_TYPE eType, int *psMadeProgress)
{
int i = 0;
// Expand the mask to include all components that are used, also upgrade type
for (i = 0; i < 4; i++)
{
if (aeTempVecType[regBaseIndex + i] != SVT_VOID)
{
componentMask |= (1 << i);
eType = HLSLcc::SelectHigherType(eType, aeTempVecType[regBaseIndex + i]);
}
}
// Now componentMask contains the components we actually need to update and eType may have been changed to something else.
// Write the results
for (i = 0; i < 4; i++)
{
if (componentMask & (1 << i))
{
if (aeTempVecType[regBaseIndex + i] != eType)
{
aeTempVecType[regBaseIndex + i] = eType;
if (psMadeProgress)
*psMadeProgress = 1;
}
}
}
}
static SHADER_VARIABLE_TYPE OperandPrecisionToShaderVariableType(OPERAND_MIN_PRECISION prec, SHADER_VARIABLE_TYPE eDefault)
{
SHADER_VARIABLE_TYPE eType = eDefault;
switch (prec)
{
case OPERAND_MIN_PRECISION_DEFAULT:
break;
case OPERAND_MIN_PRECISION_SINT_16:
eType = SVT_INT16;
break;
case OPERAND_MIN_PRECISION_UINT_16:
eType = SVT_UINT16;
break;
case OPERAND_MIN_PRECISION_FLOAT_2_8:
eType = SVT_FLOAT10;
break;
case OPERAND_MIN_PRECISION_FLOAT_16:
eType = SVT_FLOAT16;
break;
default:
ASSERT(0); // Catch this to see what's going on.
break;
}
return eType;
}
static void MarkOperandAs(Operand *psOperand, SHADER_VARIABLE_TYPE eType, std::vector<SHADER_VARIABLE_TYPE> &aeTempVecType)
{
if (psOperand->eType == OPERAND_TYPE_TEMP)
{
const uint32_t ui32RegIndex = psOperand->ui32RegisterNumber * 4;
uint32_t mask = psOperand->GetAccessMask();
// Adjust type based on operand precision
eType = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, eType);
SetVectorType(aeTempVecType, ui32RegIndex, mask, eType, NULL);
}
}
static void MarkAllOperandsAs(Instruction* psInst, SHADER_VARIABLE_TYPE eType, std::vector<SHADER_VARIABLE_TYPE> &aeTempVecType)
{
uint32_t i = 0;
for (i = 0; i < psInst->ui32NumOperands; i++)
{
MarkOperandAs(&psInst->asOperands[i], eType, aeTempVecType);
}
}
// Mark scalars from CBs. TODO: Do we need to do the same for vec2/3's as well? There may be swizzles involved which make it vec4 or something else again.
static void SetCBOperandComponents(HLSLCrossCompilerContext *psContext, Operand *psOperand)
{
const ConstantBuffer* psCBuf = NULL;
const ShaderVarType* psVarType = NULL;
int32_t rebase = 0;
bool isArray;
if (psOperand->eType != OPERAND_TYPE_CONSTANT_BUFFER)
return;
// Ignore selection modes that access more than one component
switch (psOperand->eSelMode)
{
case OPERAND_4_COMPONENT_SELECT_1_MODE:
break;
case OPERAND_4_COMPONENT_SWIZZLE_MODE:
if (!psOperand->IsSwizzleReplicated())
return;
break;
case OPERAND_4_COMPONENT_MASK_MODE:
return;
}
psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, psOperand->aui32ArraySizes[0], &psCBuf);
ShaderInfo::GetShaderVarFromOffset(psOperand->aui32ArraySizes[1], psOperand->aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags);
if (psVarType->Class == SVC_SCALAR)
psOperand->iNumComponents = 1;
}
struct SetPartialDataTypes
{
SetPartialDataTypes(SHADER_VARIABLE_TYPE *_aeTempVec)
: m_TempVec(_aeTempVec)
{}
SHADER_VARIABLE_TYPE *m_TempVec;
template<typename ItrType> void operator()(ItrType inst, Operand *psOperand, uint32_t ui32OperandType) const
{
uint32_t mask = 0;
SHADER_VARIABLE_TYPE *aeTempVecType = m_TempVec;
SHADER_VARIABLE_TYPE newType;
uint32_t i, reg;
if (psOperand->eType != OPERAND_TYPE_TEMP)
return;
if (ui32OperandType == FEO_FLAG_SUBOPERAND)
{
// We really shouldn't ever be getting minprecision float indices here
ASSERT(psOperand->eMinPrecision != OPERAND_MIN_PRECISION_FLOAT_16 && psOperand->eMinPrecision != OPERAND_MIN_PRECISION_FLOAT_2_8);
mask = psOperand->GetAccessMask();
reg = psOperand->ui32RegisterNumber;
newType = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, SVT_INT_AMBIGUOUS);
for (i = 0; i < 4; i++)
{
if (!(mask & (1 << i)))
continue;
if (aeTempVecType[reg * 4 + i] == SVT_VOID)
aeTempVecType[reg * 4 + i] = newType;
}
return;
}
if (psOperand->eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT)
return;
mask = psOperand->GetAccessMask();
reg = psOperand->ui32RegisterNumber;
newType = OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, SVT_VOID);
ASSERT(newType != SVT_VOID);
for (i = 0; i < 4; i++)
{
if (!(mask & (1 << i)))
continue;
aeTempVecType[reg * 4 + i] = newType;
}
}
};
// Write back the temp datatypes into operands. Also mark scalars in constant buffers
struct WritebackDataTypes
{
WritebackDataTypes(HLSLCrossCompilerContext *_ctx, SHADER_VARIABLE_TYPE *_aeTempVec)
: m_Context(_ctx)
, m_TempVec(_aeTempVec)
{}
HLSLCrossCompilerContext *m_Context;
SHADER_VARIABLE_TYPE *m_TempVec;
template<typename ItrType> void operator()(ItrType inst, Operand *psOperand, uint32_t ui32OperandType) const
{
SHADER_VARIABLE_TYPE *aeTempVecType = m_TempVec;
uint32_t reg, mask, i;
SHADER_VARIABLE_TYPE dtype;
if (psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER)
SetCBOperandComponents(m_Context, psOperand);
if (psOperand->eType != OPERAND_TYPE_TEMP)
return;
reg = psOperand->ui32RegisterNumber;
mask = psOperand->GetAccessMask();
dtype = SVT_VOID;
for (i = 0; i < 4; i++)
{
if (!(mask & (1 << i)))
continue;
// Check that all components have the same type
ASSERT(dtype == SVT_VOID || dtype == aeTempVecType[reg * 4 + i]);
dtype = aeTempVecType[reg * 4 + i];
ASSERT(dtype != SVT_VOID);
ASSERT(dtype == OperandPrecisionToShaderVariableType(psOperand->eMinPrecision, dtype));
psOperand->aeDataType[i] = dtype;
}
}
};
void HLSLcc::DataTypeAnalysis::SetDataTypes(HLSLCrossCompilerContext* psContext, std::vector<Instruction> & instructions, uint32_t ui32TempCount, std::vector<SHADER_VARIABLE_TYPE> &results)
{
uint32_t i;
Instruction *psFirstInst = &instructions[0];
Instruction *psInst = psFirstInst;
// Start with void, then move up the chain void->ambiguous int->minprec int/uint->int/uint->minprec float->float
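// For example (illustrative, added): a temp used only as an index is first marked
// SVT_INT_AMBIGUOUS by SetPartialDataTypes below; if a float-only op such as OPCODE_ADD
// later touches it, it is promoted to SVT_FLOAT and never demoted back down the chain.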
std::vector<SHADER_VARIABLE_TYPE> &aeTempVecType = results;
aeTempVecType.clear();
aeTempVecType.resize(ui32TempCount * 4, SVT_VOID);
if (ui32TempCount == 0)
return;
// Go through the instructions, pick up partial datatypes, because we at least know those for a fact.
// Also set all suboperands to be integers (they're always used as indices)
ForEachOperand(instructions.begin(), instructions.end(), FEO_FLAG_ALL, SetPartialDataTypes(&aeTempVecType[0]));
// if (psContext->psShader->ui32MajorVersion <= 3)
{
// First pass, do analysis: deduce the data type based on opcodes, fill out aeTempVecType table
// Only ever do int->float promotion (or int->uint), never the other way around
for (i = 0; i < (uint32_t)instructions.size(); ++i, psInst++)
{
if (psInst->ui32NumOperands == 0)
continue;
#ifdef _DEBUG
for (int k = 0; k < (int)psInst->ui32NumOperands; k++)
{
if (psInst->asOperands[k].eType == OPERAND_TYPE_TEMP)
{
ASSERT(psInst->asOperands[k].ui32RegisterNumber < ui32TempCount);
}
}
#endif
switch (psInst->eOpcode)
{
// All float-only ops
case OPCODE_ADD:
case OPCODE_DERIV_RTX:
case OPCODE_DERIV_RTY:
case OPCODE_DIV:
case OPCODE_DP2:
case OPCODE_DP3:
case OPCODE_DP4:
case OPCODE_EXP:
case OPCODE_FRC:
case OPCODE_LOG:
case OPCODE_MAD:
case OPCODE_MIN:
case OPCODE_MAX:
case OPCODE_MUL:
case OPCODE_ROUND_NE:
case OPCODE_ROUND_NI:
case OPCODE_ROUND_PI:
case OPCODE_ROUND_Z:
case OPCODE_RSQ:
case OPCODE_SAMPLE:
case OPCODE_SAMPLE_C:
case OPCODE_SAMPLE_C_LZ:
case OPCODE_SAMPLE_L:
case OPCODE_SAMPLE_D:
case OPCODE_SAMPLE_B:
case OPCODE_SQRT:
case OPCODE_SINCOS:
case OPCODE_LOD:
case OPCODE_GATHER4:
case OPCODE_DERIV_RTX_COARSE:
case OPCODE_DERIV_RTX_FINE:
case OPCODE_DERIV_RTY_COARSE:
case OPCODE_DERIV_RTY_FINE:
case OPCODE_GATHER4_C:
case OPCODE_GATHER4_PO:
case OPCODE_GATHER4_PO_C:
case OPCODE_RCP:
MarkAllOperandsAs(psInst, SVT_FLOAT, aeTempVecType);
break;
// Comparison ops, need to enable possibility for going boolean
case OPCODE_IEQ:
case OPCODE_INE:
MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_INT_AMBIGUOUS, aeTempVecType);
MarkOperandAs(&psInst->asOperands[2], SVT_INT_AMBIGUOUS, aeTempVecType);
break;
case OPCODE_IF:
case OPCODE_BREAKC:
case OPCODE_CALLC:
case OPCODE_CONTINUEC:
case OPCODE_RETC:
MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType);
break;
case OPCODE_ILT:
case OPCODE_IGE:
MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType);
break;
case OPCODE_ULT:
case OPCODE_UGE:
MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[2], SVT_UINT, aeTempVecType);
break;
case OPCODE_AND:
case OPCODE_OR:
MarkOperandAs(&psInst->asOperands[0], SVT_INT_AMBIGUOUS, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType);
MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType);
break;
// Integer ops that don't care of signedness
case OPCODE_IADD:
case OPCODE_INEG:
case OPCODE_ISHL:
case OPCODE_NOT:
case OPCODE_XOR:
case OPCODE_BUFINFO:
case OPCODE_COUNTBITS:
case OPCODE_FIRSTBIT_HI:
case OPCODE_FIRSTBIT_LO:
case OPCODE_FIRSTBIT_SHI:
case OPCODE_BFI:
case OPCODE_BFREV:
case OPCODE_ATOMIC_AND:
case OPCODE_ATOMIC_OR:
case OPCODE_ATOMIC_XOR:
case OPCODE_ATOMIC_CMP_STORE:
case OPCODE_ATOMIC_IADD:
case OPCODE_IMM_ATOMIC_IADD:
case OPCODE_IMM_ATOMIC_AND:
case OPCODE_IMM_ATOMIC_OR:
case OPCODE_IMM_ATOMIC_XOR:
case OPCODE_IMM_ATOMIC_EXCH:
case OPCODE_IMM_ATOMIC_CMP_EXCH:
MarkAllOperandsAs(psInst, SVT_INT_AMBIGUOUS, aeTempVecType);
break;
// Integer ops
case OPCODE_IMAD:
case OPCODE_IMAX:
case OPCODE_IMIN:
case OPCODE_IMUL:
case OPCODE_ISHR:
case OPCODE_IBFE:
case OPCODE_ATOMIC_IMAX:
case OPCODE_ATOMIC_IMIN:
case OPCODE_IMM_ATOMIC_IMAX:
case OPCODE_IMM_ATOMIC_IMIN:
MarkAllOperandsAs(psInst, SVT_INT, aeTempVecType);
break;
// uint ops
case OPCODE_UDIV:
case OPCODE_UMUL:
case OPCODE_UMAD:
case OPCODE_UMAX:
case OPCODE_UMIN:
case OPCODE_USHR:
case OPCODE_UADDC:
case OPCODE_USUBB:
case OPCODE_ATOMIC_UMAX:
case OPCODE_ATOMIC_UMIN:
case OPCODE_IMM_ATOMIC_UMAX:
case OPCODE_IMM_ATOMIC_UMIN:
case OPCODE_IMM_ATOMIC_ALLOC:
case OPCODE_IMM_ATOMIC_CONSUME:
MarkAllOperandsAs(psInst, SVT_UINT, aeTempVecType);
break;
case OPCODE_UBFE:
MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[3], SVT_UINT, aeTempVecType);
break;
// Need special handling
case OPCODE_FTOI:
case OPCODE_FTOU:
MarkOperandAs(&psInst->asOperands[0], psInst->eOpcode == OPCODE_FTOI ? SVT_INT : SVT_UINT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType);
break;
case OPCODE_GE:
case OPCODE_LT:
case OPCODE_EQ:
case OPCODE_NE:
MarkOperandAs(&psInst->asOperands[0], SVT_BOOL, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[2], SVT_FLOAT, aeTempVecType);
break;
case OPCODE_ITOF:
case OPCODE_UTOF:
MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], psInst->eOpcode == OPCODE_ITOF ? SVT_INT : SVT_UINT, aeTempVecType);
break;
case OPCODE_LD:
case OPCODE_LD_MS:
{
SHADER_VARIABLE_TYPE samplerReturnType = psInst->asOperands[2].aeDataType[0];
MarkOperandAs(&psInst->asOperands[0], samplerReturnType, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType);
break;
}
case OPCODE_MOVC:
MarkOperandAs(&psInst->asOperands[1], SVT_BOOL, aeTempVecType);
break;
case OPCODE_SWAPC:
MarkOperandAs(&psInst->asOperands[2], SVT_BOOL, aeTempVecType);
break;
case OPCODE_RESINFO:
// Operand 0 depends on the return type declaration, op 1 is always uint
MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType);
switch (psInst->eResInfoReturnType)
{
default:
case RESINFO_INSTRUCTION_RETURN_FLOAT:
case RESINFO_INSTRUCTION_RETURN_RCPFLOAT:
MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
break;
case RESINFO_INSTRUCTION_RETURN_UINT:
MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType);
break;
}
break;
case OPCODE_SAMPLE_INFO:
// Sample_info uses the same RESINFO_RETURN_TYPE for storage. 0 = float, 1 = uint.
MarkOperandAs(&psInst->asOperands[0], psInst->eResInfoReturnType == RESINFO_INSTRUCTION_RETURN_FLOAT ? SVT_FLOAT : SVT_UINT, aeTempVecType);
break;
case OPCODE_SAMPLE_POS:
MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
break;
case OPCODE_LD_UAV_TYPED:
// translates to gvec4 loadImage(gimage i, ivec p).
MarkOperandAs(&psInst->asOperands[0], SVT_INT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); // ivec p
break;
case OPCODE_STORE_UAV_TYPED:
// translates to storeImage(gimage i, ivec p, gvec4 data)
MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType); // ivec p
MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType); // gvec4 data
break;
case OPCODE_LD_RAW:
if (psInst->asOperands[2].eType == OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY)
MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType);
else
MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
break;
case OPCODE_STORE_RAW:
if (psInst->asOperands[0].eType == OPERAND_TYPE_THREAD_GROUP_SHARED_MEMORY)
MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType);
else
MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
break;
case OPCODE_LD_STRUCTURED:
MarkOperandAs(&psInst->asOperands[0], SVT_INT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType);
break;
case OPCODE_STORE_STRUCTURED:
MarkOperandAs(&psInst->asOperands[1], SVT_INT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[2], SVT_INT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[3], SVT_INT, aeTempVecType);
break;
case OPCODE_F32TOF16:
MarkOperandAs(&psInst->asOperands[0], SVT_UINT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_FLOAT, aeTempVecType);
break;
case OPCODE_F16TOF32:
MarkOperandAs(&psInst->asOperands[0], SVT_FLOAT, aeTempVecType);
MarkOperandAs(&psInst->asOperands[1], SVT_UINT, aeTempVecType);
break;
// No-operands, should never get here anyway
/* case OPCODE_BREAK:
case OPCODE_CALL:
case OPCODE_CASE:
case OPCODE_CONTINUE:
case OPCODE_CUT:
case OPCODE_DEFAULT:
case OPCODE_DISCARD:
case OPCODE_ELSE:
case OPCODE_EMIT:
case OPCODE_EMITTHENCUT:
case OPCODE_ENDIF:
case OPCODE_ENDLOOP:
case OPCODE_ENDSWITCH:
case OPCODE_LABEL:
case OPCODE_LOOP:
case OPCODE_CUSTOMDATA:
case OPCODE_NOP:
case OPCODE_RET:
case OPCODE_SWITCH:
case OPCODE_DCL_RESOURCE: // DCL* opcodes have
case OPCODE_DCL_CONSTANT_BUFFER: // custom operand formats.
case OPCODE_DCL_SAMPLER:
case OPCODE_DCL_INDEX_RANGE:
case OPCODE_DCL_GS_OUTPUT_PRIMITIVE_TOPOLOGY:
case OPCODE_DCL_GS_INPUT_PRIMITIVE:
case OPCODE_DCL_MAX_OUTPUT_VERTEX_COUNT:
case OPCODE_DCL_INPUT:
case OPCODE_DCL_INPUT_SGV:
case OPCODE_DCL_INPUT_SIV:
case OPCODE_DCL_INPUT_PS:
case OPCODE_DCL_INPUT_PS_SGV:
case OPCODE_DCL_INPUT_PS_SIV:
case OPCODE_DCL_OUTPUT:
case OPCODE_DCL_OUTPUT_SGV:
case OPCODE_DCL_OUTPUT_SIV:
case OPCODE_DCL_TEMPS:
case OPCODE_DCL_INDEXABLE_TEMP:
case OPCODE_DCL_GLOBAL_FLAGS:
case OPCODE_HS_DECLS: // token marks beginning of HS sub-shader
case OPCODE_HS_CONTROL_POINT_PHASE: // token marks beginning of HS sub-shader
case OPCODE_HS_FORK_PHASE: // token marks beginning of HS sub-shader
case OPCODE_HS_JOIN_PHASE: // token marks beginning of HS sub-shader
case OPCODE_EMIT_STREAM:
case OPCODE_CUT_STREAM:
case OPCODE_EMITTHENCUT_STREAM:
case OPCODE_INTERFACE_CALL:
case OPCODE_DCL_STREAM:
case OPCODE_DCL_FUNCTION_BODY:
case OPCODE_DCL_FUNCTION_TABLE:
case OPCODE_DCL_INTERFACE:
case OPCODE_DCL_INPUT_CONTROL_POINT_COUNT:
case OPCODE_DCL_OUTPUT_CONTROL_POINT_COUNT:
case OPCODE_DCL_TESS_DOMAIN:
case OPCODE_DCL_TESS_PARTITIONING:
case OPCODE_DCL_TESS_OUTPUT_PRIMITIVE:
case OPCODE_DCL_HS_MAX_TESSFACTOR:
case OPCODE_DCL_HS_FORK_PHASE_INSTANCE_COUNT:
case OPCODE_DCL_HS_JOIN_PHASE_INSTANCE_COUNT:
case OPCODE_DCL_THREAD_GROUP:
case OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED:
case OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW:
case OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED:
case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_RAW:
case OPCODE_DCL_THREAD_GROUP_SHARED_MEMORY_STRUCTURED:
case OPCODE_DCL_RESOURCE_RAW:
case OPCODE_DCL_RESOURCE_STRUCTURED:
case OPCODE_SYNC:
case OPCODE_EVAL_SNAPPED:
case OPCODE_EVAL_SAMPLE_INDEX:
case OPCODE_EVAL_CENTROID:
case OPCODE_DCL_GS_INSTANCE_COUNT:
case OPCODE_ABORT:
case OPCODE_DEBUG_BREAK:
// Double not supported
case OPCODE_DADD:
case OPCODE_DMAX:
case OPCODE_DMIN:
case OPCODE_DMUL:
case OPCODE_DEQ:
case OPCODE_DGE:
case OPCODE_DLT:
case OPCODE_DNE:
case OPCODE_DMOV:
case OPCODE_DMOVC:
case OPCODE_DTOF:
case OPCODE_FTOD:
*/
default:
break;
}
}
}
{
int madeProgress = 0;
// Next go through MOV and MOVC and propagate the data type of whichever parameter we happen to have
do
{
madeProgress = 0;
psInst = psFirstInst;
for (i = 0; i < (uint32_t)instructions.size(); ++i, psInst++)
{
if (psInst->eOpcode == OPCODE_MOV || psInst->eOpcode == OPCODE_MOVC)
{
// Figure out the data type
uint32_t k;
SHADER_VARIABLE_TYPE dataType = SVT_VOID;
int foundImmediate = 0;
for (k = 0; k < psInst->ui32NumOperands; k++)
{
uint32_t mask, j;
if (psInst->eOpcode == OPCODE_MOVC && k == 1)
continue; // Ignore the condition operand, it's always int
if (psInst->asOperands[k].eType == OPERAND_TYPE_IMMEDIATE32)
{
foundImmediate = 1;
continue; // We don't know the data type of immediates yet, but if this is the only one found, mark as int, it'll get promoted later if needed
}
if (psInst->asOperands[k].eType != OPERAND_TYPE_TEMP)
{
dataType = psInst->asOperands[k].GetDataType(psContext);
break;
}
if (psInst->asOperands[k].eModifier != OPERAND_MODIFIER_NONE)
{
// If any modifiers are used in MOV or MOVC, the value is automatically treated as float.
dataType = SVT_FLOAT;
break;
}
mask = psInst->asOperands[k].GetAccessMask();
for (j = 0; j < 4; j++)
{
if (!(mask & (1 << j)))
continue;
if (aeTempVecType[psInst->asOperands[k].ui32RegisterNumber * 4 + j] != SVT_VOID)
{
dataType = HLSLcc::SelectHigherType(dataType, aeTempVecType[psInst->asOperands[k].ui32RegisterNumber * 4 + j]);
}
}
}
// Use at minimum int type when any operand is immediate.
// Allowing bool could lead into bugs like case 883080
if (foundImmediate && (dataType == SVT_VOID || dataType == SVT_BOOL))
dataType = SVT_INT;
if (dataType != SVT_VOID)
{
// Found data type, write to all operands
// First adjust it to not have precision qualifiers in it
switch (dataType)
{
case SVT_FLOAT10:
case SVT_FLOAT16:
dataType = SVT_FLOAT;
break;
case SVT_INT12:
case SVT_INT16:
dataType = SVT_INT;
break;
case SVT_UINT16:
case SVT_UINT8:
dataType = SVT_UINT;
break;
default:
break;
}
for (k = 0; k < psInst->ui32NumOperands; k++)
{
uint32_t mask;
if (psInst->eOpcode == OPCODE_MOVC && k == 1)
continue; // Ignore the condition operand, it's always int
if (psInst->asOperands[k].eType != OPERAND_TYPE_TEMP)
continue;
if (psInst->asOperands[k].eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT)
continue;
mask = psInst->asOperands[k].GetAccessMask();
SetVectorType(aeTempVecType, psInst->asOperands[k].ui32RegisterNumber * 4, mask, dataType, &madeProgress);
}
}
}
}
}
while (madeProgress != 0);
}
// translate forced_int and int_ambiguous back to int
for (i = 0; i < ui32TempCount * 4; i++)
{
if (aeTempVecType[i] == SVT_FORCED_INT || aeTempVecType[i] == SVT_INT_AMBIGUOUS)
aeTempVecType[i] = SVT_INT;
}
ForEachOperand(instructions.begin(), instructions.end(), FEO_FLAG_ALL, WritebackDataTypes(psContext, &aeTempVecType[0]));
// Propagate boolean data types over logical operators
bool didProgress = false;
do
{
didProgress = false;
std::for_each(instructions.begin(), instructions.end(), [&didProgress, &psContext, &aeTempVecType](Instruction &i)
{
if ((i.eOpcode == OPCODE_AND || i.eOpcode == OPCODE_OR)
&& (i.asOperands[1].GetDataType(psContext) == SVT_BOOL && i.asOperands[2].GetDataType(psContext) == SVT_BOOL)
&& (i.asOperands[0].eType == OPERAND_TYPE_TEMP && i.asOperands[0].GetDataType(psContext) != SVT_BOOL))
{
// Check if all uses see only this define
bool isStandalone = true;
std::for_each(i.m_Uses.begin(), i.m_Uses.end(), [&isStandalone](Instruction::Use &u)
{
if (u.m_Op->m_Defines.size() > 1)
isStandalone = false;
});
if (isStandalone)
{
didProgress = true;
// Change data type of this and all uses
i.asOperands[0].aeDataType[0] = i.asOperands[0].aeDataType[1] = i.asOperands[0].aeDataType[2] = i.asOperands[0].aeDataType[3] = SVT_BOOL;
uint32_t reg = i.asOperands[0].ui32RegisterNumber;
aeTempVecType[reg * 4 + 0] = aeTempVecType[reg * 4 + 1] = aeTempVecType[reg * 4 + 2] = aeTempVecType[reg * 4 + 3] = SVT_BOOL;
std::for_each(i.m_Uses.begin(), i.m_Uses.end(), [](Instruction::Use &u)
{
u.m_Op->aeDataType[0] = u.m_Op->aeDataType[1] = u.m_Op->aeDataType[2] = u.m_Op->aeDataType[3] = SVT_BOOL;
});
}
}
});
}
while (didProgress);
}

View File

@ -1 +0,0 @@
#include "internal_includes/Declaration.h"

View File

@ -1,350 +0,0 @@
#include "internal_includes/HLSLCrossCompilerContext.h"
#include "internal_includes/HLSLccToolkit.h"
#include "internal_includes/Shader.h"
#include "internal_includes/DataTypeAnalysis.h"
#include "internal_includes/UseDefineChains.h"
#include "internal_includes/Declaration.h"
#include "internal_includes/debug.h"
#include "internal_includes/Translator.h"
#include "internal_includes/ControlFlowGraph.h"
#include "internal_includes/languages.h"
#include "include/hlslcc.h"
#include <sstream>
void HLSLCrossCompilerContext::DoDataTypeAnalysis(ShaderPhase *psPhase)
{
size_t ui32DeclCount = psPhase->psDecl.size();
uint32_t i;
psPhase->psTempDeclaration = NULL;
psPhase->ui32OrigTemps = 0;
psPhase->ui32TotalTemps = 0;
// Retrieve the temp decl count
for (i = 0; i < ui32DeclCount; ++i)
{
if (psPhase->psDecl[i].eOpcode == OPCODE_DCL_TEMPS)
{
psPhase->ui32TotalTemps = psPhase->psDecl[i].value.ui32NumTemps;
psPhase->psTempDeclaration = &psPhase->psDecl[i];
break;
}
}
if (psPhase->ui32TotalTemps == 0)
return;
psPhase->ui32OrigTemps = psPhase->ui32TotalTemps;
// The split table contains, for each register, the index of the original register it was split out from, or 0xffffffff if it was not split.
// Format: lowest 16 bits: original register; bits 16-23: rebase (e.g. a value of 1 means .yzw was changed to .xyz); bits 24-31: component count.
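// Illustrative decoding (added for clarity, not from the original source): an entry of
// 0x03010002 would mean a component count of 3 (bits 24-31), a rebase of 1 (bits 16-23,
// i.e. .yzw moved to .xyz) and original register 2 (lowest 16 bits); 0xffffffff means "not split".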
psPhase->pui32SplitInfo.clear();
psPhase->pui32SplitInfo.resize(psPhase->ui32TotalTemps * 2, 0xffffffff);
// Build use-define chains and split temps based on those.
{
DefineUseChains duChains;
UseDefineChains udChains;
BuildUseDefineChains(psPhase->psInst, psPhase->ui32TotalTemps, duChains, udChains, psPhase->GetCFG());
CalculateStandaloneDefinitions(duChains, psPhase->ui32TotalTemps);
// Only do sampler precision downgrade with pixel shaders on mobile targets / Switch
if (psShader->eShaderType == PIXEL_SHADER && (IsMobileTarget(this) || IsSwitch()))
UpdateSamplerPrecisions(psShader->sInfo, duChains, psPhase->ui32TotalTemps);
UDSplitTemps(&psPhase->ui32TotalTemps, duChains, udChains, psPhase->pui32SplitInfo);
WriteBackUsesAndDefines(duChains);
}
HLSLcc::DataTypeAnalysis::SetDataTypes(this, psPhase->psInst, psPhase->ui32TotalTemps, psPhase->peTempTypes);
if (psPhase->psTempDeclaration && (psPhase->ui32OrigTemps != psPhase->ui32TotalTemps))
psPhase->psTempDeclaration->value.ui32NumTemps = psPhase->ui32TotalTemps;
}
void HLSLCrossCompilerContext::ReserveFramebufferFetchInputs()
{
if (psShader->eShaderType != PIXEL_SHADER)
return;
if (!psShader->extensions->EXT_shader_framebuffer_fetch)
return;
if ((flags & HLSLCC_FLAG_SHADER_FRAMEBUFFER_FETCH) == 0)
return;
if (!(psShader->eTargetLanguage >= LANG_ES_300 && psShader->eTargetLanguage <= LANG_ES_LAST))
return;
if (!psDependencies)
return;
if (!HaveUniformBindingsAndLocations(psShader->eTargetLanguage, psShader->extensions, flags) &&
((flags & HLSLCC_FLAG_FORCE_EXPLICIT_LOCATIONS) == 0 || (flags & HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS) != 0))
return;
// The Adreno GLSL compiler fails to compile shaders that use the same location for textures and inout attachments
// So here we figure out the maximum index of any inout render target and then make sure that we never use those for textures.
int maxInOutRenderTargetIndex = -1;
for (const Declaration& decl : psShader->asPhases[0].psDecl)
{
if (decl.eOpcode != OPCODE_DCL_INPUT_PS)
continue;
const Operand& operand = decl.asOperands[0];
if (!operand.iPSInOut)
continue;
const ShaderInfo::InOutSignature* signature = NULL;
if (!psShader->sInfo.GetInputSignatureFromRegister(operand.ui32RegisterNumber, operand.ui32CompMask, &signature, true))
continue;
const int index = signature->ui32SemanticIndex;
if (index > maxInOutRenderTargetIndex)
maxInOutRenderTargetIndex = index;
}
if (maxInOutRenderTargetIndex >= 0)
{
if (maxInOutRenderTargetIndex >= psDependencies->m_NextAvailableGLSLResourceBinding[GLSLCrossDependencyData::BufferType_Texture])
psDependencies->m_NextAvailableGLSLResourceBinding[GLSLCrossDependencyData::BufferType_Texture] = maxInOutRenderTargetIndex + 1;
}
}
void HLSLCrossCompilerContext::ClearDependencyData()
{
switch (psShader->eShaderType)
{
case PIXEL_SHADER:
{
psDependencies->ClearCrossDependencyData();
break;
}
case HULL_SHADER:
{
psDependencies->eTessPartitioning = TESSELLATOR_PARTITIONING_UNDEFINED;
psDependencies->eTessOutPrim = TESSELLATOR_OUTPUT_UNDEFINED;
break;
}
default:
break;
}
}
void HLSLCrossCompilerContext::AddIndentation()
{
int i;
bstring glsl = *currentGLSLString;
for (i = 0; i < indent; ++i)
{
bcatcstr(glsl, " ");
}
}
bool HLSLCrossCompilerContext::RequireExtension(const std::string &extName)
{
if (m_EnabledExtensions.find(extName) != m_EnabledExtensions.end())
return true;
m_EnabledExtensions.insert(extName);
bformata(extensions, "#extension %s : require\n", extName.c_str());
return false;
}
bool HLSLCrossCompilerContext::EnableExtension(const std::string &extName)
{
if (m_EnabledExtensions.find(extName) != m_EnabledExtensions.end())
return true;
m_EnabledExtensions.insert(extName);
bformata(extensions, "#ifdef %s\n", extName.c_str());
bformata(extensions, "#extension %s : enable\n", extName.c_str());
bcatcstr(extensions, "#endif\n");
return false;
}
std::string HLSLCrossCompilerContext::GetDeclaredInputName(const Operand* psOperand, int *piRebase, int iIgnoreRedirect, uint32_t *puiIgnoreSwizzle) const
{
std::ostringstream oss;
const ShaderInfo::InOutSignature* psIn = NULL;
int regSpace = psOperand->GetRegisterSpace(this);
if (iIgnoreRedirect == 0)
{
if ((regSpace == 0 && psShader->asPhases[currentPhase].acInputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe)
||
(regSpace == 1 && psShader->asPhases[currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe))
{
oss << "phase" << currentPhase << "_Input" << regSpace << "_" << psOperand->ui32RegisterNumber;
if (piRebase)
*piRebase = 0;
return oss.str();
}
}
if (regSpace == 0)
psShader->sInfo.GetInputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn, true);
else
psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psIn, true);
if (psIn && piRebase)
*piRebase = psIn->iRebase;
const std::string patchPrefix = psShader->eTargetLanguage == LANG_METAL ? "patch." : "patch";
std::string res = "";
bool skipPrefix = false;
if (psTranslator->TranslateSystemValue(psOperand, psIn, res, puiIgnoreSwizzle, psShader->aIndexedInput[regSpace][psOperand->ui32RegisterNumber] != 0, true, &skipPrefix, &iIgnoreRedirect))
{
if (psShader->eTargetLanguage == LANG_METAL && (iIgnoreRedirect == 0) && !skipPrefix)
return inputPrefix + res;
else
return res;
}
ASSERT(psIn != NULL);
oss << inputPrefix << (regSpace == 1 ? patchPrefix : "") << psIn->semanticName << psIn->ui32SemanticIndex;
return oss.str();
}
std::string HLSLCrossCompilerContext::GetDeclaredOutputName(const Operand* psOperand,
int* piStream,
uint32_t *puiIgnoreSwizzle,
int *piRebase,
int iIgnoreRedirect) const
{
std::ostringstream oss;
const ShaderInfo::InOutSignature* psOut = NULL;
int regSpace = psOperand->GetRegisterSpace(this);
if (iIgnoreRedirect == 0)
{
if ((regSpace == 0 && psShader->asPhases[currentPhase].acOutputNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe)
|| (regSpace == 1 && psShader->asPhases[currentPhase].acPatchConstantsNeedsRedirect[psOperand->ui32RegisterNumber] == 0xfe))
{
oss << "phase" << currentPhase << "_Output" << regSpace << "_" << psOperand->ui32RegisterNumber;
if (piRebase)
*piRebase = 0;
return oss.str();
}
}
if (regSpace == 0)
psShader->sInfo.GetOutputSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), psShader->ui32CurrentVertexOutputStream, &psOut, true);
else
psShader->sInfo.GetPatchConstantSignatureFromRegister(psOperand->ui32RegisterNumber, psOperand->GetAccessMask(), &psOut, true);
if (psOut && piRebase)
*piRebase = psOut->iRebase;
if (psOut && (psOut->isIndexed.find(currentPhase) != psOut->isIndexed.end()))
{
// Need to route through temp output variable
oss << "phase" << currentPhase << "_Output" << regSpace << "_" << psOut->indexStart.find(currentPhase)->second;
if (!psOperand->m_SubOperands[0].get())
{
oss << "[" << psOperand->ui32RegisterNumber << "]";
}
if (piRebase)
*piRebase = 0;
return oss.str();
}
const std::string patchPrefix = psShader->eTargetLanguage == LANG_METAL ? "patch." : "patch";
std::string res = "";
if (psTranslator->TranslateSystemValue(psOperand, psOut, res, puiIgnoreSwizzle, psShader->aIndexedOutput[regSpace][psOperand->ui32RegisterNumber], false, NULL, &iIgnoreRedirect))
{
// clip/cull planes will always have an interim variable, as HLSL operates on float4 but we need to size the output according to the actual plane count
// with tessellation factor buffers, a separate buffer from the output is used. For some reason TranslateSystemValue returns *outSkipPrefix = true
// for ALL system vars; we simply ignore it here and instead modify iIgnoreRedirect for these special cases
if (psShader->eTargetLanguage == LANG_METAL && regSpace == 0 && (iIgnoreRedirect == 0))
return outputPrefix + res;
else if (psShader->eTargetLanguage == LANG_METAL && (iIgnoreRedirect == 0))
return patchPrefix + res;
else
return res;
}
ASSERT(psOut != NULL);
oss << outputPrefix << (regSpace == 1 ? patchPrefix : "") << psOut->semanticName << psOut->ui32SemanticIndex;
return oss.str();
}
bool HLSLCrossCompilerContext::OutputNeedsDeclaring(const Operand* psOperand, const int count)
{
char compMask = (char)psOperand->ui32CompMask;
int regSpace = psOperand->GetRegisterSpace(this);
uint32_t startIndex = psOperand->ui32RegisterNumber + (psShader->ui32CurrentVertexOutputStream * 1024); // Assumes fewer than 1K output registers per stream
ASSERT(psShader->ui32CurrentVertexOutputStream < 4);
// First check for various builtins, mostly depth-output ones.
if (psShader->eShaderType == PIXEL_SHADER)
{
if (psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL ||
psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL)
{
return true;
}
if (psOperand->eType == OPERAND_TYPE_OUTPUT_DEPTH)
{
// GL doesn't need declaration, Metal does.
return psShader->eTargetLanguage == LANG_METAL;
}
}
// Needs declaring if any of the components hasn't already been declared
if ((compMask & ~psShader->acOutputDeclared[regSpace][startIndex]) != 0)
{
int offset;
const ShaderInfo::InOutSignature* psSignature = NULL;
if (psOperand->eSpecialName == NAME_UNDEFINED)
{
// Need to fetch the actual comp mask
if (regSpace == 0)
psShader->sInfo.GetOutputSignatureFromRegister(
psOperand->ui32RegisterNumber,
psOperand->ui32CompMask,
psShader->ui32CurrentVertexOutputStream,
&psSignature);
else
psShader->sInfo.GetPatchConstantSignatureFromRegister(
psOperand->ui32RegisterNumber,
psOperand->ui32CompMask,
&psSignature);
compMask = (char)psSignature->ui32Mask;
}
for (offset = 0; offset < count; offset++)
{
psShader->acOutputDeclared[regSpace][startIndex + offset] |= compMask;
}
if (psSignature && (psSignature->semanticName == "PSIZE") && (psShader->eTargetLanguage != LANG_METAL))
{
// gl_PointSize doesn't need declaring. TODO: Metal doesn't have point size at all?
return false;
}
return true;
}
return false;
}
bool HLSLCrossCompilerContext::IsVulkan() const
{
return (flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0;
}
bool HLSLCrossCompilerContext::IsSwitch() const
{
return (flags & HLSLCC_FLAG_NVN_TARGET) != 0;
}

View File

@@ -1,250 +0,0 @@
#include "hlslcc.h"
#include <memory>
#include <sstream>
#include "internal_includes/HLSLCrossCompilerContext.h"
#include "internal_includes/toGLSL.h"
#include "internal_includes/toMetal.h"
#include "internal_includes/Shader.h"
#include "internal_includes/decode.h"
#ifndef GL_VERTEX_SHADER_ARB
#define GL_VERTEX_SHADER_ARB 0x8B31
#endif
#ifndef GL_FRAGMENT_SHADER_ARB
#define GL_FRAGMENT_SHADER_ARB 0x8B30
#endif
#ifndef GL_GEOMETRY_SHADER
#define GL_GEOMETRY_SHADER 0x8DD9
#endif
#ifndef GL_TESS_EVALUATION_SHADER
#define GL_TESS_EVALUATION_SHADER 0x8E87
#endif
#ifndef GL_TESS_CONTROL_SHADER
#define GL_TESS_CONTROL_SHADER 0x8E88
#endif
#ifndef GL_COMPUTE_SHADER
#define GL_COMPUTE_SHADER 0x91B9
#endif
static bool CheckConstantBuffersNoDuplicateNames(const std::vector<ConstantBuffer>& buffers, HLSLccReflection& reflectionCallbacks)
{
uint32_t count = buffers.size();
for (uint32_t i = 0; i < count; ++i)
{
const ConstantBuffer& lhs = buffers[i];
for (uint32_t j = i + 1; j < count; ++j)
{
const ConstantBuffer& rhs = buffers[j];
if (lhs.name == rhs.name)
{
std::ostringstream oss;
oss << "Duplicate constant buffer declaration: " << lhs.name;
reflectionCallbacks.OnDiagnostics(oss.str(), 0, true);
return false;
}
}
}
return true;
}
HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromMem(const char* shader,
unsigned int flags,
GLLang language,
const GlExtensions *extensions,
GLSLCrossDependencyData* dependencies,
HLSLccSamplerPrecisionInfo& samplerPrecisions,
HLSLccReflection& reflectionCallbacks,
GLSLShader* result)
{
uint32_t* tokens;
char* glslcstr = NULL;
int GLSLShaderType = GL_FRAGMENT_SHADER_ARB;
int success = 0;
uint32_t i;
tokens = (uint32_t*)shader;
std::unique_ptr<Shader> psShader(DecodeDXBC(tokens, flags));
if (psShader.get())
{
Shader* shader = psShader.get();
if (!CheckConstantBuffersNoDuplicateNames(shader->sInfo.psConstantBuffers, reflectionCallbacks))
return 0;
HLSLCrossCompilerContext sContext(reflectionCallbacks);
// Add shader precisions from the list
psShader->sInfo.AddSamplerPrecisions(samplerPrecisions);
if (psShader->ui32MajorVersion <= 3)
{
flags &= ~HLSLCC_FLAG_COMBINE_TEXTURE_SAMPLERS;
}
#ifdef _DEBUG
flags |= HLSLCC_FLAG_INCLUDE_INSTRUCTIONS_COMMENTS;
#endif
sContext.psShader = shader;
sContext.flags = flags;
// If dependencies == NULL, we'll create a dummy object for it so that there's always something there.
std::unique_ptr<GLSLCrossDependencyData> depPtr(nullptr);
if (dependencies == NULL)
{
depPtr.reset(new GLSLCrossDependencyData());
sContext.psDependencies = depPtr.get();
sContext.psDependencies->SetupGLSLResourceBindingSlotsIndices();
}
else
sContext.psDependencies = dependencies;
for (i = 0; i < psShader->asPhases.size(); ++i)
{
psShader->asPhases[i].hasPostShaderCode = 0;
}
if (language == LANG_METAL)
{
// Geometry shader is not supported
if (psShader->eShaderType == GEOMETRY_SHADER)
{
result->sourceCode = "";
return 0;
}
ToMetal translator(&sContext);
if (!translator.Translate())
{
bdestroy(sContext.glsl);
for (i = 0; i < psShader->asPhases.size(); ++i)
{
bdestroy(psShader->asPhases[i].postShaderCode);
bdestroy(psShader->asPhases[i].earlyMain);
}
return 0;
}
}
else
{
ToGLSL translator(&sContext);
language = translator.SetLanguage(language);
translator.SetExtensions(extensions);
if (!translator.Translate())
{
bdestroy(sContext.glsl);
for (i = 0; i < psShader->asPhases.size(); ++i)
{
bdestroy(psShader->asPhases[i].postShaderCode);
bdestroy(psShader->asPhases[i].earlyMain);
}
return 0;
}
}
switch (psShader->eShaderType)
{
case VERTEX_SHADER:
{
GLSLShaderType = GL_VERTEX_SHADER_ARB;
break;
}
case GEOMETRY_SHADER:
{
GLSLShaderType = GL_GEOMETRY_SHADER;
break;
}
case DOMAIN_SHADER:
{
GLSLShaderType = GL_TESS_EVALUATION_SHADER;
break;
}
case HULL_SHADER:
{
GLSLShaderType = GL_TESS_CONTROL_SHADER;
break;
}
case COMPUTE_SHADER:
{
GLSLShaderType = GL_COMPUTE_SHADER;
break;
}
default:
{
break;
}
}
glslcstr = bstr2cstr(sContext.glsl, '\0');
result->sourceCode = glslcstr;
bcstrfree(glslcstr);
bdestroy(sContext.glsl);
for (i = 0; i < psShader->asPhases.size(); ++i)
{
bdestroy(psShader->asPhases[i].postShaderCode);
bdestroy(psShader->asPhases[i].earlyMain);
}
result->reflection = psShader->sInfo;
result->textureSamplers = psShader->textureSamplers;
success = 1;
}
shader = 0;
tokens = 0;
/* Fill in the result struct */
result->shaderType = GLSLShaderType;
result->GLSLLanguage = language;
return success;
}
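// Illustrative usage sketch for the entry point above (not part of the original
// file; dxbcBlob is a placeholder for a pointer to compiled DXBC bytecode, and the
// helper objects are the default-constructible types declared in hlslcc.h):
//
//     GLSLShader result;
//     HLSLccSamplerPrecisionInfo samplerPrecisions;
//     HLSLccReflection reflection;
//     int ok = TranslateHLSLFromMem(dxbcBlob, 0 /*flags*/, LANG_DEFAULT,
//                                   NULL /*extensions*/, NULL /*dependencies*/,
//                                   samplerPrecisions, reflection, &result);
//     if (ok)
//         printf("%s\n", result.sourceCode.c_str());
//
// Passing NULL for dependencies makes the function allocate the dummy
// GLSLCrossDependencyData seen above, so one-off translations need no extra setup.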
HLSLCC_API int HLSLCC_APIENTRY TranslateHLSLFromFile(const char* filename,
unsigned int flags,
GLLang language,
const GlExtensions *extensions,
GLSLCrossDependencyData* dependencies,
HLSLccSamplerPrecisionInfo& samplerPrecisions,
HLSLccReflection& reflectionCallbacks,
GLSLShader* result)
{
FILE* shaderFile;
int length;
size_t readLength;
std::vector<char> shader;
int success = 0;
shaderFile = fopen(filename, "rb");
if (!shaderFile)
{
return 0;
}
fseek(shaderFile, 0, SEEK_END);
length = ftell(shaderFile);
fseek(shaderFile, 0, SEEK_SET);
shader.resize(length + 1);
readLength = fread(&shader[0], 1, length, shaderFile);
fclose(shaderFile);
shaderFile = 0;
shader[readLength] = '\0';
success = TranslateHLSLFromMem(&shader[0], flags, language, extensions, dependencies, samplerPrecisions, reflectionCallbacks, result);
return success;
}

View File

@@ -1,574 +0,0 @@
#include "internal_includes/HLSLccToolkit.h"
#include "internal_includes/debug.h"
#include "internal_includes/toGLSLOperand.h"
#include "internal_includes/HLSLCrossCompilerContext.h"
#include "internal_includes/Shader.h"
#include "internal_includes/languages.h"
#include "include/UnityInstancingFlexibleArraySize.h"
#include <sstream>
#include <cmath>
namespace HLSLcc
{
uint32_t GetNumberBitsSet(uint32_t a)
{
// Calculate number of bits in a
// Taken from https://graphics.stanford.edu/~seander/bithacks.html#CountBitsSet64
// Works only up to 14 bits (we're only using up to 4)
return (a * 0x200040008001ULL & 0x111111111111111ULL) % 0xf;
}
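// Illustrative check (not part of the original source): for the 4-bit write masks
// this is used with, the multiply/mask/modulo trick agrees with a naive popcount:
//     GetNumberBitsSet(0x1) == 1   // .x
//     GetNumberBitsSet(0x3) == 2   // .xy
//     GetNumberBitsSet(0xB) == 3   // .xyw
//     GetNumberBitsSet(0xF) == 4   // .xyzw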
uint32_t SVTTypeToFlag(const SHADER_VARIABLE_TYPE eType)
{
if (eType == SVT_FLOAT16)
{
return TO_FLAG_FORCE_HALF;
}
if (eType == SVT_UINT || eType == SVT_UINT16)
{
return TO_FLAG_UNSIGNED_INTEGER;
}
else if (eType == SVT_INT || eType == SVT_INT16 || eType == SVT_INT12)
{
return TO_FLAG_INTEGER;
}
else if (eType == SVT_BOOL)
{
return TO_FLAG_BOOL;
}
else
{
return TO_FLAG_NONE;
}
}
SHADER_VARIABLE_TYPE TypeFlagsToSVTType(const uint32_t typeflags)
{
if (typeflags & TO_FLAG_FORCE_HALF)
return SVT_FLOAT16;
if (typeflags & (TO_FLAG_INTEGER | TO_AUTO_BITCAST_TO_INT))
return SVT_INT;
if (typeflags & (TO_FLAG_UNSIGNED_INTEGER | TO_AUTO_BITCAST_TO_UINT))
return SVT_UINT;
if (typeflags & TO_FLAG_BOOL)
return SVT_BOOL;
return SVT_FLOAT;
}
const char * GetConstructorForTypeGLSL(const HLSLCrossCompilerContext *context, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision)
{
static const char * const uintTypes[] = { " ", "uint", "uvec2", "uvec3", "uvec4" };
static const char * const uint16Types[] = { " ", "mediump uint", "mediump uvec2", "mediump uvec3", "mediump uvec4" };
static const char * const intTypes[] = { " ", "int", "ivec2", "ivec3", "ivec4" };
static const char * const int16Types[] = { " ", "mediump int", "mediump ivec2", "mediump ivec3", "mediump ivec4" };
static const char * const int12Types[] = { " ", "lowp int", "lowp ivec2", "lowp ivec3", "lowp ivec4" };
static const char * const floatTypes[] = { " ", "float", "vec2", "vec3", "vec4" };
static const char * const float16Types[] = { " ", "mediump float", "mediump vec2", "mediump vec3", "mediump vec4" };
static const char * const float10Types[] = { " ", "lowp float", "lowp vec2", "lowp vec3", "lowp vec4" };
static const char * const boolTypes[] = { " ", "bool", "bvec2", "bvec3", "bvec4" };
ASSERT(components >= 1 && components <= 4);
bool emitLowp = EmitLowp(context);
switch (eType)
{
case SVT_UINT:
return HaveUnsignedTypes(context->psShader->eTargetLanguage) ? uintTypes[components] : intTypes[components];
case SVT_UINT16:
return useGLSLPrecision ? uint16Types[components] : uintTypes[components];
case SVT_INT:
return intTypes[components];
case SVT_INT16:
return useGLSLPrecision ? int16Types[components] : intTypes[components];
case SVT_INT12:
return useGLSLPrecision ? (emitLowp ? int12Types[components] : int16Types[components]) : intTypes[components];
case SVT_FLOAT:
return floatTypes[components];
case SVT_FLOAT16:
return useGLSLPrecision ? float16Types[components] : floatTypes[components];
case SVT_FLOAT10:
return useGLSLPrecision ? (emitLowp ? float10Types[components] : float16Types[components]) : floatTypes[components];
case SVT_BOOL:
return boolTypes[components];
default:
ASSERT(0);
return " ";
}
}
const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType, const int components)
{
static const char * const uintTypes[] = { " ", "uint", "uint2", "uint3", "uint4" };
static const char * const ushortTypes[] = { " ", "ushort", "ushort2", "ushort3", "ushort4" };
static const char * const intTypes[] = { " ", "int", "int2", "int3", "int4" };
static const char * const shortTypes[] = { " ", "short", "short2", "short3", "short4" };
static const char * const floatTypes[] = { " ", "float", "float2", "float3", "float4" };
static const char * const halfTypes[] = { " ", "half", "half2", "half3", "half4" };
static const char * const boolTypes[] = { " ", "bool", "bool2", "bool3", "bool4" };
ASSERT(components >= 1 && components <= 4);
switch (eType)
{
case SVT_UINT:
return uintTypes[components];
case SVT_UINT16:
case SVT_UINT8: // there is no uint8 in Metal, so treat it as ushort
return ushortTypes[components];
case SVT_INT:
return intTypes[components];
case SVT_INT16:
case SVT_INT12:
return shortTypes[components];
case SVT_FLOAT:
return floatTypes[components];
case SVT_FLOAT16:
case SVT_FLOAT10:
return halfTypes[components];
case SVT_BOOL:
return boolTypes[components];
default:
ASSERT(0);
return " ";
}
}
const char * GetConstructorForType(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision /* = true*/)
{
if (psContext->psShader->eTargetLanguage == LANG_METAL)
return GetConstructorForTypeMetal(eType, components);
else
return GetConstructorForTypeGLSL(psContext, eType, components, useGLSLPrecision);
}
std::string GetMatrixTypeName(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eBaseType, const int columns, const int rows)
{
std::string result;
std::ostringstream oss;
if (psContext->psShader->eTargetLanguage == LANG_METAL)
{
switch (eBaseType)
{
case SVT_FLOAT:
oss << "float" << columns << "x" << rows;
break;
case SVT_FLOAT16:
case SVT_FLOAT10:
oss << "half" << columns << "x" << rows;
break;
default:
ASSERT(0);
break;
}
}
else
{
switch (eBaseType)
{
case SVT_FLOAT:
oss << "mat" << columns << "x" << rows;
break;
case SVT_FLOAT16:
oss << "mediump mat" << columns << "x" << rows;
break;
case SVT_FLOAT10:
oss << "lowp mat" << columns << "x" << rows;
break;
default:
ASSERT(0);
break;
}
}
result = oss.str();
return result;
}
void AddSwizzleUsingElementCount(bstring dest, uint32_t count)
{
if (count == 4)
return;
if (count)
{
bcatcstr(dest, ".");
bcatcstr(dest, "x");
count--;
}
if (count)
{
bcatcstr(dest, "y");
count--;
}
if (count)
{
bcatcstr(dest, "z");
count--;
}
if (count)
{
bcatcstr(dest, "w");
count--;
}
}
// Calculate the bits set in mask
int WriteMaskToComponentCount(uint32_t writeMask)
{
// In HLSL bytecode writemask 0 also means everything
if (writeMask == 0)
return 4;
return (int)GetNumberBitsSet(writeMask);
}
uint32_t BuildComponentMaskFromElementCount(int count)
{
// Translate numComponents into bitmask
// 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15
return (1 << count) - 1;
}
// Returns true if we can do direct assignment between types (mostly for mediump<->highp floats etc)
bool DoAssignmentDataTypesMatch(SHADER_VARIABLE_TYPE dest, SHADER_VARIABLE_TYPE src)
{
if (src == dest)
return true;
if ((dest == SVT_FLOAT || dest == SVT_FLOAT10 || dest == SVT_FLOAT16) &&
(src == SVT_FLOAT || src == SVT_FLOAT10 || src == SVT_FLOAT16))
return true;
if ((dest == SVT_INT || dest == SVT_INT12 || dest == SVT_INT16) &&
(src == SVT_INT || src == SVT_INT12 || src == SVT_INT16))
return true;
if ((dest == SVT_UINT || dest == SVT_UINT16) &&
(src == SVT_UINT || src == SVT_UINT16))
return true;
return false;
}
uint32_t ResourceReturnTypeToFlag(const RESOURCE_RETURN_TYPE eType)
{
if (eType == RETURN_TYPE_SINT)
{
return TO_FLAG_INTEGER;
}
else if (eType == RETURN_TYPE_UINT)
{
return TO_FLAG_UNSIGNED_INTEGER;
}
else
{
return TO_FLAG_NONE;
}
}
SHADER_VARIABLE_TYPE ResourceReturnTypeToSVTType(const RESOURCE_RETURN_TYPE eType, const REFLECT_RESOURCE_PRECISION ePrec)
{
if (eType == RETURN_TYPE_SINT)
{
switch (ePrec)
{
default:
return SVT_INT;
case REFLECT_RESOURCE_PRECISION_LOWP:
return SVT_INT12;
case REFLECT_RESOURCE_PRECISION_MEDIUMP:
return SVT_INT16;
}
}
else if (eType == RETURN_TYPE_UINT)
{
switch (ePrec)
{
default:
return SVT_UINT;
case REFLECT_RESOURCE_PRECISION_LOWP:
return SVT_UINT8;
case REFLECT_RESOURCE_PRECISION_MEDIUMP:
return SVT_UINT16;
}
}
else
{
switch (ePrec)
{
default:
return SVT_FLOAT;
case REFLECT_RESOURCE_PRECISION_LOWP:
return SVT_FLOAT10;
case REFLECT_RESOURCE_PRECISION_MEDIUMP:
return SVT_FLOAT16;
}
}
}
RESOURCE_RETURN_TYPE SVTTypeToResourceReturnType(SHADER_VARIABLE_TYPE type)
{
switch (type)
{
case SVT_INT:
case SVT_INT12:
case SVT_INT16:
return RETURN_TYPE_SINT;
case SVT_UINT:
case SVT_UINT16:
return RETURN_TYPE_UINT;
case SVT_FLOAT:
case SVT_FLOAT10:
case SVT_FLOAT16:
return RETURN_TYPE_FLOAT;
default:
return RETURN_TYPE_UNUSED;
}
}
REFLECT_RESOURCE_PRECISION SVTTypeToPrecision(SHADER_VARIABLE_TYPE type)
{
switch (type)
{
case SVT_INT:
case SVT_UINT:
case SVT_FLOAT:
return REFLECT_RESOURCE_PRECISION_HIGHP;
case SVT_INT16:
case SVT_UINT16:
case SVT_FLOAT16:
return REFLECT_RESOURCE_PRECISION_MEDIUMP;
case SVT_INT12:
case SVT_FLOAT10:
case SVT_UINT8:
return REFLECT_RESOURCE_PRECISION_LOWP;
default:
return REFLECT_RESOURCE_PRECISION_UNKNOWN;
}
}
uint32_t ElemCountToAutoExpandFlag(uint32_t elemCount)
{
return TO_AUTO_EXPAND_TO_VEC2 << (elemCount - 2);
}
// Returns true if the operation is commutative
bool IsOperationCommutative(int eOpCode)
{
switch ((OPCODE_TYPE)eOpCode)
{
case OPCODE_DADD:
case OPCODE_IADD:
case OPCODE_ADD:
case OPCODE_MUL:
case OPCODE_IMUL:
case OPCODE_OR:
case OPCODE_AND:
return true;
default:
return false;
}
}
// Returns true if operands are identical, only cares about temp registers currently.
bool AreTempOperandsIdentical(const Operand * psA, const Operand * psB)
{
if (!psA || !psB)
return 0;
if (psA->eType != OPERAND_TYPE_TEMP || psB->eType != OPERAND_TYPE_TEMP)
return 0;
if (psA->eModifier != psB->eModifier)
return 0;
if (psA->iNumComponents != psB->iNumComponents)
return 0;
if (psA->ui32RegisterNumber != psB->ui32RegisterNumber)
return 0;
if (psA->eSelMode != psB->eSelMode)
return 0;
if (psA->eSelMode == OPERAND_4_COMPONENT_MASK_MODE && psA->ui32CompMask != psB->ui32CompMask)
return 0;
if (psA->eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE && psA->aui32Swizzle[0] != psB->aui32Swizzle[0])
return 0;
if (psA->eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE && !std::equal(&psA->aui32Swizzle[0], &psA->aui32Swizzle[4], &psB->aui32Swizzle[0]))
return 0;
return 1;
}
bool IsAddOneInstruction(const Instruction *psInst)
{
if (psInst->eOpcode != OPCODE_IADD)
return false;
if (psInst->asOperands[0].eType != OPERAND_TYPE_TEMP)
return false;
if (psInst->asOperands[1].eType == OPERAND_TYPE_TEMP)
{
if (psInst->asOperands[1].ui32RegisterNumber != psInst->asOperands[0].ui32RegisterNumber)
return false;
if (psInst->asOperands[2].eType != OPERAND_TYPE_IMMEDIATE32)
return false;
if (*(int *)&psInst->asOperands[2].afImmediates[0] != 1)
return false;
}
else
{
if (psInst->asOperands[1].eType != OPERAND_TYPE_IMMEDIATE32)
return false;
if (psInst->asOperands[2].eType != OPERAND_TYPE_TEMP)
return false;
if (psInst->asOperands[2].ui32RegisterNumber != psInst->asOperands[0].ui32RegisterNumber)
return false;
if (*(int *)&psInst->asOperands[1].afImmediates[0] != 1)
return false;
}
return true;
}
int GetNumTextureDimensions(int /* RESOURCE_DIMENSION */ eResDim)
{
switch ((RESOURCE_DIMENSION)eResDim)
{
case RESOURCE_DIMENSION_TEXTURE1D:
return 1;
case RESOURCE_DIMENSION_TEXTURE2D:
case RESOURCE_DIMENSION_TEXTURE2DMS:
case RESOURCE_DIMENSION_TEXTURE1DARRAY:
case RESOURCE_DIMENSION_TEXTURECUBE:
return 2;
case RESOURCE_DIMENSION_TEXTURE3D:
case RESOURCE_DIMENSION_TEXTURE2DARRAY:
case RESOURCE_DIMENSION_TEXTURE2DMSARRAY:
case RESOURCE_DIMENSION_TEXTURECUBEARRAY:
return 3;
default:
ASSERT(0);
break;
}
return 0;
}
// Returns the "more important" type of a and b, currently int < uint < float
SHADER_VARIABLE_TYPE SelectHigherType(SHADER_VARIABLE_TYPE a, SHADER_VARIABLE_TYPE b)
{
#define DO_CHECK(type) if( a == type || b == type ) return type
// Priority ordering
DO_CHECK(SVT_FLOAT16);
DO_CHECK(SVT_FLOAT10);
DO_CHECK(SVT_UINT16);
DO_CHECK(SVT_UINT8);
DO_CHECK(SVT_INT16);
DO_CHECK(SVT_INT12);
DO_CHECK(SVT_FORCED_INT);
DO_CHECK(SVT_FLOAT);
DO_CHECK(SVT_UINT);
DO_CHECK(SVT_INT);
DO_CHECK(SVT_INT_AMBIGUOUS);
#undef DO_CHECK
// After these just rely on ordering.
return a > b ? a : b;
}
// Returns true if a direct constructor can convert src->dest
bool CanDoDirectCast(const HLSLCrossCompilerContext *context, SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest)
{
// uint<->int<->bool conversions possible
if ((src == SVT_INT || src == SVT_UINT || src == SVT_BOOL || src == SVT_INT12 || src == SVT_INT16 || src == SVT_UINT16) &&
(dest == SVT_INT || dest == SVT_UINT || dest == SVT_BOOL || dest == SVT_INT12 || dest == SVT_INT16 || dest == SVT_UINT16))
return true;
// float<->double possible
if ((src == SVT_FLOAT || src == SVT_DOUBLE || src == SVT_FLOAT16 || src == SVT_FLOAT10) &&
(dest == SVT_FLOAT || dest == SVT_DOUBLE || dest == SVT_FLOAT16 || dest == SVT_FLOAT10))
return true;
if (context->psShader->eTargetLanguage == LANG_METAL)
{
// avoid compiler error: cannot use as_type to cast from 'half' to 'unsigned int' or 'int', types of different size
if ((src == SVT_FLOAT16 || src == SVT_FLOAT10) && (dest == SVT_UINT || dest == SVT_INT))
return true;
}
return false;
}
bool IsUnityFlexibleInstancingBuffer(const ConstantBuffer* psCBuf)
{
return psCBuf != NULL && psCBuf->asVars.size() == 1
&& psCBuf->asVars[0].sType.Class == SVC_STRUCT && psCBuf->asVars[0].sType.Elements == 2
&& IsUnityInstancingConstantBufferName(psCBuf->name.c_str());
}
#ifndef fpcheck
#ifdef _MSC_VER
#define fpcheck(x) (_isnan(x) || !_finite(x))
#else
#define fpcheck(x) (std::isnan(x) || std::isinf(x))
#endif
#endif // #ifndef fpcheck
// Helper function to print floats with full precision
void PrintFloat(bstring b, float f)
{
bstring temp;
int ePos;
int pointPos;
temp = bformat("%.9g", f);
ePos = bstrchrp(temp, 'e', 0);
pointPos = bstrchrp(temp, '.', 0);
bconcat(b, temp);
bdestroy(temp);
if (ePos < 0 && pointPos < 0 && !fpcheck(f))
bcatcstr(b, ".0");
}
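// Illustrative behaviour (not part of the original source): "%.9g" drops a
// trailing ".0", and the check above re-adds it so the literal stays a float in
// the generated GLSL/Metal source:
//     PrintFloat(b, 1.0f);    // appends "1.0"   ("%.9g" alone would print "1")
//     PrintFloat(b, 0.5f);    // appends "0.5"   (already contains a point)
//     PrintFloat(b, 1e10f);   // appends "1e+10" (exponent form is left as-is)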
bstring GetEarlyMain(HLSLCrossCompilerContext *psContext)
{
bstring *oldString = psContext->currentGLSLString;
bstring *str = &psContext->psShader->asPhases[psContext->currentPhase].earlyMain;
int indent = psContext->indent;
if (psContext->psShader->eTargetLanguage == LANG_METAL && !psContext->indent)
++psContext->indent;
psContext->currentGLSLString = str;
psContext->AddIndentation();
psContext->currentGLSLString = oldString;
psContext->indent = indent;
return *str;
}
bstring GetPostShaderCode(HLSLCrossCompilerContext *psContext)
{
bstring *oldString = psContext->currentGLSLString;
bstring *str = &psContext->psShader->asPhases[psContext->currentPhase].postShaderCode;
int indent = psContext->indent;
if (psContext->psShader->eTargetLanguage == LANG_METAL && !psContext->indent)
++psContext->indent;
psContext->psShader->asPhases[psContext->currentPhase].hasPostShaderCode = 1;
psContext->currentGLSLString = str;
psContext->AddIndentation();
psContext->currentGLSLString = oldString;
psContext->indent = indent;
return *str;
}
}

View File

@@ -1,10 +0,0 @@
<?xml version="1.0" encoding="utf-8"?>
<AutoVisualizer xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010">
<Type Name="Instruction">
<DisplayString>{{ id={id} op={eOpcode} o0={asOperands[0]}, o1={asOperands[1]}}}</DisplayString>
</Type>
<Type Name="Operand">
<DisplayString>{{ type={eType}, reg={ui32RegisterNumber} }}</DisplayString>
</Type>
</AutoVisualizer>

View File

@@ -1,349 +0,0 @@
#include "internal_includes/Instruction.h"
#include "internal_includes/debug.h"
#include "include/ShaderInfo.h"
// Returns the result swizzle operand for an instruction, or NULL if all src operands have swizzles
static Operand *GetSrcSwizzleOperand(Instruction *psInst)
{
switch (psInst->eOpcode)
{
case OPCODE_DP2:
case OPCODE_DP3:
case OPCODE_DP4:
case OPCODE_NOP:
case OPCODE_SWAPC:
case OPCODE_SAMPLE_C:
case OPCODE_SAMPLE_C_LZ:
ASSERT(0);
return NULL;
// Normal arithmetic, all srcs have swizzles
case OPCODE_ADD:
case OPCODE_AND:
case OPCODE_DERIV_RTX:
case OPCODE_DERIV_RTX_COARSE:
case OPCODE_DERIV_RTX_FINE:
case OPCODE_DERIV_RTY:
case OPCODE_DERIV_RTY_COARSE:
case OPCODE_DERIV_RTY_FINE:
case OPCODE_DIV:
case OPCODE_EQ:
case OPCODE_EXP:
case OPCODE_FRC:
case OPCODE_FTOI:
case OPCODE_FTOU:
case OPCODE_GE:
case OPCODE_IADD:
case OPCODE_IEQ:
case OPCODE_IGE:
case OPCODE_ILT:
case OPCODE_IMAD:
case OPCODE_IMAX:
case OPCODE_IMIN:
case OPCODE_IMUL:
case OPCODE_INE:
case OPCODE_INEG:
case OPCODE_ITOF:
case OPCODE_LOG:
case OPCODE_LT:
case OPCODE_MAD:
case OPCODE_MAX:
case OPCODE_MIN:
case OPCODE_MOV:
case OPCODE_MUL:
case OPCODE_NE:
case OPCODE_NOT:
case OPCODE_OR:
case OPCODE_ROUND_NE:
case OPCODE_ROUND_NI:
case OPCODE_ROUND_PI:
case OPCODE_ROUND_Z:
case OPCODE_RSQ:
case OPCODE_SINCOS:
case OPCODE_SQRT:
case OPCODE_UDIV:
case OPCODE_UGE:
case OPCODE_ULT:
case OPCODE_UMAD:
case OPCODE_UMAX:
case OPCODE_UMIN:
case OPCODE_UMUL:
case OPCODE_UTOF:
case OPCODE_XOR:
case OPCODE_BFI:
case OPCODE_BFREV:
case OPCODE_COUNTBITS:
case OPCODE_DADD:
case OPCODE_DDIV:
case OPCODE_DEQ:
case OPCODE_DFMA:
case OPCODE_DGE:
case OPCODE_DLT:
case OPCODE_DMAX:
case OPCODE_DMIN:
case OPCODE_DMUL:
case OPCODE_DMOV:
case OPCODE_DNE:
case OPCODE_DRCP:
case OPCODE_DTOF:
case OPCODE_F16TOF32:
case OPCODE_F32TOF16:
case OPCODE_FIRSTBIT_HI:
case OPCODE_FIRSTBIT_LO:
case OPCODE_FIRSTBIT_SHI:
case OPCODE_FTOD:
case OPCODE_IBFE:
case OPCODE_RCP:
case OPCODE_UADDC:
case OPCODE_UBFE:
case OPCODE_USUBB:
case OPCODE_MOVC:
case OPCODE_DMOVC:
return NULL;
// Special cases:
case OPCODE_GATHER4:
case OPCODE_GATHER4_C:
case OPCODE_LD:
case OPCODE_LD_MS:
case OPCODE_LOD:
case OPCODE_LD_UAV_TYPED:
case OPCODE_LD_RAW:
case OPCODE_SAMPLE:
case OPCODE_SAMPLE_B:
case OPCODE_SAMPLE_L:
case OPCODE_SAMPLE_D:
case OPCODE_RESINFO:
return &psInst->asOperands[2];
case OPCODE_GATHER4_PO:
case OPCODE_GATHER4_PO_C:
case OPCODE_LD_STRUCTURED:
return &psInst->asOperands[3];
case OPCODE_SAMPLE_INFO:
return &psInst->asOperands[1];
case OPCODE_ISHL:
case OPCODE_ISHR:
case OPCODE_USHR:
// sm4 variant has single component selection on src1 -> only src0 has swizzle
if (psInst->asOperands[2].eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE)
return &psInst->asOperands[1];
else // whereas sm5 variant has swizzle also on src1
return NULL;
default:
ASSERT(0);
return NULL;
}
}
// Tweak the source operands of an instruction so that the rebased write mask will still work
static void DoSrcOperandRebase(Operand *psOperand, uint32_t rebase)
{
uint32_t i;
switch (psOperand->eSelMode)
{
default:
case OPERAND_4_COMPONENT_MASK_MODE:
ASSERT(psOperand->ui32CompMask == 0 || psOperand->ui32CompMask == OPERAND_4_COMPONENT_MASK_ALL);
// Special case for immediates, they do not have swizzles
if (psOperand->eType == OPERAND_TYPE_IMMEDIATE32)
{
if (psOperand->iNumComponents > 1)
std::copy(&psOperand->afImmediates[rebase], &psOperand->afImmediates[4], &psOperand->afImmediates[0]);
return;
}
if (psOperand->eType == OPERAND_TYPE_IMMEDIATE64)
{
if (psOperand->iNumComponents > 1)
std::copy(&psOperand->adImmediates[rebase], &psOperand->adImmediates[4], &psOperand->adImmediates[0]);
return;
}
// Need to change this to swizzle
psOperand->eSelMode = OPERAND_4_COMPONENT_SWIZZLE_MODE;
psOperand->ui32Swizzle = 0;
for (i = 0; i < 4 - rebase; i++)
psOperand->aui32Swizzle[i] = i + rebase;
for (; i < 4; i++)
psOperand->aui32Swizzle[i] = rebase; // The first actual input.
break;
case OPERAND_4_COMPONENT_SELECT_1_MODE:
// Nothing to do
break;
case OPERAND_4_COMPONENT_SWIZZLE_MODE:
for (i = rebase; i < 4; i++)
psOperand->aui32Swizzle[i - rebase] = psOperand->aui32Swizzle[i];
break;
}
}
void Instruction::ChangeOperandTempRegister(Operand *psOperand, uint32_t oldReg, uint32_t newReg, uint32_t compMask, uint32_t flags, uint32_t rebase)
{
uint32_t i = 0;
uint32_t accessMask = 0;
int isDestination = 0;
Operand *psSwizzleOperand = NULL;
if (flags & UD_CHANGE_SUBOPERANDS)
{
for (i = 0; i < MAX_SUB_OPERANDS; i++)
{
if (psOperand->m_SubOperands[i].get())
ChangeOperandTempRegister(psOperand->m_SubOperands[i].get(), oldReg, newReg, compMask, UD_CHANGE_ALL, rebase);
}
}
if ((flags & UD_CHANGE_MAIN_OPERAND) == 0)
return;
if (psOperand->eType != OPERAND_TYPE_TEMP)
return;
if (psOperand->ui32RegisterNumber != oldReg)
return;
accessMask = psOperand->GetAccessMask();
// If this operation touches other components than the one(s) we're splitting, skip it
if ((accessMask & (~compMask)) != 0)
{
// Verify that we've not messed up in reachability analysis.
// This would mean that we've encountered an instruction that accesses
// a component in multi-component mode and we're supposed to treat it as single-use only.
// Now that we track operands we can bring this back
ASSERT((accessMask & compMask) == 0);
return;
}
#if 0
printf("Updating operand %d with access mask %X\n", (int)psOperand->id, accessMask);
#endif
psOperand->ui32RegisterNumber = newReg;
if (rebase == 0)
return;
// Update component mask. Note that we don't need to do anything to the suboperands. They do not affect destination writemask.
switch (psOperand->eSelMode)
{
case OPERAND_4_COMPONENT_MASK_MODE:
{
uint32_t oldMask = psOperand->ui32CompMask;
if (oldMask == 0)
oldMask = OPERAND_4_COMPONENT_MASK_ALL;
// Check that we're not losing any information
ASSERT((oldMask >> rebase) << rebase == oldMask);
psOperand->ui32CompMask = (oldMask >> rebase);
break;
}
case OPERAND_4_COMPONENT_SELECT_1_MODE:
ASSERT(psOperand->aui32Swizzle[0] >= rebase);
psOperand->aui32Swizzle[0] -= rebase;
break;
case OPERAND_4_COMPONENT_SWIZZLE_MODE:
{
for (i = 0; i < 4; i++)
{
// Note that this rebase is different from the one done for source operands
ASSERT(psOperand->aui32Swizzle[i] >= rebase);
psOperand->aui32Swizzle[i] -= rebase;
}
break;
}
default:
ASSERT(0);
}
// Tweak operand datatypes
std::copy(&psOperand->aeDataType[rebase], &psOperand->aeDataType[4], &psOperand->aeDataType[0]);
// If this operand is a destination, we'll need to tweak sources as well
for (i = 0; i < ui32FirstSrc; i++)
{
if (psOperand == &asOperands[i])
{
isDestination = 1;
break;
}
}
if (isDestination == 0)
return;
// Nasty corner case of 2 destinations, not supported if both targets are written
ASSERT((ui32FirstSrc < 2) || (asOperands[0].eType == OPERAND_TYPE_NULL) || (asOperands[1].eType == OPERAND_TYPE_NULL));
// If we made it this far, we're rebasing a destination temp (and the only destination), need to tweak sources depending on the instruction
switch (eOpcode)
{
// The opcodes that do not need tweaking:
case OPCODE_DP2:
case OPCODE_DP3:
case OPCODE_DP4:
case OPCODE_BUFINFO:
case OPCODE_SAMPLE_C:
case OPCODE_SAMPLE_C_LZ:
return;
default:
psSwizzleOperand = GetSrcSwizzleOperand(this); // Null means tweak all source operands
if (psSwizzleOperand)
{
DoSrcOperandRebase(psSwizzleOperand, rebase);
return;
}
else
{
for (i = ui32FirstSrc; i < ui32NumOperands; i++)
{
DoSrcOperandRebase(&asOperands[i], rebase);
}
}
return;
}
}
// Returns nonzero if psInst is a sample instruction and the sampler has medium or low precision
bool Instruction::IsPartialPrecisionSamplerInstruction(const ShaderInfo &info, OPERAND_MIN_PRECISION *pType) const
{
const Operand *op;
const ResourceBinding *psBinding = NULL;
OPERAND_MIN_PRECISION sType = OPERAND_MIN_PRECISION_DEFAULT;
switch (eOpcode)
{
default:
return false;
case OPCODE_SAMPLE:
case OPCODE_SAMPLE_B:
case OPCODE_SAMPLE_L:
case OPCODE_SAMPLE_D:
case OPCODE_SAMPLE_C:
case OPCODE_SAMPLE_C_LZ:
break;
}
op = &asOperands[3];
ASSERT(op->eType == OPERAND_TYPE_SAMPLER);
info.GetResourceFromBindingPoint(RGROUP_SAMPLER, op->ui32RegisterNumber, &psBinding);
if (!psBinding)
{
/* Try to look from texture group */
info.GetResourceFromBindingPoint(RGROUP_TEXTURE, op->ui32RegisterNumber, &psBinding);
}
sType = Operand::ResourcePrecisionToOperandPrecision(psBinding ? psBinding->ePrecision : REFLECT_RESOURCE_PRECISION_UNKNOWN);
if (sType == OPERAND_MIN_PRECISION_DEFAULT)
return false;
if (pType)
*pType = sType;
return true;
}

View File

@@ -1,370 +0,0 @@
#include "src/internal_includes/HLSLCrossCompilerContext.h"
#include "src/internal_includes/LoopTransform.h"
#include "src/internal_includes/Shader.h"
#include "src/internal_includes/debug.h"
#include <algorithm>
#include <vector>
#include <list>
namespace HLSLcc
{
struct LoopInfo
{
public:
LoopInfo() : m_StartLoop(0), m_EndLoop(0), m_ExitPoints(), m_IsSwitch(false) {}
Instruction * m_StartLoop; // OPCODE_LOOP
Instruction * m_EndLoop; // OPCODE_ENDLOOP that matches the LOOP above.
std::vector<Instruction *> m_ExitPoints; // Any BREAK/RET/BREAKC instructions within the same loop depth
bool m_IsSwitch; // True if this is a switch-case and not a LOOP/ENDLOOP pair. Used as a helper when parsing.
};
typedef std::list<LoopInfo> Loops;
// Build a loopinfo array of all the loops in this shader phase
void BuildLoopInfo(ShaderPhase &phase, Loops &res)
{
using namespace std;
res.clear();
// A stack of loopinfo elements (stored in res)
list<LoopInfo *> loopStack;
// Storage for dummy LoopInfo elements to be used for switch-cases. We don't want them cluttering the Loops list so store them here.
list<LoopInfo> dummyLIForSwitches;
for (std::vector<Instruction>::iterator instItr = phase.psInst.begin(); instItr != phase.psInst.end(); instItr++)
{
Instruction *i = &*instItr;
if (i->eOpcode == OPCODE_LOOP)
{
LoopInfo *currLoopInfo = &*res.insert(res.end(), LoopInfo());
currLoopInfo->m_StartLoop = i;
loopStack.push_front(currLoopInfo);
}
else if (i->eOpcode == OPCODE_ENDLOOP)
{
ASSERT(!loopStack.empty());
LoopInfo *li = *loopStack.begin();
loopStack.pop_front();
li->m_EndLoop = i;
}
else if (i->eOpcode == OPCODE_SWITCH)
{
// Create a dummy entry into the stack
LoopInfo *li = &*dummyLIForSwitches.insert(dummyLIForSwitches.end(), LoopInfo());
li->m_IsSwitch = true;
loopStack.push_front(li);
}
else if (i->eOpcode == OPCODE_ENDSWITCH)
{
ASSERT(!loopStack.empty());
LoopInfo *li = *loopStack.begin();
loopStack.pop_front();
ASSERT(li->m_IsSwitch);
}
else if (i->eOpcode == OPCODE_BREAK || i->eOpcode == OPCODE_BREAKC)
{
// Get the current loopstack head
ASSERT(!loopStack.empty());
LoopInfo *li = *loopStack.begin();
// Ignore breaks from switch-cases
if (!li->m_IsSwitch)
{
li->m_ExitPoints.push_back(i);
}
}
}
}
// Returns true if the given instruction is a non-vectorized int or uint comparison instruction that reads from at least one temp and writes to a temp
static bool IsScalarTempComparisonInstruction(const Instruction *i)
{
switch (i->eOpcode)
{
default:
return false;
case OPCODE_IGE:
case OPCODE_ILT:
case OPCODE_IEQ:
case OPCODE_INE:
case OPCODE_UGE:
case OPCODE_ULT:
break;
}
if (i->asOperands[0].eType != OPERAND_TYPE_TEMP)
return false;
int tempOp = -1;
if (i->asOperands[1].eType == OPERAND_TYPE_TEMP)
tempOp = 1;
else if (i->asOperands[2].eType == OPERAND_TYPE_TEMP)
tempOp = 2;
// Also reject comparisons where we compare temp.x vs temp.y
if (i->asOperands[1].eType == OPERAND_TYPE_TEMP && i->asOperands[2].eType == OPERAND_TYPE_TEMP && i->asOperands[1].ui32RegisterNumber == i->asOperands[2].ui32RegisterNumber)
return false;
if (tempOp == -1)
return false;
if (i->asOperands[0].GetNumSwizzleElements() != 1)
return false;
return true;
}
// Returns true iff both instructions perform an identical operation. For the purposes of loop transformation, we only consider operations of the form tX = tX <op> imm32
static bool AreInstructionsIdentical(const Instruction *a, const Instruction *b)
{
if (a->eOpcode != b->eOpcode)
return false;
ASSERT(a->ui32NumOperands == b->ui32NumOperands);
uint32_t dstReg = 0;
if (a->asOperands[0].eType != OPERAND_TYPE_TEMP)
return false;
dstReg = a->asOperands[0].ui32RegisterNumber;
for (uint32_t i = 0; i < a->ui32NumOperands; i++)
{
const Operand &aop = a->asOperands[i];
const Operand &bop = b->asOperands[i];
if (aop.eType != bop.eType)
return false;
if (aop.GetAccessMask() != bop.GetAccessMask())
return false;
if (aop.GetNumSwizzleElements() != 1)
return false;
if (aop.eType == OPERAND_TYPE_TEMP)
{
if (aop.ui32RegisterNumber != bop.ui32RegisterNumber)
return false;
if (aop.ui32RegisterNumber != dstReg)
return false;
}
else if (aop.eType == OPERAND_TYPE_IMMEDIATE32)
{
if (memcmp(aop.afImmediates, bop.afImmediates, 4 * sizeof(float)) != 0)
return false;
}
}
return true;
}
// Attempt to transform a single loop into a for-statement
static void AttemptLoopTransform(HLSLCrossCompilerContext *psContext, ShaderPhase &phase, LoopInfo &li)
{
// In order to transform a loop into a for, the following has to hold:
// - The loop must start with a comparison instruction where one of the src operands is a temp (induction variable), followed by OPCODE_BREAKC.
// - The loop must end with an arithmetic operation (SUB or ADD) where the dest operand is the same temp as one of the sources in the comparison instruction above
// Additionally, if the loop induction variable is initialized before the start of the loop and is only used inside the LOOP/ENDLOOP pair, we can declare it inside the for statement.
// Also, the loop induction variable must be standalone (as in, never used as part of a larger vector)
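// Illustrative shape of the pattern being matched (sketch only, not actual DXBC
// disassembly from the original source):
//     mov  r0.x, l(0)          // optional initializer, defined outside the loop
//     loop
//       ige  r1.x, r0.x, l(4)  // scalar comparison on the induction temp
//       breakc_nz r1.x
//       ...body...
//       iadd r0.x, r0.x, l(1)  // last instruction advances the induction temp
//     endloop
// which the transform can then emit as: for (int i = 0; i < 4; ++i) { ...body... }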
Instruction *cmpInst = li.m_StartLoop + 1;
if (!IsScalarTempComparisonInstruction(cmpInst))
return;
Instruction *breakInst = li.m_StartLoop + 2;
if (breakInst->eOpcode != OPCODE_BREAKC)
return;
if (breakInst->asOperands[0].eType != OPERAND_TYPE_TEMP)
return;
if (breakInst->asOperands[0].ui32RegisterNumber != cmpInst->asOperands[0].ui32RegisterNumber)
return;
// Check that the comparison result isn't used anywhere else
if (cmpInst->m_Uses.size() != 1)
return;
ASSERT(cmpInst->m_Uses[0].m_Inst == breakInst);
// Ok, at least we have the comparison + breakc combo at top. Try to find the induction variable
uint32_t inductionVarIdx = 0;
Instruction *lastInst = li.m_EndLoop - 1;
if (lastInst->eOpcode != OPCODE_IADD)
return;
if (lastInst->asOperands[0].eType != OPERAND_TYPE_TEMP)
return;
if (lastInst->asOperands[0].GetNumSwizzleElements() != 1)
return;
uint32_t indVar = lastInst->asOperands[0].ui32RegisterNumber;
// Verify that the induction variable actually matches.
if (cmpInst->asOperands[1].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[1].ui32RegisterNumber == indVar)
inductionVarIdx = 1;
else if (cmpInst->asOperands[2].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[2].ui32RegisterNumber == indVar)
inductionVarIdx = 2;
else
return;
// Verify that we also read from the induction variable in the last instruction
if (!((lastInst->asOperands[1].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[1].ui32RegisterNumber == indVar) ||
(lastInst->asOperands[2].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[2].ui32RegisterNumber == indVar)))
return;
// Nvidia compiler bug workaround: The shader compiler tries to be smart and unrolls constant loops,
// but then fails miserably if the loop variable is used as an index to UAV loads/stores or some other cases ("array access too complex")
// This is also triggered when the driver optimizer sees "simple enough" arithmetics (whatever that is) done on the loop variable before indexing.
// So, disable for-loop transformation altogether whenever we see a UAV load or store inside a loop.
if (psContext->psShader->eTargetLanguage >= LANG_400 && psContext->psShader->eTargetLanguage < LANG_GL_LAST && !psContext->IsVulkan())
{
for (auto itr = li.m_StartLoop; itr != li.m_EndLoop; itr++)
{
switch (itr->eOpcode)
{
case OPCODE_LD_RAW:
case OPCODE_LD_STRUCTURED:
case OPCODE_LD_UAV_TYPED:
case OPCODE_STORE_RAW:
case OPCODE_STORE_STRUCTURED:
case OPCODE_STORE_UAV_TYPED:
return; // Nope, can't do a for, not even a partial one.
default:
break;
}
}
}
// One more thing to check: The comparison input may only see 1 definition that originates from inside the loop range: the one in lastInst.
// Anything else means that there's a continue statement, or another break/breakc and that means that lastInst wouldn't get called.
// Of course, if all those instructions are identical, then it's fine.
// Ideally, if there's only one definition that's from outside the loop range, then we can use that as the initializer, as well.
Instruction *initializer = NULL;
std::vector<const Operand::Define *> definitionsOutsideRange;
std::vector<const Operand::Define *> definitionsInsideRange;
std::for_each(cmpInst->asOperands[inductionVarIdx].m_Defines.begin(), cmpInst->asOperands[inductionVarIdx].m_Defines.end(), [&](const Operand::Define &def)
{
if (def.m_Inst < li.m_StartLoop || def.m_Inst > li.m_EndLoop)
definitionsOutsideRange.push_back(&def);
else
definitionsInsideRange.push_back(&def);
});
if (definitionsInsideRange.size() != 1)
{
// All definitions must be identical
for (std::vector<const Operand::Define*>::iterator itr = definitionsInsideRange.begin() + 1; itr != definitionsInsideRange.end(); itr++)
{
if (!AreInstructionsIdentical((*itr)->m_Inst, definitionsInsideRange[0]->m_Inst))
return;
}
}
ASSERT(definitionsOutsideRange.size() > 0);
if (definitionsOutsideRange.size() == 1)
initializer = definitionsOutsideRange[0]->m_Inst;
// Initializer must only write to one component
if (initializer && initializer->asOperands[0].GetNumSwizzleElements() != 1)
initializer = 0;
// Initializer data type must be int or uint
if (initializer)
{
SHADER_VARIABLE_TYPE dataType = initializer->asOperands[0].GetDataType(psContext);
if (dataType != SVT_INT && dataType != SVT_UINT)
return;
}
// Check that the initializer is only used within the range so we can move it to for statement
if (initializer)
{
bool hasUsesOutsideRange = false;
std::for_each(initializer->m_Uses.begin(), initializer->m_Uses.end(), [&](const Instruction::Use &u)
{
if (u.m_Inst < li.m_StartLoop || u.m_Inst > li.m_EndLoop)
hasUsesOutsideRange = true;
});
// Has outside uses? we cannot pull that up to the for statement
if (hasUsesOutsideRange)
initializer = 0;
}
// Check that the loop adder instruction only has uses inside the loop range, otherwise we cannot move the initializer either
if (initializer)
{
bool cannotDoInitializer = false;
for (auto itr = lastInst->m_Uses.begin(); itr != lastInst->m_Uses.end(); itr++)
{
const Instruction::Use &u = *itr;
if (u.m_Inst < li.m_StartLoop || u.m_Inst > li.m_EndLoop)
{
cannotDoInitializer = true;
break;
}
// Also check that the uses are not vector ops (temp splitting has already pulled everything to .x if this is a standalone var)
if (u.m_Op->GetAccessMask() != 1)
{
cannotDoInitializer = true;
break;
}
}
// Has outside uses? we cannot pull that up to the for statement
if (cannotDoInitializer)
initializer = 0;
}
if (initializer)
{
// We can declare the initializer in the for loop header, allocate a new number for it and change all uses into that.
uint32_t newRegister = phase.m_NextFreeTempRegister++;
li.m_StartLoop->m_InductorRegister = newRegister;
std::for_each(initializer->m_Uses.begin(), initializer->m_Uses.end(), [newRegister](const Instruction::Use &u)
{
u.m_Op->m_ForLoopInductorName = newRegister;
});
// Also tweak the destinations for cmpInst, and lastInst
if (cmpInst->asOperands[1].eType == OPERAND_TYPE_TEMP && cmpInst->asOperands[1].ui32RegisterNumber == initializer->asOperands[0].ui32RegisterNumber)
cmpInst->asOperands[1].m_ForLoopInductorName = newRegister;
else
cmpInst->asOperands[2].m_ForLoopInductorName = newRegister;
if (lastInst->asOperands[1].eType == OPERAND_TYPE_TEMP && lastInst->asOperands[1].ui32RegisterNumber == initializer->asOperands[0].ui32RegisterNumber)
lastInst->asOperands[1].m_ForLoopInductorName = newRegister;
else
lastInst->asOperands[2].m_ForLoopInductorName = newRegister;
lastInst->asOperands[0].m_ForLoopInductorName = newRegister;
initializer->asOperands[0].m_ForLoopInductorName = newRegister;
}
// This loop can be transformed to for-loop. Do the necessary magicks.
li.m_StartLoop->m_LoopInductors[0] = initializer;
li.m_StartLoop->m_LoopInductors[1] = cmpInst;
li.m_StartLoop->m_LoopInductors[2] = breakInst;
li.m_StartLoop->m_LoopInductors[3] = lastInst;
if (initializer)
initializer->m_SkipTranslation = true;
cmpInst->m_SkipTranslation = true;
breakInst->m_SkipTranslation = true;
lastInst->m_SkipTranslation = true;
}
void DoLoopTransform(HLSLCrossCompilerContext *psContext, ShaderPhase &phase)
{
Loops loops;
BuildLoopInfo(phase, loops);
std::for_each(loops.begin(), loops.end(), [&phase, psContext](LoopInfo &li)
{
// Some sanity checks: start and end points must be initialized, we shouldn't have any switches here, and each loop must have at least one exit point
// Also that there's at least 2 instructions in loop body
ASSERT(li.m_StartLoop != 0);
ASSERT(li.m_EndLoop != 0);
ASSERT(li.m_EndLoop > li.m_StartLoop + 2);
ASSERT(!li.m_IsSwitch);
ASSERT(!li.m_ExitPoints.empty());
AttemptLoopTransform(psContext, phase, li);
});
}
}

View File

@@ -1,641 +0,0 @@
#include "internal_includes/Operand.h"
#include "internal_includes/debug.h"
#include "internal_includes/HLSLccToolkit.h"
#include "internal_includes/Shader.h"
#include "internal_includes/HLSLCrossCompilerContext.h"
#include "internal_includes/Instruction.h"
uint32_t Operand::GetAccessMask() const
{
int i;
uint32_t accessMask = 0;
// NOTE: Destination writemask can (AND DOES) affect access from sources, but we do it conservatively for now.
switch (eSelMode)
{
default:
case OPERAND_4_COMPONENT_MASK_MODE:
// Update access mask
accessMask = ui32CompMask;
if (accessMask == 0)
accessMask = OPERAND_4_COMPONENT_MASK_ALL;
break;
case OPERAND_4_COMPONENT_SWIZZLE_MODE:
accessMask = 0;
for (i = 0; i < 4; i++)
accessMask |= 1 << (aui32Swizzle[i]);
break;
case OPERAND_4_COMPONENT_SELECT_1_MODE:
accessMask = 1 << (aui32Swizzle[0]);
break;
}
ASSERT(accessMask != 0);
return accessMask;
}
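// Illustrative example (not part of the original source): a source swizzle of
// .yzxy reads components x, y and z, so the swizzle branch above returns
// 0x7 (bits x|y|z); a select-1 operand reading .w returns 0x8.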
int Operand::GetMaxComponent() const
{
if (iWriteMaskEnabled &&
iNumComponents == 4)
{
//Component Mask
if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE)
{
if (ui32CompMask != 0 && ui32CompMask != (OPERAND_4_COMPONENT_MASK_X | OPERAND_4_COMPONENT_MASK_Y | OPERAND_4_COMPONENT_MASK_Z | OPERAND_4_COMPONENT_MASK_W))
{
if (ui32CompMask & OPERAND_4_COMPONENT_MASK_W)
{
return 4;
}
if (ui32CompMask & OPERAND_4_COMPONENT_MASK_Z)
{
return 3;
}
if (ui32CompMask & OPERAND_4_COMPONENT_MASK_Y)
{
return 2;
}
if (ui32CompMask & OPERAND_4_COMPONENT_MASK_X)
{
return 1;
}
}
}
else
//Component Swizzle
if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
{
if (ui32Swizzle == NO_SWIZZLE)
return 4;
uint32_t res = 0;
for (int i = 0; i < 4; i++)
{
res = std::max(aui32Swizzle[i], res);
}
return (int)res + 1;
}
else if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE)
{
return 1;
}
}
return 4;
}
//Single component repeated
//e..g .wwww
bool Operand::IsSwizzleReplicated() const
{
if (iWriteMaskEnabled &&
iNumComponents == 4)
{
if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
{
if (ui32Swizzle == WWWW_SWIZZLE ||
ui32Swizzle == ZZZZ_SWIZZLE ||
ui32Swizzle == YYYY_SWIZZLE ||
ui32Swizzle == XXXX_SWIZZLE)
{
return true;
}
}
}
return false;
}
// Get the number of elements returned by operand, taking additional component mask into account
uint32_t Operand::GetNumSwizzleElements(uint32_t _ui32CompMask /* = OPERAND_4_COMPONENT_MASK_ALL */) const
{
uint32_t count = 0;
switch (eType)
{
case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP:
case OPERAND_TYPE_INPUT_THREAD_ID:
case OPERAND_TYPE_INPUT_THREAD_GROUP_ID:
// Adjust the component count, then break out of the switch to continue processing below
((Operand *)this)->iNumComponents = 3;
break;
case OPERAND_TYPE_IMMEDIATE32:
case OPERAND_TYPE_IMMEDIATE64:
case OPERAND_TYPE_OUTPUT_DEPTH_GREATER_EQUAL:
case OPERAND_TYPE_OUTPUT_DEPTH_LESS_EQUAL:
case OPERAND_TYPE_OUTPUT_DEPTH:
{
// Translate numComponents into bitmask
// 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15
uint32_t compMask = (1 << iNumComponents) - 1;
compMask &= _ui32CompMask;
// Calculate bits left in compMask
return HLSLcc::GetNumberBitsSet(compMask);
}
default:
{
break;
}
}
if (iWriteMaskEnabled &&
iNumComponents != 1)
{
//Component Mask
if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE)
{
uint32_t compMask = ui32CompMask;
if (compMask == 0)
compMask = OPERAND_4_COMPONENT_MASK_ALL;
compMask &= _ui32CompMask;
if (compMask == OPERAND_4_COMPONENT_MASK_ALL)
return 4;
if (compMask & OPERAND_4_COMPONENT_MASK_X)
{
count++;
}
if (compMask & OPERAND_4_COMPONENT_MASK_Y)
{
count++;
}
if (compMask & OPERAND_4_COMPONENT_MASK_Z)
{
count++;
}
if (compMask & OPERAND_4_COMPONENT_MASK_W)
{
count++;
}
}
else
//Component Swizzle
if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
{
uint32_t i;
for (i = 0; i < 4; ++i)
{
if ((_ui32CompMask & (1 << i)) == 0)
continue;
count++;
}
}
else if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE)
{
if (aui32Swizzle[0] == OPERAND_4_COMPONENT_X && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_X))
{
count++;
}
else if (aui32Swizzle[0] == OPERAND_4_COMPONENT_Y && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_Y))
{
count++;
}
else if (aui32Swizzle[0] == OPERAND_4_COMPONENT_Z && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_Z))
{
count++;
}
else if (aui32Swizzle[0] == OPERAND_4_COMPONENT_W && (_ui32CompMask & OPERAND_4_COMPONENT_MASK_W))
{
count++;
}
}
//Component Select 1
}
if (!count)
{
// Translate numComponents into bitmask
// 1 -> 1, 2 -> 3, 3 -> 7 and 4 -> 15
uint32_t compMask = (1 << iNumComponents) - 1;
compMask &= _ui32CompMask;
// Calculate bits left in compMask
return HLSLcc::GetNumberBitsSet(compMask);
}
return count;
}
// Returns 0 if the register used by the operand is per-vertex, or 1 if per-patch
int Operand::GetRegisterSpace(SHADER_TYPE eShaderType, SHADER_PHASE_TYPE eShaderPhaseType) const
{
if (eShaderType != HULL_SHADER && eShaderType != DOMAIN_SHADER)
return 0;
if (eShaderType == HULL_SHADER && eShaderPhaseType == HS_CTRL_POINT_PHASE)
return 0;
if (eShaderType == DOMAIN_SHADER && eType == OPERAND_TYPE_OUTPUT)
return 0;
if (eType == OPERAND_TYPE_INPUT_CONTROL_POINT || eType == OPERAND_TYPE_OUTPUT_CONTROL_POINT)
return 0;
return 1;
}
int Operand::GetRegisterSpace(const HLSLCrossCompilerContext *psContext) const
{
return GetRegisterSpace(psContext->psShader->eShaderType, psContext->psShader->asPhases[psContext->currentPhase].ePhase);
}
SHADER_VARIABLE_TYPE Operand::GetDataType(HLSLCrossCompilerContext* psContext, SHADER_VARIABLE_TYPE ePreferredTypeForImmediates /* = SVT_INT */) const
{
// indexable temps (temp arrays) are always float
if (eType == OPERAND_TYPE_INDEXABLE_TEMP)
return SVT_FLOAT;
// The min precision qualifier overrides all of the stuff below
switch (eMinPrecision)
{
case OPERAND_MIN_PRECISION_FLOAT_16:
return SVT_FLOAT16;
case OPERAND_MIN_PRECISION_FLOAT_2_8:
return SVT_FLOAT10;
case OPERAND_MIN_PRECISION_SINT_16:
return SVT_INT16;
case OPERAND_MIN_PRECISION_UINT_16:
return SVT_UINT16;
default:
break;
}
switch (eType)
{
case OPERAND_TYPE_TEMP:
{
SHADER_VARIABLE_TYPE eCurrentType = SVT_FLOAT;
int i = 0;
if (eSelMode == OPERAND_4_COMPONENT_SELECT_1_MODE)
{
return aeDataType[aui32Swizzle[0]];
}
if (eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
{
if (ui32Swizzle == (NO_SWIZZLE))
{
return aeDataType[0];
}
return aeDataType[aui32Swizzle[0]];
}
if (eSelMode == OPERAND_4_COMPONENT_MASK_MODE)
{
uint32_t mask = ui32CompMask;
if (!mask)
{
mask = OPERAND_4_COMPONENT_MASK_ALL;
}
for (; i < 4; ++i)
{
if (mask & (1 << i))
{
eCurrentType = aeDataType[i];
break;
}
}
#ifdef _DEBUG
//Check if all elements have the same basic type.
for (; i < 4; ++i)
{
if (mask & (1 << i))
{
if (eCurrentType != aeDataType[i])
{
ASSERT(0);
}
}
}
#endif
return eCurrentType;
}
ASSERT(0);
break;
}
case OPERAND_TYPE_OUTPUT:
{
const uint32_t ui32Register = ui32RegisterNumber;
int regSpace = GetRegisterSpace(psContext);
const ShaderInfo::InOutSignature* psOut = NULL;
if (regSpace == 0)
psContext->psShader->sInfo.GetOutputSignatureFromRegister(ui32Register, GetAccessMask(), psContext->psShader->ui32CurrentVertexOutputStream,
&psOut);
else
{
psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Register, GetAccessMask(), &psOut, true);
if (!psOut)
return SVT_FLOAT;
}
ASSERT(psOut != NULL);
if (psOut->eMinPrec != MIN_PRECISION_DEFAULT)
{
switch (psOut->eMinPrec)
{
default:
ASSERT(0);
break;
case MIN_PRECISION_FLOAT_16:
return SVT_FLOAT16;
case MIN_PRECISION_FLOAT_2_8:
if (psContext->psShader->eTargetLanguage == LANG_METAL)
return SVT_FLOAT16;
else
return SVT_FLOAT10;
case MIN_PRECISION_SINT_16:
return SVT_INT16;
case MIN_PRECISION_UINT_16:
return SVT_UINT16;
}
}
if (psOut->eComponentType == INOUT_COMPONENT_UINT32)
{
return SVT_UINT;
}
else if (psOut->eComponentType == INOUT_COMPONENT_SINT32)
{
return SVT_INT;
}
return SVT_FLOAT;
break;
}
case OPERAND_TYPE_INPUT:
case OPERAND_TYPE_INPUT_PATCH_CONSTANT:
case OPERAND_TYPE_INPUT_CONTROL_POINT:
{
const uint32_t ui32Register = aui32ArraySizes[iIndexDims - 1];
int regSpace = GetRegisterSpace(psContext);
const ShaderInfo::InOutSignature* psIn = NULL;
if (regSpace == 0)
{
if (psContext->psShader->asPhases[psContext->currentPhase].acInputNeedsRedirect[ui32Register] != 0)
return SVT_FLOAT; // All combined inputs are stored as floats
psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32Register, GetAccessMask(),
&psIn);
}
else
{
if (psContext->psShader->asPhases[psContext->currentPhase].acPatchConstantsNeedsRedirect[ui32Register] != 0)
return SVT_FLOAT; // All combined inputs are stored as floats
psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32Register, GetAccessMask(), &psIn);
}
ASSERT(psIn != NULL);
switch (eSpecialName)
{
//UINT in DX, INT in GL.
case NAME_PRIMITIVE_ID:
case NAME_VERTEX_ID:
case NAME_INSTANCE_ID:
case NAME_RENDER_TARGET_ARRAY_INDEX:
case NAME_VIEWPORT_ARRAY_INDEX:
case NAME_SAMPLE_INDEX:
return (psContext->psShader->eTargetLanguage == LANG_METAL) ? SVT_UINT : SVT_INT;
case NAME_IS_FRONT_FACE:
return SVT_UINT;
case NAME_POSITION:
case NAME_CLIP_DISTANCE:
case NAME_CULL_DISTANCE:
return SVT_FLOAT;
default:
break;
// fall through
}
if (psIn->eSystemValueType == NAME_IS_FRONT_FACE)
return SVT_UINT;
//UINT in DX, INT in GL.
if (psIn->eSystemValueType == NAME_PRIMITIVE_ID ||
psIn->eSystemValueType == NAME_VERTEX_ID ||
psIn->eSystemValueType == NAME_INSTANCE_ID ||
psIn->eSystemValueType == NAME_RENDER_TARGET_ARRAY_INDEX ||
psIn->eSystemValueType == NAME_VIEWPORT_ARRAY_INDEX ||
psIn->eSystemValueType == NAME_SAMPLE_INDEX)
return (psContext->psShader->eTargetLanguage == LANG_METAL) ? SVT_UINT : SVT_INT;
if (psIn->eMinPrec != MIN_PRECISION_DEFAULT)
{
switch (psIn->eMinPrec)
{
default:
ASSERT(0);
break;
case MIN_PRECISION_FLOAT_16:
return SVT_FLOAT16;
case MIN_PRECISION_FLOAT_2_8:
if (psContext->psShader->eTargetLanguage == LANG_METAL)
return SVT_FLOAT16;
else
return SVT_FLOAT10;
case MIN_PRECISION_SINT_16:
return SVT_INT16;
case MIN_PRECISION_UINT_16:
return SVT_UINT16;
}
}
if (psIn->eComponentType == INOUT_COMPONENT_UINT32)
{
return SVT_UINT;
}
else if (psIn->eComponentType == INOUT_COMPONENT_SINT32)
{
return SVT_INT;
}
return SVT_FLOAT;
break;
}
case OPERAND_TYPE_CONSTANT_BUFFER:
{
const ConstantBuffer* psCBuf = NULL;
const ShaderVarType* psVarType = NULL;
int32_t rebase = -1;
bool isArray;
psContext->psShader->sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, aui32ArraySizes[0], &psCBuf);
if (psCBuf)
{
int foundVar = ShaderInfo::GetShaderVarFromOffset(aui32ArraySizes[1], aui32Swizzle, psCBuf, &psVarType, &isArray, NULL, &rebase, psContext->flags);
if (foundVar)
return psVarType->Type;
ASSERT(0);
}
else
ASSERT(0);
break;
}
case OPERAND_TYPE_IMMEDIATE32:
{
return ePreferredTypeForImmediates;
}
case OPERAND_TYPE_IMMEDIATE64:
{
return SVT_DOUBLE;
}
case OPERAND_TYPE_INPUT_THREAD_ID:
case OPERAND_TYPE_INPUT_THREAD_GROUP_ID:
case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP:
case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED:
{
return SVT_UINT;
}
case OPERAND_TYPE_SPECIAL_ADDRESS:
case OPERAND_TYPE_SPECIAL_LOOPCOUNTER:
case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID:
case OPERAND_TYPE_INPUT_PRIMITIVEID:
{
return SVT_INT;
}
case OPERAND_TYPE_INPUT_GS_INSTANCE_ID:
{
return SVT_UINT;
}
case OPERAND_TYPE_OUTPUT_COVERAGE_MASK:
{
return SVT_INT;
}
case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID:
{
return SVT_INT;
}
case OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER: // constant array is floats everywhere except on vulkan
{
return psContext->IsVulkan() ? SVT_UINT : SVT_FLOAT;
}
default:
{
return SVT_FLOAT;
}
}
return SVT_FLOAT;
}
OPERAND_MIN_PRECISION Operand::ResourcePrecisionToOperandPrecision(REFLECT_RESOURCE_PRECISION ePrec)
{
switch (ePrec)
{
default:
case REFLECT_RESOURCE_PRECISION_UNKNOWN:
case REFLECT_RESOURCE_PRECISION_LOWP:
return OPERAND_MIN_PRECISION_FLOAT_2_8;
case REFLECT_RESOURCE_PRECISION_MEDIUMP:
return OPERAND_MIN_PRECISION_FLOAT_16;
case REFLECT_RESOURCE_PRECISION_HIGHP:
return OPERAND_MIN_PRECISION_DEFAULT;
}
}
int Operand::GetNumInputElements(const HLSLCrossCompilerContext *psContext) const
{
const ShaderInfo::InOutSignature *psSig = NULL;
int regSpace = GetRegisterSpace(psContext);
switch (eType)
{
case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP_FLATTENED:
case OPERAND_TYPE_INPUT_FORK_INSTANCE_ID:
case OPERAND_TYPE_OUTPUT_CONTROL_POINT_ID:
return 1;
case OPERAND_TYPE_INPUT_THREAD_ID_IN_GROUP:
case OPERAND_TYPE_INPUT_THREAD_ID:
case OPERAND_TYPE_INPUT_THREAD_GROUP_ID:
case OPERAND_TYPE_INPUT_DOMAIN_POINT:
return 3;
default:
break;
}
if (regSpace == 0)
psContext->psShader->sInfo.GetInputSignatureFromRegister(ui32RegisterNumber, GetAccessMask(), &psSig);
else
psContext->psShader->sInfo.GetPatchConstantSignatureFromRegister(ui32RegisterNumber, GetAccessMask(), &psSig);
ASSERT(psSig != NULL);
return HLSLcc::GetNumberBitsSet(psSig->ui32Mask);
}
Operand* Operand::GetDynamicIndexOperand(HLSLCrossCompilerContext *psContext, const ShaderVarType* psVar, bool isAoS, bool *needsIndexCalcRevert) const
{
Operand *psDynIndexOp = m_SubOperands[0].get();
if (psDynIndexOp == NULL)
psDynIndexOp = m_SubOperands[1].get();
*needsIndexCalcRevert = false;
if (psDynIndexOp != NULL && isAoS)
{
// if dynamically indexing array of structs, try using the original index var before the float4 address calc
bool indexVarFound = false;
*needsIndexCalcRevert = true;
Instruction *psDynIndexOrigin = psDynIndexOp->m_Defines[0].m_Inst;
Operand *asOps = psDynIndexOrigin->asOperands;
Operand *psOriginOp = NULL;
// DXBC always addresses as float4, find the address calculation
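        // e.g. a hypothetical "imul r1.x, r0.x, l(2)" scales the original index by two float4 slots for a 32-byte struct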
// Special case where struct is float4 size, no extra calc is done
if (ShaderInfo::GetCBVarSize(psVar->Parent, true) <= 16) // matrixAsVectors arg does not matter here as with matrices the size will go over the limit anyway
{
indexVarFound = true;
*needsIndexCalcRevert = false;
}
else if (psDynIndexOrigin->eOpcode == OPCODE_IMUL)
{
// check which one of the src operands is the original index
if ((asOps[2].eType == OPERAND_TYPE_TEMP || asOps[2].eType == OPERAND_TYPE_INPUT || asOps[2].eType == OPERAND_TYPE_CONSTANT_BUFFER) && asOps[3].eType == OPERAND_TYPE_IMMEDIATE32)
psOriginOp = &asOps[2];
else if ((asOps[3].eType == OPERAND_TYPE_TEMP || asOps[3].eType == OPERAND_TYPE_INPUT || asOps[3].eType == OPERAND_TYPE_CONSTANT_BUFFER) && asOps[2].eType == OPERAND_TYPE_IMMEDIATE32)
psOriginOp = &asOps[3];
}
else if (psDynIndexOrigin->eOpcode == OPCODE_ISHL)
{
if (asOps[2].eType == OPERAND_TYPE_IMMEDIATE32 && asOps[1].eSelMode == OPERAND_4_COMPONENT_SWIZZLE_MODE)
psOriginOp = &asOps[0];
else if (asOps[2].eType == OPERAND_TYPE_IMMEDIATE32)
psOriginOp = &asOps[1];
}
if (psOriginOp != NULL)
{
indexVarFound = true;
// Check if the mul dest is not the same temp as the src. Also check that the temp
// does not have multiple uses (which could override the value)
// -> we can use src straight and no index revert calc is needed
if ((psOriginOp->eType == OPERAND_TYPE_INPUT)
|| ((psOriginOp->ui32RegisterNumber != psDynIndexOp->ui32RegisterNumber || psOriginOp->GetDataType(psContext) != psDynIndexOp->GetDataType(psContext))
&& (!psOriginOp->m_Defines.empty()) && psOriginOp->m_Defines[0].m_Inst->m_Uses.size() == 1))
{
psDynIndexOp = psOriginOp;
*needsIndexCalcRevert = false;
}
}
        // At the moment we support only this very basic case of dynamically indexing an array of structs.
// Return error if something else is encountered.
if (!indexVarFound)
psContext->m_Reflection.OnDiagnostics("Unsupported dynamic indexing scheme on constant buffer vars.", 0, true);
}
return psDynIndexOp;
}

View File

@@ -1,989 +0,0 @@
#include "internal_includes/Shader.h"
#include "internal_includes/debug.h"
#include <algorithm>
#include "internal_includes/Instruction.h"
#include "internal_includes/Declaration.h"
#include "internal_includes/HLSLccToolkit.h"
uint32_t Shader::GetTempComponentCount(SHADER_VARIABLE_TYPE eType, uint32_t ui32Reg) const
{
switch (eType)
{
case SVT_FLOAT:
return psFloatTempSizes[ui32Reg];
case SVT_FLOAT16:
return psFloat16TempSizes[ui32Reg];
case SVT_FLOAT10:
return psFloat10TempSizes[ui32Reg];
case SVT_INT:
return psIntTempSizes[ui32Reg];
case SVT_INT16:
return psInt16TempSizes[ui32Reg];
case SVT_INT12:
return psInt12TempSizes[ui32Reg];
case SVT_UINT:
return psUIntTempSizes[ui32Reg];
case SVT_UINT16:
return psUInt16TempSizes[ui32Reg];
case SVT_DOUBLE:
return psDoubleTempSizes[ui32Reg];
case SVT_BOOL:
return psBoolTempSizes[ui32Reg];
default:
ASSERT(0);
}
return 0;
}
void Shader::ConsolidateHullTempVars()
{
uint32_t i, phase;
uint32_t numTemps = 0;
for (phase = 0; phase < asPhases.size(); phase++)
{
for (i = 0; i < asPhases[phase].psDecl.size(); i++)
{
if (asPhases[phase].psDecl[i].eOpcode == OPCODE_DCL_TEMPS)
{
if (asPhases[phase].psDecl[i].value.ui32NumTemps > numTemps)
numTemps = asPhases[phase].psDecl[i].value.ui32NumTemps;
asPhases[phase].psDecl[i].value.ui32NumTemps = 0;
}
}
}
// Now we have the max temps, write it back to the first one we see.
for (phase = 0; phase < asPhases.size(); phase++)
{
for (i = 0; i < asPhases[phase].psDecl.size(); i++)
{
if (asPhases[phase].psDecl[i].eOpcode == OPCODE_DCL_TEMPS)
{
asPhases[phase].psDecl[i].value.ui32NumTemps = numTemps;
return;
}
}
}
}
// An image (RWTexture in HLSL) declaration op does not provide enough info about the format and how it is accessed.
// Go through all image declarations and the instructions accessing them to see whether each one is read-only or write-only.
// While doing that we also get the number of components expected in the image format.
// Also resolve access flags for other UAVs as well. No component count resolving for them.
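// e.g. an image that only ever appears as the destination of store_uav_typed ends up with just ACCESS_FLAG_WRITE, i.e. write-only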
void ShaderPhase::ResolveUAVProperties(const ShaderInfo& sInfo)
{
Declaration *psFirstDeclaration = &psDecl[0];
uint32_t ui32NumDeclarations = (uint32_t)psDecl.size();
Instruction *psFirstInstruction = &psInst[0];
uint32_t ui32NumInstructions = (uint32_t)psInst.size();
if (ui32NumDeclarations == 0 || ui32NumInstructions == 0)
return;
Declaration *psLastDeclaration = psFirstDeclaration + ui32NumDeclarations - 1;
Instruction *psLastInstruction = psFirstInstruction + ui32NumInstructions - 1;
Declaration *psDecl;
for (psDecl = psFirstDeclaration; psDecl <= psLastDeclaration; psDecl++)
{
Instruction *psInst;
uint32_t uavReg;
if (psDecl->eOpcode != OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED &&
psDecl->eOpcode != OPCODE_DCL_UNORDERED_ACCESS_VIEW_STRUCTURED &&
psDecl->eOpcode != OPCODE_DCL_UNORDERED_ACCESS_VIEW_RAW)
continue;
uavReg = psDecl->asOperands[0].ui32RegisterNumber;
for (psInst = psFirstInstruction; psInst <= psLastInstruction; psInst++)
{
uint32_t opIndex;
uint32_t accessFlags;
uint32_t numComponents;
switch (psInst->eOpcode)
{
case OPCODE_LD_UAV_TYPED:
opIndex = 2;
accessFlags = ACCESS_FLAG_READ;
numComponents = psInst->asOperands[0].GetNumSwizzleElements(); // get component count from the write target
break;
case OPCODE_STORE_UAV_TYPED:
ASSERT(psInst->asOperands[0].eType == OPERAND_TYPE_UNORDERED_ACCESS_VIEW);
opIndex = 0;
accessFlags = ACCESS_FLAG_WRITE;
                numComponents = 0; // store op does not contribute to the component count resolving
break;
case OPCODE_ATOMIC_CMP_STORE:
case OPCODE_ATOMIC_AND:
case OPCODE_ATOMIC_IADD:
case OPCODE_ATOMIC_OR:
case OPCODE_ATOMIC_XOR:
case OPCODE_ATOMIC_IMIN:
case OPCODE_ATOMIC_UMIN:
case OPCODE_ATOMIC_IMAX:
case OPCODE_ATOMIC_UMAX:
opIndex = 0;
accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE | ACCESS_FLAG_ATOMIC;
numComponents = 1;
break;
case OPCODE_IMM_ATOMIC_AND:
case OPCODE_IMM_ATOMIC_IADD:
case OPCODE_IMM_ATOMIC_IMAX:
case OPCODE_IMM_ATOMIC_IMIN:
case OPCODE_IMM_ATOMIC_UMAX:
case OPCODE_IMM_ATOMIC_UMIN:
case OPCODE_IMM_ATOMIC_OR:
case OPCODE_IMM_ATOMIC_XOR:
case OPCODE_IMM_ATOMIC_EXCH:
case OPCODE_IMM_ATOMIC_CMP_EXCH:
opIndex = 1;
accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE | ACCESS_FLAG_ATOMIC;
numComponents = 1;
break;
// The rest of the ops here are only for buffer UAVs. No need for component count resolving.
case OPCODE_LD_STRUCTURED:
opIndex = 3;
accessFlags = ACCESS_FLAG_READ;
numComponents = 0;
break;
case OPCODE_STORE_STRUCTURED:
opIndex = 0;
accessFlags = ACCESS_FLAG_WRITE;
numComponents = 0;
break;
case OPCODE_LD_RAW:
opIndex = 2;
accessFlags = ACCESS_FLAG_READ;
numComponents = 0;
break;
case OPCODE_STORE_RAW:
opIndex = 0;
accessFlags = ACCESS_FLAG_WRITE;
numComponents = 0;
break;
case OPCODE_IMM_ATOMIC_ALLOC:
case OPCODE_IMM_ATOMIC_CONSUME:
opIndex = 1;
accessFlags = ACCESS_FLAG_READ | ACCESS_FLAG_WRITE | ACCESS_FLAG_ATOMIC;
numComponents = 0;
break;
default:
continue;
}
// Buffer loads can also happen on non-uav. Skip those.
if (psInst->asOperands[opIndex].eType != OPERAND_TYPE_UNORDERED_ACCESS_VIEW)
continue;
// Check the instruction is operating on the declared uav
if (psInst->asOperands[opIndex].ui32RegisterNumber != uavReg)
continue;
psDecl->sUAV.ui32AccessFlags |= accessFlags;
// get the max components accessed, but only for typed (texture) UAVs
if (numComponents > psDecl->sUAV.ui32NumComponents && psDecl->eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED)
{
psDecl->sUAV.ui32NumComponents = numComponents;
}
}
if (psDecl->eOpcode == OPCODE_DCL_UNORDERED_ACCESS_VIEW_TYPED)
{
const ResourceBinding* psBinding = 0;
if (sInfo.GetResourceFromBindingPoint(RGROUP_UAV, uavReg, &psBinding))
{
// component count is stored in flags as 2 bits, 00: vec1, 01: vec2, 10: vec3, 11: vec4
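                // e.g. ((ui32Flags >> 2) & 3) == 2 (binary 10) decodes to 3 components, i.e. a vec3 image format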
psDecl->sUAV.ui32NumComponents = ((psBinding->ui32Flags >> 2) & 3) + 1;
}
}
}
}
static void GatherOperandAccessMasks(const Operand *psOperand, char *destTable)
{
int i;
uint32_t reg;
for (i = 0; i < MAX_SUB_OPERANDS; i++)
{
if (psOperand->m_SubOperands[i].get())
GatherOperandAccessMasks(psOperand->m_SubOperands[i].get(), destTable);
}
if (psOperand->eType != OPERAND_TYPE_TEMP)
return;
reg = psOperand->ui32RegisterNumber & 0xffff; // We add 0x10000 to all newly created ones earlier
destTable[reg] |= (char)psOperand->GetAccessMask();
}
// Coalesce the split temps back based on their original temp register. Keep uint/int/float operations separate
static void CoalesceTemps(Shader *psShader, ShaderPhase *psPhase, uint32_t ui32MaxOrigTemps)
{
// Just move all operations back to their original registers, but keep the data type assignments.
uint32_t i, k;
Instruction *psLastInstruction = &psPhase->psInst[psPhase->psInst.size() - 1];
std::vector<char> opAccessMasks;
    // First move all newly created temps high enough that they won't overlap with the rebased ones
Instruction *inst = &psPhase->psInst[0];
if (psPhase->psInst.size() == 0 || psPhase->ui32OrigTemps == 0)
return;
while (inst <= psLastInstruction)
{
// Update all operands and their suboperands
for (i = psPhase->ui32OrigTemps; i < psPhase->ui32TotalTemps; i++)
{
for (k = 0; k < inst->ui32NumOperands; k++)
inst->ChangeOperandTempRegister(&inst->asOperands[k], i, 0x10000 + i, OPERAND_4_COMPONENT_MASK_ALL, UD_CHANGE_ALL, 0);
}
inst++;
}
// Prune the original registers, rebase if necessary
opAccessMasks.clear();
opAccessMasks.resize(psPhase->ui32TotalTemps, 0);
inst = &psPhase->psInst[0];
while (inst <= psLastInstruction)
{
for (k = 0; k < inst->ui32NumOperands; k++)
GatherOperandAccessMasks(&inst->asOperands[k], &opAccessMasks[0]);
inst++;
}
for (i = 0; i < psPhase->ui32TotalTemps; i++)
{
uint32_t rebase, count;
uint32_t newReg = i;
uint32_t origReg = i;
int needsMoving = 0;
SHADER_VARIABLE_TYPE dataType;
// Figure out rebase and count
rebase = 0;
count = 0;
if (i < psPhase->ui32OrigTemps)
{
// One of the original registers
k = opAccessMasks[i];
if (k == 0)
continue;
while ((k & 1) == 0)
{
rebase++;
k = k >> 1;
}
while (k != 0)
{
count++;
k = k >> 1;
}
newReg = i + ui32MaxOrigTemps * rebase;
if (rebase != 0)
needsMoving = 1;
}
else
{
// Newly created split registers, read info from table
// Read the count and rebase from split info table
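            // pui32SplitInfo packing (as read below): bits 31..24 = component count, 23..16 = rebase, 15..0 = the temp this split register maps back to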
count = (psPhase->pui32SplitInfo[i] >> 24) & 0xff;
rebase = (psPhase->pui32SplitInfo[i] >> 16) & 0xff;
origReg = 0x10000 + i;
newReg = (psPhase->pui32SplitInfo[i]) & 0xffff;
while (psPhase->pui32SplitInfo[newReg] != 0xffffffff)
newReg = (psPhase->pui32SplitInfo[newReg]) & 0xffff;
// If count is 4, verify that we have both first and last bit set
ASSERT(count != 4 || (opAccessMasks[i] & 9) == 9);
newReg = newReg + ui32MaxOrigTemps * rebase;
// Don't rebase again
rebase = 0;
needsMoving = 1;
}
if (needsMoving)
{
// printf("Moving reg %d to %d, count %d rebase %d\n", origReg, newReg, count, rebase);
// Move directly to correct location
inst = &psPhase->psInst[0];
while (inst <= psLastInstruction)
{
for (k = 0; k < inst->ui32NumOperands; k++)
inst->ChangeOperandTempRegister(&inst->asOperands[k], origReg, newReg, OPERAND_4_COMPONENT_MASK_ALL, UD_CHANGE_ALL, rebase);
inst++;
}
}
// Mark the count
dataType = psPhase->peTempTypes[i * 4 + rebase];
switch (dataType)
{
default:
ASSERT(0);
break;
case SVT_BOOL:
psShader->psBoolTempSizes[newReg] = std::max(psShader->psBoolTempSizes[newReg], (char)count);
break;
case SVT_FLOAT:
psShader->psFloatTempSizes[newReg] = std::max(psShader->psFloatTempSizes[newReg], (char)count);
break;
case SVT_FLOAT16:
psShader->psFloat16TempSizes[newReg] = std::max(psShader->psFloat16TempSizes[newReg], (char)count);
break;
case SVT_FLOAT10:
psShader->psFloat10TempSizes[newReg] = std::max(psShader->psFloat10TempSizes[newReg], (char)count);
break;
case SVT_INT:
psShader->psIntTempSizes[newReg] = std::max(psShader->psIntTempSizes[newReg], (char)count);
break;
case SVT_INT16:
psShader->psInt16TempSizes[newReg] = std::max(psShader->psInt16TempSizes[newReg], (char)count);
break;
case SVT_INT12:
psShader->psInt12TempSizes[newReg] = std::max(psShader->psInt12TempSizes[newReg], (char)count);
break;
case SVT_UINT:
psShader->psUIntTempSizes[newReg] = std::max(psShader->psUIntTempSizes[newReg], (char)count);
break;
case SVT_UINT16:
psShader->psUInt16TempSizes[newReg] = std::max(psShader->psUInt16TempSizes[newReg], (char)count);
break;
case SVT_DOUBLE:
psShader->psDoubleTempSizes[newReg] = std::max(psShader->psDoubleTempSizes[newReg], (char)count);
break;
}
}
}
// Mark whether the temp registers are used per each data type.
void Shader::PruneTempRegisters()
{
uint32_t k;
uint32_t maxOrigTemps = 0;
uint32_t maxTotalTemps = 0;
// First find the total amount of temps
for (k = 0; k < asPhases.size(); k++)
{
ShaderPhase *psPhase = &asPhases[k];
maxOrigTemps = std::max(maxOrigTemps, psPhase->ui32OrigTemps);
maxTotalTemps = std::max(maxTotalTemps, psPhase->ui32TotalTemps);
}
if (maxTotalTemps == 0)
        return; // split arrays are null, nothing to free
// Allocate and zero-initialize arrays for each temp sizes. *4 is for every possible rebase
psIntTempSizes.clear();
psIntTempSizes.resize(maxOrigTemps * 4, 0);
psInt12TempSizes.clear();
psInt12TempSizes.resize(maxOrigTemps * 4, 0);
psInt16TempSizes.clear();
psInt16TempSizes.resize(maxOrigTemps * 4, 0);
psUIntTempSizes.clear();
psUIntTempSizes.resize(maxOrigTemps * 4, 0);
psUInt16TempSizes.clear();
psUInt16TempSizes.resize(maxOrigTemps * 4, 0);
psFloatTempSizes.clear();
psFloatTempSizes.resize(maxOrigTemps * 4, 0);
psFloat16TempSizes.clear();
psFloat16TempSizes.resize(maxOrigTemps * 4, 0);
psFloat10TempSizes.clear();
psFloat10TempSizes.resize(maxOrigTemps * 4, 0);
psDoubleTempSizes.clear();
psDoubleTempSizes.resize(maxOrigTemps * 4, 0);
psBoolTempSizes.clear();
psBoolTempSizes.resize(maxOrigTemps * 4, 0);
for (k = 0; k < asPhases.size(); k++)
{
ShaderPhase *psPhase = &asPhases[k];
CoalesceTemps(this, psPhase, maxOrigTemps);
if (psPhase->psTempDeclaration)
psPhase->psTempDeclaration->value.ui32NumTemps = maxOrigTemps * 4;
}
}
static void DoSignatureAnalysis(std::vector<ShaderInfo::InOutSignature> &psSignatures, std::vector<unsigned char> &outTable)
{
    // Fill the output table, 2 bits per component, so that each 2-bit field encodes the following info:
// 0: unused OR used by the first signature we happened to see
// 1: used by the second signature
// 2: used by the third sig
// 3: used by the fourth sig.
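    // Example: if two vec2 signatures share one register, the first one seen leaves its two slots at 0
    // and the second marks its slots with 1, so an access whose 2-bit values differ crosses a signature boundary.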
// The counters for each input/output/patch. Start with 8 registers, grow as needed
std::vector<unsigned char> counters(8, (unsigned char)0);
outTable.clear();
outTable.resize(8, (unsigned char)0);
size_t i;
for (i = 0; i < psSignatures.size(); i++)
{
ShaderInfo::InOutSignature *psSig = &psSignatures[i];
char currCounter;
char mask;
ASSERT(psSig != NULL);
// We'll skip SV_Depth and others that put -1 to the register.
if (psSig->ui32Register == 0xffffffffu)
continue;
// Make sure there's enough room in the table
if (psSig->ui32Register >= counters.size())
{
counters.resize(psSig->ui32Register * 2, 0);
outTable.resize(psSig->ui32Register * 2, 0);
}
// Apply counter value to masked items
currCounter = counters[psSig->ui32Register];
// Duplicate counter bits
currCounter = currCounter | (currCounter << 2) | (currCounter << 4) | (currCounter << 6);
// Widen the mask
mask = (unsigned char)psSig->ui32Mask;
mask = ((mask & 8) << 3) | ((mask & 4) << 2) | ((mask & 2) << 1) | (mask & 1);
mask = mask | (mask << 1);
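        // e.g. a .zw mask (binary 1100) widens to 11110000, covering the two topmost 2-bit slots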
// Write output
outTable[psSig->ui32Register] |= (currCounter & mask);
// Update counter
counters[psSig->ui32Register]++;
}
}
void Shader::DoIOOverlapOperand(ShaderPhase *psPhase, Operand *psOperand)
{
uint32_t i;
uint32_t regSpace = psOperand->GetRegisterSpace(eShaderType, psPhase->ePhase);
unsigned char *redirectTable = NULL;
unsigned char redir = 0;
unsigned char firstFound = 0;
uint32_t mask;
for (i = 0; i < MAX_SUB_OPERANDS; i++)
if (psOperand->m_SubOperands[i].get())
DoIOOverlapOperand(psPhase, psOperand->m_SubOperands[i].get());
switch (psOperand->eType)
{
case OPERAND_TYPE_INPUT:
case OPERAND_TYPE_INPUT_CONTROL_POINT:
case OPERAND_TYPE_INPUT_PATCH_CONSTANT:
redirectTable = regSpace == 0 ? &psPhase->acInputNeedsRedirect[0] : &psPhase->acPatchConstantsNeedsRedirect[0];
break;
case OPERAND_TYPE_OUTPUT:
case OPERAND_TYPE_OUTPUT_CONTROL_POINT:
redirectTable = regSpace == 0 ? &psPhase->acOutputNeedsRedirect[0] : &psPhase->acPatchConstantsNeedsRedirect[0];
break;
default:
            // Not an input or output, nothing to do here
return;
}
redir = redirectTable[psOperand->ui32RegisterNumber];
if (redir == 0xff) // Already found overlap?
return;
mask = psOperand->GetAccessMask();
i = 0;
// Find the first mask bit set.
while ((mask & (1 << i)) == 0)
i++;
firstFound = (redir >> (i * 2)) & 3;
for (; i < 4; i++)
{
unsigned char sig;
if ((mask & (1 << i)) == 0)
continue;
sig = (redir >> (i * 2)) & 3;
// All set bits must access the same signature
if (sig != firstFound)
{
redirectTable[psOperand->ui32RegisterNumber] = 0xff;
return;
}
}
}
static void PruneRedirectEntry(unsigned char &itr)
{
if (itr != 0xff)
itr = 0;
}
// Check if inputs and outputs are accessed across semantic boundaries
// as in, two texcoord vec2's packed together into one vec4 register but still accessed together in a single operand.
void Shader::AnalyzeIOOverlap()
{
uint32_t i, k;
std::vector<unsigned char> outData;
DoSignatureAnalysis(sInfo.psInputSignatures, outData);
// Now data has the values, copy them to all phases
for (i = 0; i < asPhases.size(); i++)
asPhases[i].acInputNeedsRedirect = outData;
DoSignatureAnalysis(sInfo.psOutputSignatures, outData);
for (i = 0; i < asPhases.size(); i++)
asPhases[i].acOutputNeedsRedirect = outData;
DoSignatureAnalysis(sInfo.psPatchConstantSignatures, outData);
for (i = 0; i < asPhases.size(); i++)
asPhases[i].acPatchConstantsNeedsRedirect = outData;
// Now walk through all operands and suboperands in all instructions and write 0xff to the dest (cannot occur otherwise)
// if we're crossing signature borders
for (i = 0; i < asPhases.size(); i++)
{
ShaderPhase *psPhase = &asPhases[i];
for (k = 0; k < psPhase->psInst.size(); k++)
{
Instruction *psInst = &psPhase->psInst[k];
uint32_t j;
for (j = 0; j < psInst->ui32NumOperands; j++)
DoIOOverlapOperand(psPhase, &psInst->asOperands[j]);
}
// Now prune all tables from anything except 0xff.
std::for_each(psPhase->acInputNeedsRedirect.begin(), psPhase->acInputNeedsRedirect.end(), PruneRedirectEntry);
std::for_each(psPhase->acOutputNeedsRedirect.begin(), psPhase->acOutputNeedsRedirect.end(), PruneRedirectEntry);
std::for_each(psPhase->acPatchConstantsNeedsRedirect.begin(), psPhase->acPatchConstantsNeedsRedirect.end(), PruneRedirectEntry);
}
}
void Shader::SetMaxSemanticIndex()
{
for (std::vector<ShaderInfo::InOutSignature>::iterator it = sInfo.psInputSignatures.begin(); it != sInfo.psInputSignatures.end(); ++it)
maxSemanticIndex = std::max(maxSemanticIndex, it->ui32SemanticIndex);
for (std::vector<ShaderInfo::InOutSignature>::iterator it = sInfo.psOutputSignatures.begin(); it != sInfo.psOutputSignatures.end(); ++it)
maxSemanticIndex = std::max(maxSemanticIndex, it->ui32SemanticIndex);
for (std::vector<ShaderInfo::InOutSignature>::iterator it = sInfo.psPatchConstantSignatures.begin(); it != sInfo.psPatchConstantSignatures.end(); ++it)
maxSemanticIndex = std::max(maxSemanticIndex, it->ui32SemanticIndex);
}
// In DX bytecode, all const arrays are vec4's, and all of them are stuffed into one large array.
// Luckily, each chunk is always accessed with suboperand plus <constant> (in ui32RegisterNumber)
// So do an analysis pass. Also trim the vec4's into smaller formats if the extra components are never read.
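// e.g. a chunk that is only ever read through .xy can later be declared as a vec2 array instead of a vec4 one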
void ShaderPhase::PruneConstArrays()
{
using namespace std;
auto customDataItr = find_if(psDecl.begin(), psDecl.end(), [](const Declaration &d) { return d.eOpcode == OPCODE_CUSTOMDATA; });
// Not found? We're done.
if (customDataItr == psDecl.end())
return;
// Store the original declaration
m_ConstantArrayInfo.m_OrigDeclaration = &(*customDataItr);
// Loop through each operand and pick up usage masks
HLSLcc::ForEachOperand(psInst.begin(), psInst.end(), FEO_FLAG_ALL, [this](const std::vector<Instruction>::iterator &psInst, const Operand *psOperand, uint32_t ui32OperandType)
{
using namespace std;
if (psOperand->eType == OPERAND_TYPE_IMMEDIATE_CONSTANT_BUFFER)
{
uint32_t accessMask = psOperand->GetAccessMask();
uint32_t offset = psOperand->ui32RegisterNumber;
// Update the chunk access mask
// Find all existing entries that have anything common with the access mask
auto cbrange = m_ConstantArrayInfo.m_Chunks.equal_range(offset);
vector<ChunkMap::iterator> matchingEntries;
for (auto itr = cbrange.first; itr != cbrange.second; itr++)
{
if ((itr->second.m_AccessMask & accessMask) != 0)
{
matchingEntries.push_back(itr);
}
}
if (matchingEntries.empty())
{
// Not found, create new entry
m_ConstantArrayInfo.m_Chunks.insert(make_pair(offset, ConstantArrayChunk(0u, accessMask, (Operand *)psOperand)));
}
else if (matchingEntries.size() == 1)
{
// Update access mask of the one existing entry
matchingEntries[0]->second.m_AccessMask |= accessMask;
matchingEntries[0]->second.m_UseSites.push_back((Operand *)psOperand);
}
else
{
// Multiple entries with (now) overlapping mask. Merge to the first one.
ChunkMap::iterator tgt = matchingEntries[0];
tgt->second.m_AccessMask |= accessMask;
tgt->second.m_UseSites.push_back((Operand *)psOperand);
ChunkMap &chunks = m_ConstantArrayInfo.m_Chunks;
for_each(matchingEntries.begin() + 1, matchingEntries.end(), [&tgt, &chunks](ChunkMap::iterator itr)
{
tgt->second.m_AccessMask |= itr->second.m_AccessMask;
chunks.erase(itr);
});
}
}
});
// Figure out how large each chunk is by finding the next chunk that uses any bits from the current mask (or the max size if not found)
uint32_t totalSize = (uint32_t)m_ConstantArrayInfo.m_OrigDeclaration->asImmediateConstBuffer.size();
for (auto chunk = m_ConstantArrayInfo.m_Chunks.begin(); chunk != m_ConstantArrayInfo.m_Chunks.end(); chunk++)
{
// Find the next chunk that shares any bits in the access mask
auto nextItr = find_if(m_ConstantArrayInfo.m_Chunks.lower_bound(chunk->first + 1), m_ConstantArrayInfo.m_Chunks.end(), [&chunk](ChunkMap::value_type &itr)
{
return (chunk->second.m_AccessMask & itr.second.m_AccessMask) != 0;
});
// Not found? Must continue until the end of array
if (nextItr == m_ConstantArrayInfo.m_Chunks.end())
chunk->second.m_Size = totalSize - chunk->first;
else
{
// Otherwise we know the chunk size directly.
chunk->second.m_Size = nextItr->first - chunk->first;
}
// Do rebase on the operands if necessary
chunk->second.m_Rebase = 0;
uint32_t t = chunk->second.m_AccessMask;
ASSERT(t != 0);
while ((t & 1) == 0)
{
chunk->second.m_Rebase++;
t >>= 1;
}
uint32_t rebase = chunk->second.m_Rebase;
uint32_t componentCount = 0;
while (t != 0)
{
componentCount++;
t >>= 1;
}
chunk->second.m_ComponentCount = componentCount;
for_each(chunk->second.m_UseSites.begin(), chunk->second.m_UseSites.end(), [&rebase, &componentCount](Operand *op)
{
// Store the rebase value to each operand and do the actual rebase.
op->m_Rebase = rebase;
op->m_Size = componentCount;
if (rebase != 0)
{
// Update component mask. Note that we don't need to do anything to the suboperands. They do not affect destination writemask.
switch (op->eSelMode)
{
case OPERAND_4_COMPONENT_MASK_MODE:
{
uint32_t oldMask = op->ui32CompMask;
if (oldMask == 0)
oldMask = OPERAND_4_COMPONENT_MASK_ALL;
// Check that we're not losing any information
ASSERT((oldMask >> rebase) << rebase == oldMask);
op->ui32CompMask = (oldMask >> rebase);
break;
}
case OPERAND_4_COMPONENT_SELECT_1_MODE:
ASSERT(op->aui32Swizzle[0] >= rebase);
op->aui32Swizzle[0] -= rebase;
break;
case OPERAND_4_COMPONENT_SWIZZLE_MODE:
{
for (int i = 0; i < 4; i++)
{
// Note that this rebase is different from the one done for source operands
ASSERT(op->aui32Swizzle[i] >= rebase);
op->aui32Swizzle[i] -= rebase;
}
break;
}
default:
ASSERT(0);
}
}
});
}
// We'll do the actual declaration and pruning later on, now that we have the info stored up.
}
HLSLcc::ControlFlow::ControlFlowGraph &ShaderPhase::GetCFG()
{
if (!m_CFGInitialized)
{
m_CFG.Build(psInst.data(), psInst.data() + psInst.size());
m_CFGInitialized = true;
}
return m_CFG;
}
void ShaderPhase::UnvectorizeImmMoves()
{
// NOTE must be called before datatype analysis and other analysis phases are done, as the pointers won't match anymore
// (we insert new instructions there)
using namespace std;
vector<Instruction> nInst;
// Reserve 1.5x space
nInst.reserve(psInst.size() * 3 / 2);
for_each(psInst.begin(), psInst.end(), [&](Instruction &i)
{
if (i.eOpcode != OPCODE_MOV || i.asOperands[0].eType != OPERAND_TYPE_TEMP || i.asOperands[1].eType != OPERAND_TYPE_IMMEDIATE32 || i.asOperands[0].GetNumSwizzleElements() == 1)
{
nInst.push_back(i);
return;
}
// Ok, found one to unvectorize.
ASSERT(i.asOperands[0].eSelMode == OPERAND_4_COMPONENT_MASK_MODE);
uint32_t mask = i.asOperands[0].ui32CompMask;
for (uint32_t j = 0; j < 4; j++)
{
if ((mask & (1 << j)) == 0)
continue;
Instruction ni = i;
ni.asOperands[0].ui32CompMask = (1 << j);
nInst.push_back(ni);
}
});
psInst.clear();
psInst.swap(nInst);
}
void ShaderPhase::ExpandSWAPCs()
{
// First find the DCL_TEMPS declaration
auto dcitr = std::find_if(psDecl.begin(), psDecl.end(), [](const Declaration &decl) -> bool { return decl.eOpcode == OPCODE_DCL_TEMPS; });
if (dcitr == psDecl.end())
{
// No temp declaration? Probably we won't have SWAPC either, then.
return;
}
Declaration &tmpDecl = *dcitr;
uint32_t extraTemp = 0;
bool extraTempAllocated = false;
// Parse through instructions, open up SWAPCs if necessary
while (1)
{
// Need to find from top every time, because we're inserting stuff into the vector
auto swapItr = std::find_if(psInst.begin(), psInst.end(), [](const Instruction &inst) -> bool { return inst.eOpcode == OPCODE_SWAPC; });
if (swapItr == psInst.end())
break;
// Ok swapItr now points to a SWAPC instruction that we'll have to split up like this (from MSDN):
/* swapc dest0[.mask],
dest1[.mask],
src0[.swizzle],
src1[.swizzle],
src2[.swizzle]
expands to :
movc temp[dest0s mask],
src0[.swizzle],
src2[.swizzle], src1[.swizzle]
movc dest1[.mask],
src0[.swizzle],
src1[.swizzle], src2[.swizzle]
mov dest0.mask, temp
*/
// Allocate a new temp, if not already done
if (!extraTempAllocated)
{
extraTemp = tmpDecl.value.ui32NumTemps++;
extraTempAllocated = true;
}
Instruction origSwapInst;
#if _DEBUG
origSwapInst.id = swapItr->id;
#endif
std::swap(*swapItr, origSwapInst); // Store the original swapc for reading
        // OP 1: MOVC temp[dest0 mask], src0, src2, src1
swapItr->eOpcode = OPCODE_MOVC;
swapItr->ui32NumOperands = 4;
swapItr->ui32FirstSrc = 1;
swapItr->asOperands[0] = origSwapInst.asOperands[0];
swapItr->asOperands[0].eType = OPERAND_TYPE_TEMP;
swapItr->asOperands[0].ui32RegisterNumber = extraTemp;
// mask is already fine
swapItr->asOperands[1] = origSwapInst.asOperands[2]; // src0
swapItr->asOperands[2] = origSwapInst.asOperands[4]; // src2
swapItr->asOperands[3] = origSwapInst.asOperands[3]; // src1
// swapItr is already in the psInst vector.
Instruction newInst[2] = { Instruction(), Instruction() };
// OP 2: MOVC dest1, src0, src1, src2
newInst[0].eOpcode = OPCODE_MOVC;
newInst[0].ui32NumOperands = 4;
newInst[0].ui32FirstSrc = 1;
newInst[0].asOperands[0] = origSwapInst.asOperands[1]; // dest1
newInst[0].asOperands[1] = origSwapInst.asOperands[2]; // src0
newInst[0].asOperands[2] = origSwapInst.asOperands[3]; // src1
newInst[0].asOperands[3] = origSwapInst.asOperands[4]; // src2
#if _DEBUG
newInst[0].id = swapItr->id;
#endif
// OP 3: mov dest0.mask, temp
newInst[1].eOpcode = OPCODE_MOV;
newInst[1].ui32NumOperands = 2;
newInst[1].ui32FirstSrc = 1;
newInst[1].asOperands[0] = origSwapInst.asOperands[0]; // dest 0
// First copy dest0 to src as well to get the mask set up correctly
newInst[1].asOperands[1] = origSwapInst.asOperands[0]; // dest 0;
// Then overwrite with temp reg
newInst[1].asOperands[1].eType = OPERAND_TYPE_TEMP;
newInst[1].asOperands[1].ui32RegisterNumber = extraTemp;
#if _DEBUG
newInst[1].id = swapItr->id;
#endif
// Insert the new instructions to the vector
psInst.insert(swapItr + 1, newInst, newInst + 2);
}
}
void Shader::ExpandSWAPCs()
{
// Just call ExpandSWAPCs for each phase
for (int i = 0; i < asPhases.size(); i++)
{
asPhases[i].ExpandSWAPCs();
}
}
void Shader::ForcePositionToHighp()
{
// Only sensible in vertex shaders (TODO: is this an issue in tessellation shaders? Do we even care?)
if (eShaderType != VERTEX_SHADER)
return;
ShaderPhase &phase = asPhases[0];
// Find the output declaration
std::vector<Declaration>::iterator itr = std::find_if(phase.psDecl.begin(), phase.psDecl.end(), [this](const Declaration &decl) -> bool
{
if (decl.eOpcode == OPCODE_DCL_OUTPUT_SIV)
{
const SPECIAL_NAME specialName = decl.asOperands[0].eSpecialName;
if (specialName == NAME_POSITION ||
specialName == NAME_UNDEFINED) // This might be SV_Position (because d3dcompiler is weird).
{
const ShaderInfo::InOutSignature *sig = NULL;
sInfo.GetOutputSignatureFromRegister(decl.asOperands[0].ui32RegisterNumber, decl.asOperands[0].GetAccessMask(), 0, &sig);
ASSERT(sig != NULL);
if ((sig->eSystemValueType == NAME_POSITION || sig->semanticName == "POS") && sig->ui32SemanticIndex == 0)
{
((ShaderInfo::InOutSignature *)sig)->eMinPrec = MIN_PRECISION_DEFAULT;
return true;
}
}
return false;
}
else if (decl.eOpcode == OPCODE_DCL_OUTPUT)
{
const ShaderInfo::InOutSignature *sig = NULL;
sInfo.GetOutputSignatureFromRegister(decl.asOperands[0].ui32RegisterNumber, decl.asOperands[0].GetAccessMask(), 0, &sig);
ASSERT(sig != NULL);
if ((sig->eSystemValueType == NAME_POSITION || sig->semanticName == "POS") && sig->ui32SemanticIndex == 0)
{
((ShaderInfo::InOutSignature *)sig)->eMinPrec = MIN_PRECISION_DEFAULT;
return true;
}
return false;
}
return false;
});
// Do nothing if we don't find suitable output. This may well be INTERNALTESSPOS for tessellation etc.
if (itr == phase.psDecl.end())
return;
uint32_t outputPosReg = itr->asOperands[0].ui32RegisterNumber;
HLSLcc::ForEachOperand(phase.psInst.begin(), phase.psInst.end(), FEO_FLAG_DEST_OPERAND, [outputPosReg](std::vector<Instruction>::iterator itr, Operand *op, uint32_t flags)
{
if (op->eType == OPERAND_TYPE_OUTPUT && op->ui32RegisterNumber == outputPosReg)
op->eMinPrecision = OPERAND_MIN_PRECISION_DEFAULT;
});
}
void Shader::FindUnusedGlobals(uint32_t flags)
{
for (int i = 0; i < asPhases.size(); i++)
{
ShaderPhase &phase = asPhases[i];
// Loop through every operand and pick up usages
HLSLcc::ForEachOperand(phase.psInst.begin(), phase.psInst.end(), FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND, [&](std::vector<Instruction>::iterator inst, Operand *op, uint32_t flags)
{
// Not a constant buffer read? continue
if (op->eType != OPERAND_TYPE_CONSTANT_BUFFER)
return;
const uint32_t ui32BindingPoint = op->aui32ArraySizes[0];
const ConstantBuffer *psCBuf = NULL;
sInfo.GetConstantBufferFromBindingPoint(RGROUP_CBUFFER, ui32BindingPoint, &psCBuf);
if (!psCBuf)
return;
// Get all the struct members that can be reached from this usage:
uint32_t mask = op->GetAccessMask();
for (uint32_t k = 0; k < 4; k++)
{
if ((mask & (1 << k)) == 0)
continue;
uint32_t tmpSwizzle[4] = {k, k, k, k};
int rebase;
bool isArray;
ShaderVarType *psVarType = NULL;
ShaderInfo::GetShaderVarFromOffset(op->aui32ArraySizes[1], tmpSwizzle, psCBuf, (const ShaderVarType**)&psVarType, &isArray, NULL, &rebase, flags);
// Mark as used. Also all parents.
while (psVarType)
{
psVarType->m_IsUsed = true;
psVarType = psVarType->Parent;
}
}
});
}
}

View File

@@ -1,520 +0,0 @@
#include "ShaderInfo.h"
#include "internal_includes/debug.h"
#include "internal_includes/tokens.h"
#include "Operand.h"
#include <stdlib.h>
#include <sstream>
#include <cctype>
SHADER_VARIABLE_TYPE ShaderInfo::GetTextureDataType(uint32_t regNo)
{
const ResourceBinding* psBinding = 0;
int found;
found = GetResourceFromBindingPoint(RGROUP_TEXTURE, regNo, &psBinding);
ASSERT(found != 0);
return psBinding->GetDataType();
}
void ShaderInfo::GetConstantBufferFromBindingPoint(const ResourceGroup eGroup, const uint32_t ui32BindPoint, const ConstantBuffer** ppsConstBuf) const
{
ASSERT(ui32MajorVersion > 3);
*ppsConstBuf = &psConstantBuffers[aui32ResourceMap[eGroup][ui32BindPoint]];
}
int ShaderInfo::GetResourceFromBindingPoint(const ResourceGroup eGroup, uint32_t const ui32BindPoint, const ResourceBinding** ppsOutBinding) const
{
size_t i;
const size_t ui32NumBindings = psResourceBindings.size();
const ResourceBinding* psBindings = &psResourceBindings[0];
for (i = 0; i < ui32NumBindings; ++i)
{
if (ResourceTypeToResourceGroup(psBindings[i].eType) == eGroup)
{
if (ui32BindPoint >= psBindings[i].ui32BindPoint && ui32BindPoint < (psBindings[i].ui32BindPoint + psBindings[i].ui32BindCount))
{
*ppsOutBinding = psBindings + i;
return 1;
}
}
}
return 0;
}
int ShaderInfo::GetInterfaceVarFromOffset(uint32_t ui32Offset, ShaderVar** ppsShaderVar) const
{
size_t i;
const size_t ui32NumVars = psThisPointerConstBuffer->asVars.size();
for (i = 0; i < ui32NumVars; ++i)
{
if (ui32Offset >= psThisPointerConstBuffer->asVars[i].ui32StartOffset &&
ui32Offset < (psThisPointerConstBuffer->asVars[i].ui32StartOffset + psThisPointerConstBuffer->asVars[i].ui32Size))
{
*ppsShaderVar = &psThisPointerConstBuffer->asVars[i];
return 1;
}
}
return 0;
}
int ShaderInfo::GetInputSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull /* == false */) const
{
size_t i;
const size_t ui32NumVars = psInputSignatures.size();
for (i = 0; i < ui32NumVars; ++i)
{
if ((ui32Register == psInputSignatures[i].ui32Register) && (((~psInputSignatures[i].ui32Mask) & ui32Mask) == 0))
{
*ppsOut = &psInputSignatures[i];
return 1;
}
}
ASSERT(allowNull);
return 0;
}
int ShaderInfo::GetPatchConstantSignatureFromRegister(const uint32_t ui32Register, const uint32_t ui32Mask, const InOutSignature** ppsOut, bool allowNull /* == false */) const
{
size_t i;
const size_t ui32NumVars = psPatchConstantSignatures.size();
for (i = 0; i < ui32NumVars; ++i)
{
if ((ui32Register == psPatchConstantSignatures[i].ui32Register) && (((~psPatchConstantSignatures[i].ui32Mask) & ui32Mask) == 0))
{
*ppsOut = &psPatchConstantSignatures[i];
return 1;
}
}
// There are situations (especially when using dcl_indexrange) where the compiler happily writes outside the actual masks.
// In those situations just take the last signature that uses that register (it's typically the "highest" one)
for (i = ui32NumVars - 1; i-- > 0;)
{
if (ui32Register == psPatchConstantSignatures[i].ui32Register)
{
*ppsOut = &psPatchConstantSignatures[i];
return 1;
}
}
ASSERT(allowNull);
return 0;
}
int ShaderInfo::GetOutputSignatureFromRegister(const uint32_t ui32Register,
const uint32_t ui32CompMask,
const uint32_t ui32Stream,
const InOutSignature** ppsOut,
bool allowNull /* = false */) const
{
size_t i;
const size_t ui32NumVars = psOutputSignatures.size();
ASSERT(ui32CompMask != 0);
for (i = 0; i < ui32NumVars; ++i)
{
if (ui32Register == psOutputSignatures[i].ui32Register &&
(ui32CompMask & psOutputSignatures[i].ui32Mask) &&
ui32Stream == psOutputSignatures[i].ui32Stream)
{
*ppsOut = &psOutputSignatures[i];
return 1;
}
}
ASSERT(allowNull);
return 0;
}
int ShaderInfo::GetOutputSignatureFromSystemValue(SPECIAL_NAME eSystemValueType, uint32_t ui32SemanticIndex, const InOutSignature** ppsOut) const
{
size_t i;
const size_t ui32NumVars = psOutputSignatures.size();
for (i = 0; i < ui32NumVars; ++i)
{
if (eSystemValueType == psOutputSignatures[i].eSystemValueType &&
ui32SemanticIndex == psOutputSignatures[i].ui32SemanticIndex)
{
*ppsOut = &psOutputSignatures[i];
return 1;
}
}
ASSERT(0);
return 0;
}
uint32_t ShaderInfo::GetCBVarSize(const ShaderVarType* psType, bool matrixAsVectors, bool wholeArraySize)
{
// Default is regular matrices, vectors and scalars
uint32_t size = psType->Columns * psType->Rows * 4;
// Struct size is calculated from the offset and size of its last member.
// Need to take into account that members could be arrays.
if (psType->Class == SVC_STRUCT)
{
size = psType->Members.back().Offset + GetCBVarSize(&psType->Members.back(), matrixAsVectors, true);
}
// Matrices represented as vec4 arrays have special size calculation
else if (matrixAsVectors)
{
if (psType->Class == SVC_MATRIX_ROWS)
{
size = psType->Rows * 16;
}
else if (psType->Class == SVC_MATRIX_COLUMNS)
{
size = psType->Columns * 16;
}
}
if (wholeArraySize && psType->Elements > 1)
{
uint32_t paddedSize = ((size + 15) / 16) * 16; // Arrays are padded to float4 size
size = (psType->Elements - 1) * paddedSize + size; // Except the last element
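        // e.g. a float3[4] array: 3 padded elements of 16 bytes plus 12 bytes for the last one = 60 bytes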
}
return size;
}
static const ShaderVarType* IsOffsetInType(const ShaderVarType* psType,
uint32_t parentOffset,
uint32_t offsetToFind,
bool* isArray,
std::vector<uint32_t>* arrayIndices,
int32_t* pi32Rebase,
uint32_t flags)
{
uint32_t thisOffset = parentOffset + psType->Offset;
uint32_t thisSize = ShaderInfo::GetCBVarSize(psType, (flags & HLSLCC_FLAG_TRANSLATE_MATRICES) != 0);
uint32_t paddedSize = ((thisSize + 15) / 16) * 16;
uint32_t arraySize = thisSize;
// Array elements are padded to align on vec4 size, except for the last one
if (psType->Elements)
arraySize = (paddedSize * (psType->Elements - 1)) + thisSize;
if ((offsetToFind >= thisOffset) &&
offsetToFind < (thisOffset + arraySize))
{
*isArray = false;
if (psType->Class == SVC_STRUCT)
{
if (psType->Elements > 1 && arrayIndices != NULL)
arrayIndices->push_back((offsetToFind - thisOffset) / thisSize);
// Need to bring offset back to element zero in case of array of structs
uint32_t offsetInStruct = (offsetToFind - thisOffset) % paddedSize;
uint32_t m = 0;
for (m = 0; m < psType->MemberCount; ++m)
{
const ShaderVarType* psMember = &psType->Members[m];
const ShaderVarType* foundType = IsOffsetInType(psMember, thisOffset, thisOffset + offsetInStruct, isArray, arrayIndices, pi32Rebase, flags);
if (foundType != NULL)
return foundType;
}
}
// Check for array of scalars or vectors (both take up 16 bytes per element).
// Matrices are also treated as arrays of vectors.
else if ((psType->Class == SVC_MATRIX_ROWS || psType->Class == SVC_MATRIX_COLUMNS) ||
((psType->Class == SVC_SCALAR || psType->Class == SVC_VECTOR) && psType->Elements > 1))
{
*isArray = true;
if (arrayIndices != NULL)
arrayIndices->push_back((offsetToFind - thisOffset) / 16);
}
else if (psType->Class == SVC_VECTOR)
{
//Check for vector starting at a non-vec4 offset.
// cbuffer $Globals
// {
//
// float angle; // Offset: 0 Size: 4
// float2 angle2; // Offset: 4 Size: 8
//
// }
//cb0[0].x = angle
//cb0[0].yzyy = angle2.xyxx
//Rebase angle2 so that .y maps to .x, .z maps to .y
pi32Rebase[0] = thisOffset % 16;
}
return psType;
}
return NULL;
}
int ShaderInfo::GetShaderVarFromOffset(const uint32_t ui32Vec4Offset,
const uint32_t(&pui32Swizzle)[4],
const ConstantBuffer* psCBuf,
const ShaderVarType** ppsShaderVar, // Output the found var
bool* isArray, // Output bool that tells if the found var is an array
std::vector<uint32_t>* arrayIndices, // Output vector of array indices in order from root parent to the found var
int32_t* pi32Rebase, // Output swizzle rebase
uint32_t flags)
{
size_t i;
uint32_t ui32ByteOffset = ui32Vec4Offset * 16;
//Swizzle can point to another variable. In the example below
//cbUIUpdates.g_uMaxFaces would be cb1[2].z. The scalars are combined
//into vectors. psCBuf->ui32NumVars will be 3.
// cbuffer cbUIUpdates
// {
// float g_fLifeSpan; // Offset: 0 Size: 4
// float g_fLifeSpanVar; // Offset: 4 Size: 4 [unused]
// float g_fRadiusMin; // Offset: 8 Size: 4 [unused]
// float g_fRadiusMax; // Offset: 12 Size: 4 [unused]
// float g_fGrowTime; // Offset: 16 Size: 4 [unused]
// float g_fStepSize; // Offset: 20 Size: 4
// float g_fTurnRate; // Offset: 24 Size: 4
// float g_fTurnSpeed; // Offset: 28 Size: 4 [unused]
// float g_fLeafRate; // Offset: 32 Size: 4
// float g_fShrinkTime; // Offset: 36 Size: 4 [unused]
// uint g_uMaxFaces; // Offset: 40 Size: 4
// }
if (pui32Swizzle[0] == OPERAND_4_COMPONENT_Y)
{
ui32ByteOffset += 4;
}
else if (pui32Swizzle[0] == OPERAND_4_COMPONENT_Z)
{
ui32ByteOffset += 8;
}
else if (pui32Swizzle[0] == OPERAND_4_COMPONENT_W)
{
ui32ByteOffset += 12;
}
const size_t ui32NumVars = psCBuf->asVars.size();
for (i = 0; i < ui32NumVars; ++i)
{
ppsShaderVar[0] = IsOffsetInType(&psCBuf->asVars[i].sType, psCBuf->asVars[i].ui32StartOffset, ui32ByteOffset, isArray, arrayIndices, pi32Rebase, flags);
if (ppsShaderVar[0] != NULL)
return 1;
}
return 0;
}
// Patches the fullName of the var with given array indices. Does not insert the indexing for the var itself if it is an array.
// Searches for brackets and inserts indices one by one.
std::string ShaderInfo::GetShaderVarIndexedFullName(const ShaderVarType* psShaderVar, const std::vector<uint32_t>& indices, const std::string& dynamicIndex, bool revertDynamicIndexCalc, bool matrixAsVectors)
{
std::ostringstream oss;
size_t prevpos = 0;
size_t pos = psShaderVar->fullName.find('[', 0);
uint32_t i = 0;
while (pos != std::string::npos)
{
pos++;
oss << psShaderVar->fullName.substr(prevpos, pos - prevpos);
// Add possibly given dynamic index for the root array.
if (i == 0 && !dynamicIndex.empty())
{
oss << dynamicIndex;
// if we couldn't use original index temp, revert the float4 address calc here
if (revertDynamicIndexCalc)
{
const ShaderVarType* psRootVar = psShaderVar;
while (psRootVar->Parent != NULL)
psRootVar = psRootVar->Parent;
uint32_t thisSize = (GetCBVarSize(psRootVar, matrixAsVectors) + 15) / 16; // size in float4
oss << " / " << thisSize;
}
if (!indices.empty() && indices[i] != 0)
oss << " + " << indices[i];
}
else if (i < indices.size())
oss << indices[i];
prevpos = pos;
i++;
pos = psShaderVar->fullName.find('[', prevpos);
}
oss << psShaderVar->fullName.substr(prevpos);
return oss.str();
}
ResourceGroup ShaderInfo::ResourceTypeToResourceGroup(ResourceType eType)
{
switch (eType)
{
case RTYPE_CBUFFER:
return RGROUP_CBUFFER;
case RTYPE_SAMPLER:
return RGROUP_SAMPLER;
case RTYPE_TEXTURE:
case RTYPE_BYTEADDRESS:
case RTYPE_STRUCTURED:
return RGROUP_TEXTURE;
case RTYPE_UAV_RWTYPED:
case RTYPE_UAV_RWSTRUCTURED:
case RTYPE_UAV_RWBYTEADDRESS:
case RTYPE_UAV_APPEND_STRUCTURED:
case RTYPE_UAV_CONSUME_STRUCTURED:
case RTYPE_UAV_RWSTRUCTURED_WITH_COUNTER:
return RGROUP_UAV;
case RTYPE_TBUFFER:
ASSERT(0); // Need to find out which group this belongs to
return RGROUP_TEXTURE;
default:
break;
}
ASSERT(0);
return RGROUP_CBUFFER;
}
static inline std::string GetTextureNameFromSamplerName(const std::string& samplerIn)
{
ASSERT(samplerIn.compare(0, 7, "sampler") == 0);
    // please note that we do not have hard rules about how sampler names should be structured
    // what's more, they can even skip the texture name entirely (but that should be handled separately)
    // how we try to deduce the texture name: we remove known tokens and take the leftmost (first) "word"
    // note that we want to support c-style naming (with underscores for spaces),
    // as it is pretty normal to have a texture name starting with an underscore;
    // we bind underscores "to the right"
    // note that we want the sampler state to be case insensitive:
    // while checking for a match could be done with strncasecmp/_strnicmp,
    // windows is missing a case-insensitive "find substring" (strcasestr), so we transform to lowercase instead
std::string sampler = samplerIn;
for (std::string::iterator i = sampler.begin(), in = sampler.end(); i != in; ++i)
*i = std::tolower(*i);
struct Token { const char* str; int len; };
#define TOKEN(s) { s, (int)strlen(s) }
Token token[] = {
TOKEN("compare"),
TOKEN("point"), TOKEN("trilinear"), TOKEN("linear"),
TOKEN("clamp"), TOKEN("clampu"), TOKEN("clampv"), TOKEN("clampw"),
TOKEN("repeat"), TOKEN("repeatu"), TOKEN("repeatv"), TOKEN("repeatw"),
TOKEN("mirror"), TOKEN("mirroru"), TOKEN("mirrorv"), TOKEN("mirrorw"),
TOKEN("mirroronce"), TOKEN("mirroronceu"), TOKEN("mirroroncev"), TOKEN("mirroroncew"),
};
#undef TOKEN
const char* s = sampler.c_str();
for (int texNameStart = 7; s[texNameStart];)
{
// skip underscores and find the potential beginning of a token
int tokenStart = texNameStart, tokenEnd = -1;
while (s[tokenStart] == '_')
++tokenStart;
// check token list for matches
for (int i = 0, n = sizeof(token) / sizeof(token[0]); i < n && tokenEnd < 0; ++i)
if (strncmp(s + tokenStart, token[i].str, token[i].len) == 0)
tokenEnd = tokenStart + token[i].len;
if (tokenEnd < 0)
{
// we have found texture name
// find next token
int nextTokenStart = sampler.length();
for (int i = 0, n = sizeof(token) / sizeof(token[0]); i < n; ++i)
{
// again: note that we want to be case insensitive
const int pos = sampler.find(token[i].str, tokenStart);
if (pos != std::string::npos && pos < nextTokenStart)
nextTokenStart = pos;
}
            // check preceding underscores, but only if we have found an actual token (not the end of the string)
if (nextTokenStart < sampler.length())
{
while (nextTokenStart > tokenStart && s[nextTokenStart - 1] == '_')
--nextTokenStart;
}
// note that we return the substring of the initial sampler name to preserve case
return samplerIn.substr(texNameStart, nextTokenStart - texNameStart);
}
else
{
// we have found known token
texNameStart = tokenEnd;
}
}
// if we ended up here, the texture name is missing
return "";
}
// note that we don't have the means right now to have unit tests in hlslcc, so we do poor man's testing below
// AddSamplerPrecisions is called once for every program, so it is easy to uncomment and test
static inline void Test_GetTextureNameFromSamplerName()
{
#define CHECK(s, t) ASSERT(GetTextureNameFromSamplerName(std::string(s)) == std::string(t))
CHECK("sampler_point_clamp", "");
CHECK("sampler_point_clamp_Tex", "_Tex");
CHECK("sampler_point_clamp_Tex__", "_Tex__");
CHECK("sampler_______point_Tex", "_Tex");
CHECK("samplerPointClamp", "");
CHECK("samplerPointClamp_Tex", "_Tex");
CHECK("samplerPointClamp_Tex__", "_Tex__");
CHECK("samplerPointTexClamp", "Tex");
CHECK("samplerPoint_TexClamp", "_Tex");
CHECK("samplerPoint_Tex_Clamp", "_Tex");
#undef CHECK
}
void ShaderInfo::AddSamplerPrecisions(HLSLccSamplerPrecisionInfo &info)
{
if (info.empty())
return;
#if _DEBUG && 0
Test_GetTextureNameFromSamplerName();
#endif
for (size_t i = 0; i < psResourceBindings.size(); i++)
{
ResourceBinding *rb = &psResourceBindings[i];
if (rb->eType != RTYPE_SAMPLER && rb->eType != RTYPE_TEXTURE && rb->eType != RTYPE_UAV_RWTYPED)
continue;
// Try finding the exact match
HLSLccSamplerPrecisionInfo::iterator j = info.find(rb->name);
// If match not found, check if name has "sampler" prefix (DX11 style sampler case)
// then we try to recover texture name from sampler name
if (j == info.end() && rb->name.compare(0, 7, "sampler") == 0)
j = info.find(GetTextureNameFromSamplerName(rb->name));
        // note that if we didn't find the respective texture, we cannot say anything about the sampler precision
        // currently it will become "unknown", resulting in half format, even if the texture we sample with it is explicitly marked as float
// TODO: should we somehow allow overriding it?
if (j != info.end())
rb->ePrecision = j->second;
}
}

View File

@@ -1,814 +0,0 @@
#include "internal_includes/UseDefineChains.h"
#include "internal_includes/debug.h"
#include "internal_includes/Instruction.h"
#include "internal_includes/ControlFlowGraph.h"
#include "internal_includes/debug.h"
#include "internal_includes/HLSLccToolkit.h"
#include <algorithm>
using HLSLcc::ForEachOperand;
#define DEBUG_UDCHAINS 0
#if DEBUG_UDCHAINS
// Debug mode
static void UDCheckConsistencyDUChain(uint32_t idx, DefineUseChains &psDUChains, UseDefineChains &psUDChains, ActiveDefinitions &activeDefinitions)
{
DefineUseChain::iterator du = psDUChains[idx].begin();
UseDefineChain::iterator ud = psUDChains[idx].begin();
while (du != psDUChains[idx].end())
{
ASSERT(du->index == idx % 4);
// Check that the definition actually writes to idx
{
uint32_t tempReg = idx / 4;
uint32_t offs = idx - (tempReg * 4);
uint32_t accessMask = 1 << offs;
uint32_t i;
int found = 0;
for (i = 0; i < du->psInst->ui32FirstSrc; i++)
{
if (du->psInst->asOperands[i].eType == OPERAND_TYPE_TEMP)
{
if (du->psInst->asOperands[i].ui32RegisterNumber == tempReg)
{
uint32_t writeMask = GetOperandWriteMask(&du->psInst->asOperands[i]);
if (writeMask & accessMask)
{
ASSERT(writeMask == du->writeMask);
found = 1;
break;
}
}
}
}
ASSERT(found);
}
// Check that each usage of each definition also is found in the use-define chain
UsageSet::iterator ul = du->usages.begin();
while (ul != du->usages.end())
{
// Search for the usage in the chain
UseDefineChain::iterator use = ud;
while (use != psUDChains[idx].end() && &*use != *ul)
use++;
ASSERT(use != psUDChains[idx].end());
ASSERT(&*use == *ul);
// Check that the mapping back is also found
ASSERT(std::find(use->defines.begin(), use->defines.end(), &*du) != use->defines.end());
ul++;
}
du++;
}
}
static void UDCheckConsistencyUDChain(uint32_t idx, DefineUseChains &psDUChains, UseDefineChains &psUDChains, ActiveDefinitions &activeDefinitions)
{
DefineUseChain::iterator du = psDUChains[idx].begin();
UseDefineChain::iterator ud = psUDChains[idx].begin();
while (ud != psUDChains[idx].end())
{
// Check that each definition of each usage also is found in the define-use chain
DefineSet::iterator dl = ud->defines.begin();
ASSERT(ud->psOp->ui32RegisterNumber == idx / 4);
ASSERT(ud->index == idx % 4);
while (dl != ud->defines.end())
{
// Search for the definition in the chain
DefineUseChain::iterator def = du;
while (def != psDUChains[idx].end() && &*def != *dl)
def++;
ASSERT(def != psDUChains[idx].end());
ASSERT(&*def == *dl);
// Check that the mapping back is also found
ASSERT(std::find(def->usages.begin(), def->usages.end(), &*ud) != def->usages.end());
dl++;
}
ud++;
}
}
static void UDCheckConsistency(uint32_t tempRegs, DefineUseChains &psDUChains, UseDefineChains &psUDChains, ActiveDefinitions &activeDefinitions)
{
uint32_t i;
for (i = 0; i < tempRegs * 4; i++)
{
UDCheckConsistencyDUChain(i, psDUChains, psUDChains, activeDefinitions);
UDCheckConsistencyUDChain(i, psDUChains, psUDChains, activeDefinitions);
}
}
#define printf_console printf
#endif
using namespace HLSLcc::ControlFlow;
using std::for_each;
static DefineUseChainEntry *GetOrCreateDefinition(const BasicBlock::Definition &def, DefineUseChain &psDUChain, uint32_t index)
{
// Try to find an existing entry
auto itr = std::find_if(psDUChain.begin(), psDUChain.end(), [&](const DefineUseChainEntry &de)
{
return de.psInst == def.m_Instruction && de.psOp == def.m_Operand;
});
if (itr != psDUChain.end())
{
return &(*itr);
}
// Not found, create
psDUChain.push_front(DefineUseChainEntry());
DefineUseChainEntry &de = *psDUChain.begin();
de.psInst = (Instruction *)def.m_Instruction;
de.psOp = (Operand *)def.m_Operand;
de.index = index;
de.writeMask = def.m_Operand->GetAccessMask();
de.psSiblings[index] = &de;
return &de;
}
// Do flow control analysis on the instructions and build the define-use and use-define chains
void BuildUseDefineChains(std::vector<Instruction> &instructions, uint32_t ui32NumTemps, DefineUseChains &psDUChain, UseDefineChains &psUDChain, HLSLcc::ControlFlow::ControlFlowGraph &cfg)
{
ActiveDefinitions lastSeenDefinitions(ui32NumTemps * 4, NULL); // Array of pointers to the currently active definition for each temp
psDUChain.clear();
psUDChain.clear();
for (uint32_t i = 0; i < ui32NumTemps * 4; i++)
{
psUDChain.insert(std::make_pair(i, UseDefineChain()));
psDUChain.insert(std::make_pair(i, DefineUseChain()));
}
const ControlFlowGraph::BasicBlockStorage &blocks = cfg.AllBlocks();
// Loop through each block, first calculate the union of all the reachables of all preceding blocks
// and then build on that as we go along the basic block instructions
for_each(blocks.begin(), blocks.end(), [&](const HLSLcc::shared_ptr<BasicBlock> &bptr)
{
const BasicBlock &b = *bptr.get();
BasicBlock::ReachableVariables rvars;
for_each(b.Preceding().begin(), b.Preceding().end(), [&](const Instruction *precBlock)
{
const BasicBlock &b = *cfg.GetBasicBlockForInstruction(precBlock);
BasicBlock::RVarUnion(rvars, b.Reachable());
});
// Now we have a Reachable set for the beginning of this block in rvars. Loop through all instructions and their operands and pick up uses and definitions
for (const Instruction *inst = b.First(); inst <= b.Last(); inst++)
{
// Process sources first
ForEachOperand(inst, inst + 1, FEO_FLAG_SRC_OPERAND | FEO_FLAG_SUBOPERAND,
[&](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType)
{
if (psOperand->eType != OPERAND_TYPE_TEMP)
return;
uint32_t tempReg = psOperand->ui32RegisterNumber;
uint32_t accessMask = psOperand->GetAccessMask();
// Go through each component
for (int k = 0; k < 4; k++)
{
if (!(accessMask & (1 << k)))
continue;
uint32_t regIdx = tempReg * 4 + k;
// Add an use for all visible definitions
psUDChain[regIdx].push_front(UseDefineChainEntry());
UseDefineChainEntry &ue = *psUDChain[regIdx].begin();
ue.psInst = (Instruction *)psInst;
ue.psOp = (Operand *)psOperand;
ue.accessMask = accessMask;
ue.index = k;
ue.psSiblings[k] = &ue;
// ue.siblings will be filled out later.
BasicBlock::ReachableDefinitionsPerVariable& rpv = rvars[regIdx];
for_each(rpv.begin(), rpv.end(), [&](const BasicBlock::Definition &def)
{
DefineUseChainEntry *duentry = GetOrCreateDefinition(def, psDUChain[regIdx], k);
ue.defines.insert(duentry);
duentry->usages.insert(&ue);
});
}
return;
});
// Then the destination operands
ForEachOperand(inst, inst + 1, FEO_FLAG_DEST_OPERAND,
[&](const Instruction *psInst, const Operand *psOperand, uint32_t ui32OperandType)
{
if (psOperand->eType != OPERAND_TYPE_TEMP)
return;
uint32_t tempReg = psOperand->ui32RegisterNumber;
uint32_t accessMask = psOperand->GetAccessMask();
// Go through each component
for (int k = 0; k < 4; k++)
{
if (!(accessMask & (1 << k)))
continue;
uint32_t regIdx = tempReg * 4 + k;
// Overwrite whatever's in rvars; they are killed by this
rvars[regIdx].clear();
rvars[regIdx].insert(BasicBlock::Definition(psInst, psOperand));
// Make sure the definition gets created even though it doesn't have any uses at all
// (happens when sampling a texture but not all channels are used etc).
GetOrCreateDefinition(BasicBlock::Definition(psInst, psOperand), psDUChain[regIdx], k);
}
return;
});
}
});
// Connect the siblings for all uses and definitions
for_each(psUDChain.begin(), psUDChain.end(), [&](std::pair<const uint32_t, UseDefineChain> &udpair)
{
UseDefineChain &ud = udpair.second;
// Clear out the bottom 2 bits to get the actual base reg
uint32_t baseReg = udpair.first & ~(3);
for_each(ud.begin(), ud.end(), [&](UseDefineChainEntry &ue)
{
ASSERT(baseReg / 4 == ue.psOp->ui32RegisterNumber);
// Go through each component
for (int k = 0; k < 4; k++)
{
// Skip components that we don't access, or the one that's our own
if (!(ue.accessMask & (1 << k)) || ue.index == k)
continue;
// Find the corresponding sibling. We can uniquely identify it by the operand pointer alone.
UseDefineChain::iterator siblItr = std::find_if(psUDChain[baseReg + k].begin(), psUDChain[baseReg + k].end(), [&](const UseDefineChainEntry &_sibl) -> bool { return _sibl.psOp == ue.psOp; });
ASSERT(siblItr != psUDChain[baseReg + k].end());
UseDefineChainEntry &sibling = *siblItr;
ue.psSiblings[k] = &sibling;
}
});
});
// Same for definitions
for_each(psDUChain.begin(), psDUChain.end(), [&](std::pair<const uint32_t, DefineUseChain> &dupair)
{
DefineUseChain &du = dupair.second;
// Clear out the bottom 2 bits to get the actual base reg
uint32_t baseReg = dupair.first & ~(3);
for_each(du.begin(), du.end(), [&](DefineUseChainEntry &de)
{
ASSERT(baseReg / 4 == de.psOp->ui32RegisterNumber);
// Go through each component
for (int k = 0; k < 4; k++)
{
// Skip components that we don't access, or the one that's our own
if (!(de.writeMask & (1 << k)) || de.index == k)
continue;
// Find the corresponding sibling. We can uniquely identify it by the operand pointer alone.
DefineUseChain::iterator siblItr = std::find_if(psDUChain[baseReg + k].begin(), psDUChain[baseReg + k].end(), [&](const DefineUseChainEntry &_sibl) -> bool { return _sibl.psOp == de.psOp; });
ASSERT(siblItr != psDUChain[baseReg + k].end());
DefineUseChainEntry &sibling = *siblItr;
de.psSiblings[k] = &sibling;
}
});
});
#if DEBUG_UDCHAINS
UDCheckConsistency(ui32NumTemps, psDUChain, psUDChain, lastSeenDefinitions);
#endif
}
typedef std::vector<DefineUseChainEntry *> SplitDefinitions;
// Split out a define to use a new temp register
static void UDDoSplit(SplitDefinitions &defs, uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector<uint32_t> &pui32SplitTable)
{
uint32_t newReg = *psNumTemps;
uint32_t oldReg = defs[0]->psOp->ui32RegisterNumber;
uint32_t accessMask = defs[0]->writeMask;
uint32_t i, u32def;
uint32_t rebase, count;
uint32_t splitTableValue;
ASSERT(defs.size() > 0);
for (i = 1; i < defs.size(); i++)
{
ASSERT(defs[i]->psOp->ui32RegisterNumber == oldReg);
accessMask |= defs[i]->writeMask;
}
(*psNumTemps)++;
#if DEBUG_UDCHAINS
UDCheckConsistency((*psNumTemps) - 1, psDUChains, psUDChains, ActiveDefinitions());
#endif
ASSERT(accessMask != 0 && accessMask <= 0xf);
// Calculate rebase value and component count
rebase = 0;
count = 0;
i = accessMask;
while ((i & 1) == 0)
{
rebase++;
i = i >> 1;
}
while (i != 0)
{
count++;
i = i >> 1;
}
// Make sure there's enough room in the split table
if (pui32SplitTable.size() <= newReg)
{
size_t newSize = pui32SplitTable.size() * 2;
pui32SplitTable.resize(newSize, 0xffffffff);
}
// Set the original temp of the new register
{
uint32_t origTemp = oldReg;
while (pui32SplitTable[origTemp] != 0xffffffff)
origTemp = pui32SplitTable[origTemp] & 0xffff;
ASSERT(rebase < 4);
ASSERT(count <= 4);
splitTableValue = (count << 24) | (rebase << 16) | origTemp;
pui32SplitTable[newReg] = splitTableValue;
}
// Insert the new temps to the map
for (i = newReg * 4; i < newReg * 4 + 4; i++)
{
psUDChains.insert(std::make_pair(i, UseDefineChain()));
psDUChains.insert(std::make_pair(i, DefineUseChain()));
}
for (u32def = 0; u32def < defs.size(); u32def++)
{
DefineUseChainEntry *defineToSplit = defs[u32def];
uint32_t oldIdx = defineToSplit->index;
#if DEBUG_UDCHAINS
printf("Split def at instruction %d (reg %d -> %d, access %X, rebase %d, count: %d)\n", (int)defineToSplit->psInst->id, oldReg, newReg, accessMask, rebase, count);
#endif
// We may have moved the opcodes already because of multiple defines pointing to the same op
if (defineToSplit->psOp->ui32RegisterNumber != newReg)
{
ASSERT(defineToSplit->psOp->ui32RegisterNumber == oldReg);
// Update the declaration operand
// Don't change possible suboperands as they are sources
defineToSplit->psInst->ChangeOperandTempRegister(defineToSplit->psOp, oldReg, newReg, accessMask, UD_CHANGE_MAIN_OPERAND, rebase);
}
defineToSplit->writeMask >>= rebase;
defineToSplit->index -= rebase;
// Change the temp register number for all usages
UsageSet::iterator ul = defineToSplit->usages.begin();
while (ul != defineToSplit->usages.end())
{
// Already updated by one of the siblings? Skip.
if ((*ul)->psOp->ui32RegisterNumber != newReg)
{
ASSERT((*ul)->psOp->ui32RegisterNumber == oldReg);
(*ul)->psInst->ChangeOperandTempRegister((*ul)->psOp, oldReg, newReg, accessMask, UD_CHANGE_MAIN_OPERAND, rebase);
}
// Update the UD chain
{
UseDefineChain::iterator udLoc = psUDChains[oldReg * 4 + oldIdx].begin();
while (udLoc != psUDChains[oldReg * 4 + oldIdx].end())
{
if (&*udLoc == *ul)
{
// Move to new list
psUDChains[newReg * 4 + oldIdx - rebase].splice(psUDChains[newReg * 4 + oldIdx - rebase].begin(), psUDChains[oldReg * 4 + oldIdx], udLoc);
if (rebase > 0)
{
(*ul)->accessMask >>= rebase;
(*ul)->index -= rebase;
memmove((*ul)->psSiblings, (*ul)->psSiblings + rebase, (4 - rebase) * sizeof(UseDefineChain *));
}
break;
}
udLoc++;
}
}
ul++;
}
// Move the define out of the old chain (if its still there)
{
// Find the define in the old chain
DefineUseChain::iterator duLoc = psDUChains[oldReg * 4 + oldIdx].begin();
while (duLoc != psDUChains[oldReg * 4 + oldIdx].end() && ((&*duLoc) != defineToSplit))
{
duLoc++;
}
ASSERT(duLoc != psDUChains[oldReg * 4 + oldIdx].end());
{
// Move directly to new chain
psDUChains[newReg * 4 + oldIdx - rebase].splice(psDUChains[newReg * 4 + oldIdx - rebase].begin(), psDUChains[oldReg * 4 + oldIdx], duLoc);
if (rebase != 0)
{
memmove(defineToSplit->psSiblings, defineToSplit->psSiblings + rebase, (4 - rebase) * sizeof(DefineUseChain *));
}
}
}
}
#if DEBUG_UDCHAINS
UDCheckConsistency(*psNumTemps, psDUChains, psUDChains, ActiveDefinitions());
#endif
}
// Adds a define and all its siblings to the list, checking duplicates
static void AddDefineToList(SplitDefinitions &defs, DefineUseChainEntry *newDef)
{
uint32_t k;
for (k = 0; k < 4; k++)
{
if (newDef->psSiblings[k])
{
DefineUseChainEntry *defToAdd = newDef->psSiblings[k];
uint32_t m;
int defFound = 0;
for (m = 0; m < defs.size(); m++)
{
if (defs[m] == defToAdd)
{
defFound = 1;
break;
}
}
if (defFound == 0)
{
defs.push_back(newDef->psSiblings[k]);
}
}
}
}
// Check if a set of definitions can be split and does the split. Returns nonzero if a split took place
static int AttemptSplitDefinitions(SplitDefinitions &defs, uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector<uint32_t> &pui32SplitTable)
{
uint32_t reg;
uint32_t combinedMask;
uint32_t i, k, u32def;
int canSplit = 1;
DefineUseChain::iterator du;
int hasLeftoverDefinitions = 0;
// Initial checks: all definitions must:
// Access the same register
// Have at least one definition in any of the 4 register slots that isn't included
if (defs.empty())
return 0;
reg = defs[0]->psOp->ui32RegisterNumber;
combinedMask = defs[0]->writeMask;
for (i = 1; i < defs.size(); i++)
{
if (reg != defs[i]->psOp->ui32RegisterNumber)
return 0;
combinedMask |= defs[i]->writeMask;
}
for (i = 0; i < 4; i++)
{
du = psDUChains[reg * 4 + i].begin();
while (du != psDUChains[reg * 4 + i].end())
{
int defFound = 0;
for (k = 0; k < defs.size(); k++)
{
if (&*du == defs[k])
{
defFound = 1;
break;
}
}
if (defFound == 0)
{
hasLeftoverDefinitions = 1;
break;
}
du++;
}
if (hasLeftoverDefinitions)
break;
}
// We'd be splitting the entire register and all its definitions, no point in that.
if (hasLeftoverDefinitions == 0)
return 0;
// Check all the definitions. Any of them must not have any usages that see any definitions not in our defs array.
for (u32def = 0; u32def < defs.size(); u32def++)
{
DefineUseChainEntry *def = defs[u32def];
UsageSet::iterator ul = def->usages.begin();
while (ul != def->usages.end())
{
uint32_t j;
// Check that we only read a subset of the combined writemask
if (((*ul)->accessMask & (~combinedMask)) != 0)
{
// Do an additional attempt, pick up all the sibling definitions as well
// Only do this if we have the space in the definitions table
for (j = 0; j < 4; j++)
{
if (((*ul)->accessMask & (1 << j)) == 0)
continue;
AddDefineToList(defs, *(*ul)->psSiblings[j]->defines.begin());
}
return AttemptSplitDefinitions(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable);
}
// It must have at least one declaration
ASSERT(!(*ul)->defines.empty());
// Check that all siblings for the usage use one of the definitions
for (j = 0; j < 4; j++)
{
uint32_t m;
int defineFound = 0;
if (((*ul)->accessMask & (1 << j)) == 0)
continue;
ASSERT((*ul)->psSiblings[j] != NULL);
ASSERT(!(*ul)->psSiblings[j]->defines.empty());
// Check that all definitions for this usage are found from the definitions table
DefineSet::iterator dl = (*ul)->psSiblings[j]->defines.begin();
while (dl != (*ul)->psSiblings[j]->defines.end())
{
defineFound = 0;
for (m = 0; m < defs.size(); m++)
{
if (*dl == defs[m])
{
defineFound = 1;
break;
}
}
if (defineFound == 0)
{
// Add this define and all its siblings to the table and try again
AddDefineToList(defs, *dl);
return AttemptSplitDefinitions(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable);
}
dl++;
}
if (defineFound == 0)
{
canSplit = 0;
break;
}
}
if (canSplit == 0)
break;
// This'll do, check next usage
ul++;
}
if (canSplit == 0)
break;
}
if (canSplit)
{
UDDoSplit(defs, psNumTemps, psDUChains, psUDChains, pui32SplitTable);
return 1;
}
return 0;
}
// Do temp splitting based on use-define chains
void UDSplitTemps(uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector<uint32_t> &pui32SplitTable)
{
// Algorithm overview:
// Take each definition and look at all its usages. If all usages only see this definition (and this is not the only definition for this variable),
// split it out.
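// An illustrative sketch of that criterion (hypothetical instruction sequence, not taken
// from any real shader input):
//   mov r0.x, c0.x        ; definition A of r0.x
//   add r1.x, r0.x, c1.x  ; the only use of A, and it sees no definition other than A
//   mov r0.xyz, c2.xyzz   ; definition B, so A is not the only definition of r0
// All uses of A see only A, and A is not the sole definition of its register, so A and
// its use can be rewritten to a fresh temp (say r2.x), shrinking r0's live range.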
uint32_t i;
uint32_t tempsAtStart = *psNumTemps; // We don't need to try to analyze the newly created ones, they're unsplittable by definition
for (i = 0; i < tempsAtStart * 4; i++)
{
// No definitions?
if (psDUChains[i].empty())
continue;
DefineUseChain::iterator du = psDUChains[i].begin();
// Ok we have multiple definitions for a temp, check them through
while (du != psDUChains[i].end())
{
SplitDefinitions sd;
AddDefineToList(sd, &*du);
du++;
// If we split, we'll have to start from the beginning of this chain because du might no longer be in this chain
if (AttemptSplitDefinitions(sd, psNumTemps, psDUChains, psUDChains, pui32SplitTable))
{
du = psDUChains[i].begin();
}
}
}
}
// Returns true if all the usages of this definition are instructions that deal with floating point data
static bool HasOnlyFloatUsages(DefineUseChain::iterator du)
{
UsageSet::iterator itr = du->usages.begin();
for (; itr != du->usages.end(); itr++)
{
Instruction *psInst = (*itr)->psInst;
if ((*itr)->psOp->eMinPrecision != OPERAND_MIN_PRECISION_DEFAULT)
return false;
switch (psInst->eOpcode)
{
case OPCODE_ADD:
case OPCODE_MUL:
case OPCODE_MOV:
case OPCODE_MAD:
case OPCODE_DIV:
case OPCODE_LOG:
case OPCODE_EXP:
case OPCODE_MAX:
case OPCODE_MIN:
case OPCODE_DP2:
case OPCODE_DP2ADD:
case OPCODE_DP3:
case OPCODE_DP4:
case OPCODE_RSQ:
case OPCODE_SQRT:
break;
default:
return false;
}
}
return true;
}
// Based on the sampler precisions, downgrade the definitions if possible.
void UpdateSamplerPrecisions(const ShaderInfo &info, DefineUseChains &psDUChains, uint32_t ui32NumTemps)
{
uint32_t madeProgress = 0;
do
{
uint32_t i;
madeProgress = 0;
for (i = 0; i < ui32NumTemps * 4; i++)
{
DefineUseChain::iterator du = psDUChains[i].begin();
while (du != psDUChains[i].end())
{
OPERAND_MIN_PRECISION sType = OPERAND_MIN_PRECISION_DEFAULT;
if (du->psInst->IsPartialPrecisionSamplerInstruction(info, &sType)
&& du->psInst->asOperands[0].eType == OPERAND_TYPE_TEMP
&& du->psInst->asOperands[0].eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT
&& du->isStandalone
&& HasOnlyFloatUsages(du))
{
uint32_t sibl;
// Ok we can change the precision.
ASSERT(du->psOp->eType == OPERAND_TYPE_TEMP);
ASSERT(sType != OPERAND_MIN_PRECISION_DEFAULT);
du->psOp->eMinPrecision = sType;
// Update all the uses of all the siblings
for (sibl = 0; sibl < 4; sibl++)
{
if (!du->psSiblings[sibl])
continue;
UsageSet::iterator ul = du->psSiblings[sibl]->usages.begin();
while (ul != du->psSiblings[sibl]->usages.end())
{
ASSERT((*ul)->psOp->eMinPrecision == OPERAND_MIN_PRECISION_DEFAULT ||
(*ul)->psOp->eMinPrecision == sType);
// We may well write this multiple times to the same op but that's fine.
(*ul)->psOp->eMinPrecision = sType;
ul++;
}
}
madeProgress = 1;
}
du++;
}
}
}
while (madeProgress != 0);
}
void CalculateStandaloneDefinitions(DefineUseChains &psDUChains, uint32_t ui32NumTemps)
{
uint32_t i;
for (i = 0; i < ui32NumTemps * 4; i++)
{
DefineUseChain::iterator du = psDUChains[i].begin();
while (du != psDUChains[i].end())
{
uint32_t sibl;
int isStandalone = 1;
if (du->isStandalone)
{
du++;
continue;
}
for (sibl = 0; sibl < 4; sibl++)
{
if (!du->psSiblings[sibl])
continue;
UsageSet::iterator ul = du->psSiblings[sibl]->usages.begin();
while (ul != du->psSiblings[sibl]->usages.end())
{
uint32_t k;
ASSERT(!(*ul)->defines.empty());
// Need to check that all the siblings of this usage only see this definition's corresponding sibling
for (k = 0; k < 4; k++)
{
if (!(*ul)->psSiblings[k])
continue;
if ((*ul)->psSiblings[k]->defines.size() > 1
|| *(*ul)->psSiblings[k]->defines.begin() != du->psSiblings[k])
{
isStandalone = 0;
break;
}
}
if (isStandalone == 0)
break;
ul++;
}
if (isStandalone == 0)
break;
}
if (isStandalone)
{
// Yep, mark it
for (sibl = 0; sibl < 4; sibl++)
{
if (!du->psSiblings[sibl])
continue;
du->psSiblings[sibl]->isStandalone = 1;
}
}
du++;
}
}
}
// Write the uses and defines back to Instruction and Operand member lists.
void WriteBackUsesAndDefines(DefineUseChains &psDUChains)
{
using namespace std;
// Loop through the whole data structure, and write usages and defines to Instructions and Operands as we see them
for_each(psDUChains.begin(), psDUChains.end(), [](const DefineUseChains::value_type &itr)
{
const DefineUseChain &duChain = itr.second;
for_each(duChain.begin(), duChain.end(), [](const DefineUseChain::value_type &du)
{
for_each(du.usages.begin(), du.usages.end(), [&du](const UseDefineChainEntry *usage)
{
// Update instruction use list
du.psInst->m_Uses.push_back(Instruction::Use(usage->psInst, usage->psOp));
// And the usage's definition
usage->psOp->m_Defines.push_back(Operand::Define(du.psInst, du.psOp));
});
});
});
}

View File

@ -1,87 +0,0 @@
/*
* This source file is part of the bstring string library. This code was
* written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause
* BSD open source license or GPL v2.0. Refer to the accompanying documentation
* for details on usage and license.
*/
/*
* bsafe.c
*
* This is an optional module that can be used to help enforce a safety
* standard based on pervasive usage of bstrlib. This file is not necessarily
* portable, however, it has been tested to work correctly with Intel's C/C++
* compiler, WATCOM C/C++ v11.x and Microsoft Visual C++.
*/
#include <stdio.h>
#include <stdlib.h>
#include "bsafe.h"
#if 0
static int bsafeShouldExit = 1;
char * strcpy(char *dst, const char *src);
char * strcat(char *dst, const char *src);
char * strcpy(char *dst, const char *src)
{
dst = dst;
src = src;
fprintf(stderr, "bsafe error: strcpy() is not safe, use bstrcpy instead.\n");
if (bsafeShouldExit) exit(-1);
return NULL;
}
char * strcat(char *dst, const char *src)
{
dst = dst;
src = src;
fprintf(stderr, "bsafe error: strcat() is not safe, use bstrcat instead.\n");
if (bsafeShouldExit) exit(-1);
return NULL;
}
#if !defined(__GNUC__) && (!defined(_MSC_VER) || (_MSC_VER <= 1310))
char * (gets)(char * buf) {
buf = buf;
fprintf(stderr, "bsafe error: gets() is not safe, use bgets.\n");
if (bsafeShouldExit) exit(-1);
return NULL;
}
#endif
char * (strncpy)(char *dst, const char *src, size_t n) {
dst = dst;
src = src;
n = n;
fprintf(stderr, "bsafe error: strncpy() is not safe, use bmidstr instead.\n");
if (bsafeShouldExit) exit(-1);
return NULL;
}
char * (strncat)(char *dst, const char *src, size_t n) {
dst = dst;
src = src;
n = n;
fprintf(stderr, "bsafe error: strncat() is not safe, use bstrcat then btrunc\n\tor cstr2tbstr, btrunc then bstrcat instead.\n");
if (bsafeShouldExit) exit(-1);
return NULL;
}
char * (strtok)(char *s1, const char *s2) {
s1 = s1;
s2 = s2;
fprintf(stderr, "bsafe error: strtok() is not safe, use bsplit or bsplits instead.\n");
if (bsafeShouldExit) exit(-1);
return NULL;
}
char * (strdup)(const char *s) {
s = s;
fprintf(stderr, "bsafe error: strdup() is not safe, use bstrcpy.\n");
if (bsafeShouldExit) exit(-1);
return NULL;
}
#endif

View File

@ -1,43 +0,0 @@
/*
* This source file is part of the bstring string library. This code was
* written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause
* BSD open source license or GPL v2.0. Refer to the accompanying documentation
* for details on usage and license.
*/
/*
* bsafe.h
*
* This is an optional module that can be used to help enforce a safety
* standard based on pervasive usage of bstrlib. This file is not necessarily
* portable, however, it has been tested to work correctly with Intel's C/C++
* compiler, WATCOM C/C++ v11.x and Microsoft Visual C++.
*/
#ifndef BSTRLIB_BSAFE_INCLUDE
#define BSTRLIB_BSAFE_INCLUDE
#ifdef __cplusplus
extern "C" {
#endif
#if !defined(__GNUC__) && (!defined(_MSC_VER) || (_MSC_VER <= 1310))
/* This is caught in the linker, so it's not necessary for gcc. */
extern char * (gets)(char * buf);
#endif
extern char * (strncpy)(char *dst, const char *src, size_t n);
extern char * (strncat)(char *dst, const char *src, size_t n);
extern char * (strtok)(char *s1, const char *s2);
extern char * (strdup)(const char *s);
#undef strcpy
#undef strcat
#define strcpy(a, b) bsafe_strcpy(a,b)
#define strcat(a, b) bsafe_strcat(a,b)
#ifdef __cplusplus
}
#endif
#endif

File diff suppressed because it is too large

View File

@ -1,112 +0,0 @@
/*
* This source file is part of the bstring string library. This code was
* written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause
* BSD open source license or GPL v2.0. Refer to the accompanying documentation
* for details on usage and license.
*/
/*
* bstraux.h
*
* This file is not a necessary part of the core bstring library itself, but
* is just an auxiliary module which includes miscellaneous or trivial
* functions.
*/
#ifndef BSTRAUX_INCLUDE
#define BSTRAUX_INCLUDE
#include <time.h>
#include "bstrlib.h"
#ifdef __cplusplus
extern "C" {
#endif
/* Safety mechanisms */
#define bstrDeclare(b) bstring (b) = NULL;
#define bstrFree(b) {if ((b) != NULL && (b)->slen >= 0 && (b)->mlen >= (b)->slen) { bdestroy (b); (b) = NULL; }}
/* Backward compatibility with previous versions of Bstrlib */
#define bAssign(a, b) ((bassign)((a), (b)))
#define bSubs(b, pos, len, a, c) ((breplace)((b),(pos),(len),(a),(unsigned char)(c)))
#define bStrchr(b, c) ((bstrchr)((b), (c)))
#define bStrchrFast(b, c) ((bstrchr)((b), (c)))
#define bCatCstr(b, s) ((bcatcstr)((b), (s)))
#define bCatBlk(b, s, len) ((bcatblk)((b),(s),(len)))
#define bCatStatic(b, s) bCatBlk ((b), ("" s ""), sizeof (s) - 1)
#define bTrunc(b, n) ((btrunc)((b), (n)))
#define bReplaceAll(b, find, repl, pos) ((bfindreplace)((b),(find),(repl),(pos)))
#define bUppercase(b) ((btoupper)(b))
#define bLowercase(b) ((btolower)(b))
#define bCaselessCmp(a, b) ((bstricmp)((a), (b)))
#define bCaselessNCmp(a, b, n) ((bstrnicmp)((a), (b), (n)))
#define bBase64Decode(b) (bBase64DecodeEx ((b), NULL))
#define bUuDecode(b) (bUuDecodeEx ((b), NULL))
/* Unusual functions */
extern struct bStream * bsFromBstr(const_bstring b);
extern bstring bTail(bstring b, int n);
extern bstring bHead(bstring b, int n);
extern int bSetCstrChar(bstring a, int pos, char c);
extern int bSetChar(bstring b, int pos, char c);
extern int bFill(bstring a, char c, int len);
extern int bReplicate(bstring b, int n);
extern int bReverse(bstring b);
extern int bInsertChrs(bstring b, int pos, int len, unsigned char c, unsigned char fill);
extern bstring bStrfTime(const char * fmt, const struct tm * timeptr);
#define bAscTime(t) (bStrfTime ("%c\n", (t)))
#define bCTime(t) ((t) ? bAscTime (localtime (t)) : NULL)
/* Spacing formatting */
extern int bJustifyLeft(bstring b, int space);
extern int bJustifyRight(bstring b, int width, int space);
extern int bJustifyMargin(bstring b, int width, int space);
extern int bJustifyCenter(bstring b, int width, int space);
/* Esoteric standards specific functions */
extern char * bStr2NetStr(const_bstring b);
extern bstring bNetStr2Bstr(const char * buf);
extern bstring bBase64Encode(const_bstring b);
extern bstring bBase64DecodeEx(const_bstring b, int * boolTruncError);
extern struct bStream * bsUuDecode(struct bStream * sInp, int * badlines);
extern bstring bUuDecodeEx(const_bstring src, int * badlines);
extern bstring bUuEncode(const_bstring src);
extern bstring bYEncode(const_bstring src);
extern bstring bYDecode(const_bstring src);
/* Writable stream */
typedef int (* bNwrite) (const void * buf, size_t elsize, size_t nelem, void * parm);
struct bwriteStream * bwsOpen(bNwrite writeFn, void * parm);
int bwsWriteBstr(struct bwriteStream * stream, const_bstring b);
int bwsWriteBlk(struct bwriteStream * stream, void * blk, int len);
int bwsWriteFlush(struct bwriteStream * stream);
int bwsIsEOF(const struct bwriteStream * stream);
int bwsBuffLength(struct bwriteStream * stream, int sz);
void * bwsClose(struct bwriteStream * stream);
/* Security functions */
#define bSecureDestroy(b) { \
bstring bstr__tmp = (b); \
if (bstr__tmp && bstr__tmp->mlen > 0 && bstr__tmp->data) { \
(void) memset (bstr__tmp->data, 0, (size_t) bstr__tmp->mlen); \
bdestroy (bstr__tmp); \
} \
}
#define bSecureWriteProtect(t) { \
if ((t).mlen >= 0) { \
if ((t).mlen > (t).slen) { \
(void) memset ((t).data + (t).slen, 0, (size_t) (t).mlen - (t).slen); \
} \
(t).mlen = -1; \
} \
}
extern bstring bSecureInput(int maxlen, int termchar,
bNgetc vgetchar, void * vgcCtx);
#ifdef __cplusplus
}
#endif
#endif

File diff suppressed because it is too large

View File

@ -1,306 +0,0 @@
/*
* This source file is part of the bstring string library. This code was
* written by Paul Hsieh in 2002-2010, and is covered by either the 3-clause
* BSD open source license or GPL v2.0. Refer to the accompanying documentation
* for details on usage and license.
*/
/*
* bstrlib.h
*
* This file is the header file for the core module for implementing the
* bstring functions.
*/
#ifndef BSTRLIB_INCLUDE
#define BSTRLIB_INCLUDE
#ifdef __cplusplus
extern "C" {
#endif
#include <stdarg.h>
#include <string.h>
#include <limits.h>
#include <ctype.h>
#if !defined(BSTRLIB_VSNP_OK) && !defined(BSTRLIB_NOVSNP)
# if defined(__TURBOC__) && !defined(__BORLANDC__)
# define BSTRLIB_NOVSNP
# endif
#endif
#define BSTR_ERR (-1)
#define BSTR_OK (0)
#define BSTR_BS_BUFF_LENGTH_GET (0)
typedef struct tagbstring * bstring;
typedef const struct tagbstring * const_bstring;
/* Copy functions */
#define cstr2bstr bfromcstr
extern bstring bfromcstr(const char * str);
extern bstring bfromcstralloc(int mlen, const char * str);
extern bstring blk2bstr(const void * blk, int len);
extern char * bstr2cstr(const_bstring s, char z);
extern int bcstrfree(char * s);
extern bstring bstrcpy(const_bstring b1);
extern int bassign(bstring a, const_bstring b);
extern int bassignmidstr(bstring a, const_bstring b, int left, int len);
extern int bassigncstr(bstring a, const char * str);
extern int bassignblk(bstring a, const void * s, int len);
/* Destroy function */
extern int bdestroy(bstring b);
/* Space allocation hinting functions */
extern int balloc(bstring s, int len);
extern int ballocmin(bstring b, int len);
/* Substring extraction */
extern bstring bmidstr(const_bstring b, int left, int len);
/* Various standard manipulations */
extern int bconcat(bstring b0, const_bstring b1);
extern int bconchar(bstring b0, char c);
extern int bcatcstr(bstring b, const char * s);
extern int bcatblk(bstring b, const void * s, int len);
extern int binsert(bstring s1, int pos, const_bstring s2, unsigned char fill);
extern int binsertch(bstring s1, int pos, int len, unsigned char fill);
extern int breplace(bstring b1, int pos, int len, const_bstring b2, unsigned char fill);
extern int bdelete(bstring s1, int pos, int len);
extern int bsetstr(bstring b0, int pos, const_bstring b1, unsigned char fill);
extern int btrunc(bstring b, int n);
/* Scan/search functions */
extern int bstricmp(const_bstring b0, const_bstring b1);
extern int bstrnicmp(const_bstring b0, const_bstring b1, int n);
extern int biseqcaseless(const_bstring b0, const_bstring b1);
extern int bisstemeqcaselessblk(const_bstring b0, const void * blk, int len);
extern int biseq(const_bstring b0, const_bstring b1);
extern int bisstemeqblk(const_bstring b0, const void * blk, int len);
extern int biseqcstr(const_bstring b, const char * s);
extern int biseqcstrcaseless(const_bstring b, const char * s);
extern int bstrcmp(const_bstring b0, const_bstring b1);
extern int bstrncmp(const_bstring b0, const_bstring b1, int n);
extern int binstr(const_bstring s1, int pos, const_bstring s2);
extern int binstrr(const_bstring s1, int pos, const_bstring s2);
extern int binstrcaseless(const_bstring s1, int pos, const_bstring s2);
extern int binstrrcaseless(const_bstring s1, int pos, const_bstring s2);
extern int bstrchrp(const_bstring b, int c, int pos);
extern int bstrrchrp(const_bstring b, int c, int pos);
#define bstrchr(b, c) bstrchrp ((b), (c), 0)
#define bstrrchr(b, c) bstrrchrp ((b), (c), blength(b)-1)
extern int binchr(const_bstring b0, int pos, const_bstring b1);
extern int binchrr(const_bstring b0, int pos, const_bstring b1);
extern int bninchr(const_bstring b0, int pos, const_bstring b1);
extern int bninchrr(const_bstring b0, int pos, const_bstring b1);
extern int bfindreplace(bstring b, const_bstring find, const_bstring repl, int pos);
extern int bfindreplacecaseless(bstring b, const_bstring find, const_bstring repl, int pos);
/* List of string container functions */
struct bstrList
{
int qty, mlen;
bstring * entry;
};
extern struct bstrList * bstrListCreate(void);
extern int bstrListDestroy(struct bstrList * sl);
extern int bstrListAlloc(struct bstrList * sl, int msz);
extern int bstrListAllocMin(struct bstrList * sl, int msz);
/* String split and join functions */
extern struct bstrList * bsplit(const_bstring str, unsigned char splitChar);
extern struct bstrList * bsplits(const_bstring str, const_bstring splitStr);
extern struct bstrList * bsplitstr(const_bstring str, const_bstring splitStr);
extern bstring bjoin(const struct bstrList * bl, const_bstring sep);
extern int bsplitcb(const_bstring str, unsigned char splitChar, int pos,
int (* cb)(void * parm, int ofs, int len), void * parm);
extern int bsplitscb(const_bstring str, const_bstring splitStr, int pos,
int (* cb)(void * parm, int ofs, int len), void * parm);
extern int bsplitstrcb(const_bstring str, const_bstring splitStr, int pos,
int (* cb)(void * parm, int ofs, int len), void * parm);
/* Miscellaneous functions */
extern int bpattern(bstring b, int len);
extern int btoupper(bstring b);
extern int btolower(bstring b);
extern int bltrimws(bstring b);
extern int brtrimws(bstring b);
extern int btrimws(bstring b);
/* <*>printf format functions */
#if !defined(BSTRLIB_NOVSNP)
extern bstring bformat(const char * fmt, ...);
extern int bformata(bstring b, const char * fmt, ...);
extern int bassignformat(bstring b, const char * fmt, ...);
extern int bvcformata(bstring b, int count, const char * fmt, va_list arglist);
#define bvformata(ret, b, fmt, lastarg) { \
bstring bstrtmp_b = (b); \
const char * bstrtmp_fmt = (fmt); \
int bstrtmp_r = BSTR_ERR, bstrtmp_sz = 16; \
for (;;) { \
va_list bstrtmp_arglist; \
va_start (bstrtmp_arglist, lastarg); \
bstrtmp_r = bvcformata (bstrtmp_b, bstrtmp_sz, bstrtmp_fmt, bstrtmp_arglist); \
va_end (bstrtmp_arglist); \
if (bstrtmp_r >= 0) { /* Everything went ok */ \
bstrtmp_r = BSTR_OK; \
break; \
} else if (-bstrtmp_r <= bstrtmp_sz) { /* A real error? */ \
bstrtmp_r = BSTR_ERR; \
break; \
} \
bstrtmp_sz = -bstrtmp_r; /* Doubled or target size */ \
} \
ret = bstrtmp_r; \
}
#endif
typedef int (*bNgetc) (void *parm);
typedef size_t (* bNread) (void *buff, size_t elsize, size_t nelem, void *parm);
/* Input functions */
extern bstring bgets(bNgetc getcPtr, void * parm, char terminator);
extern bstring bread(bNread readPtr, void * parm);
extern int bgetsa(bstring b, bNgetc getcPtr, void * parm, char terminator);
extern int bassigngets(bstring b, bNgetc getcPtr, void * parm, char terminator);
extern int breada(bstring b, bNread readPtr, void * parm);
/* Stream functions */
extern struct bStream * bsopen(bNread readPtr, void * parm);
extern void * bsclose(struct bStream * s);
extern int bsbufflength(struct bStream * s, int sz);
extern int bsreadln(bstring b, struct bStream * s, char terminator);
extern int bsreadlns(bstring r, struct bStream * s, const_bstring term);
extern int bsread(bstring b, struct bStream * s, int n);
extern int bsreadlna(bstring b, struct bStream * s, char terminator);
extern int bsreadlnsa(bstring r, struct bStream * s, const_bstring term);
extern int bsreada(bstring b, struct bStream * s, int n);
extern int bsunread(struct bStream * s, const_bstring b);
extern int bspeek(bstring r, const struct bStream * s);
extern int bssplitscb(struct bStream * s, const_bstring splitStr,
int (* cb)(void * parm, int ofs, const_bstring entry), void * parm);
extern int bssplitstrcb(struct bStream * s, const_bstring splitStr,
int (* cb)(void * parm, int ofs, const_bstring entry), void * parm);
extern int bseof(const struct bStream * s);
struct tagbstring
{
int mlen;
int slen;
unsigned char * data;
};
/* Accessor macros */
#define blengthe(b, e) (((b) == (void *)0 || (b)->slen < 0) ? (int)(e) : ((b)->slen))
#define blength(b) (blengthe ((b), 0))
#define bdataofse(b, o, e) (((b) == (void *)0 || (b)->data == (void*)0) ? (char *)(e) : ((char *)(b)->data) + (o))
#define bdataofs(b, o) (bdataofse ((b), (o), (void *)0))
#define bdatae(b, e) (bdataofse (b, 0, e))
#define bdata(b) (bdataofs (b, 0))
#define bchare(b, p, e) ((((unsigned)(p)) < (unsigned)blength(b)) ? ((b)->data[(p)]) : (e))
#define bchar(b, p) bchare ((b), (p), '\0')
/* Static constant string initialization macro */
#define bsStaticMlen(q, m) {(m), (int) sizeof(q)-1, (unsigned char *) ("" q "")}
#if defined(_MSC_VER)
/* There are many versions of MSVC which emit __LINE__ as a non-constant. */
# define bsStatic(q) bsStaticMlen(q,-32)
#endif
#ifndef bsStatic
# define bsStatic(q) bsStaticMlen(q,-__LINE__)
#endif
/* Static constant block parameter pair */
#define bsStaticBlkParms(q) ((void *)("" q "")), ((int) sizeof(q)-1)
/* Reference building macros */
#define cstr2tbstr btfromcstr
#define btfromcstr(t, s) { \
(t).data = (unsigned char *) (s); \
(t).slen = ((t).data) ? ((int) (strlen) ((char *)(t).data)) : 0; \
(t).mlen = -1; \
}
#define blk2tbstr(t, s, l) { \
(t).data = (unsigned char *) (s); \
(t).slen = l; \
(t).mlen = -1; \
}
#define btfromblk(t, s, l) blk2tbstr(t,s,l)
#define bmid2tbstr(t, b, p, l) { \
const_bstring bstrtmp_s = (b); \
if (bstrtmp_s && bstrtmp_s->data && bstrtmp_s->slen >= 0) { \
int bstrtmp_left = (p); \
int bstrtmp_len = (l); \
if (bstrtmp_left < 0) { \
bstrtmp_len += bstrtmp_left; \
bstrtmp_left = 0; \
} \
if (bstrtmp_len > bstrtmp_s->slen - bstrtmp_left) \
bstrtmp_len = bstrtmp_s->slen - bstrtmp_left; \
if (bstrtmp_len <= 0) { \
(t).data = (unsigned char *)""; \
(t).slen = 0; \
} else { \
(t).data = bstrtmp_s->data + bstrtmp_left; \
(t).slen = bstrtmp_len; \
} \
} else { \
(t).data = (unsigned char *)""; \
(t).slen = 0; \
} \
(t).mlen = -__LINE__; \
}
#define btfromblkltrimws(t, s, l) { \
int bstrtmp_idx = 0, bstrtmp_len = (l); \
unsigned char * bstrtmp_s = (s); \
if (bstrtmp_s && bstrtmp_len >= 0) { \
for (; bstrtmp_idx < bstrtmp_len; bstrtmp_idx++) { \
if (!isspace (bstrtmp_s[bstrtmp_idx])) break; \
} \
} \
(t).data = bstrtmp_s + bstrtmp_idx; \
(t).slen = bstrtmp_len - bstrtmp_idx; \
(t).mlen = -__LINE__; \
}
#define btfromblkrtrimws(t, s, l) { \
int bstrtmp_len = (l) - 1; \
unsigned char * bstrtmp_s = (s); \
if (bstrtmp_s && bstrtmp_len >= 0) { \
for (; bstrtmp_len >= 0; bstrtmp_len--) { \
if (!isspace (bstrtmp_s[bstrtmp_len])) break; \
} \
} \
(t).data = bstrtmp_s; \
(t).slen = bstrtmp_len + 1; \
(t).mlen = -__LINE__; \
}
#define btfromblktrimws(t, s, l) { \
int bstrtmp_idx = 0, bstrtmp_len = (l) - 1; \
unsigned char * bstrtmp_s = (s); \
if (bstrtmp_s && bstrtmp_len >= 0) { \
for (; bstrtmp_idx <= bstrtmp_len; bstrtmp_idx++) { \
if (!isspace (bstrtmp_s[bstrtmp_idx])) break; \
} \
for (; bstrtmp_len >= bstrtmp_idx; bstrtmp_len--) { \
if (!isspace (bstrtmp_s[bstrtmp_len])) break; \
} \
} \
(t).data = bstrtmp_s + bstrtmp_idx; \
(t).slen = bstrtmp_len + 1 - bstrtmp_idx; \
(t).mlen = -__LINE__; \
}
/* Write protection macros */
#define bwriteprotect(t) { if ((t).mlen >= 0) (t).mlen = -1; }
#define bwriteallow(t) { if ((t).mlen == -1) (t).mlen = (t).slen + ((t).slen == 0); }
#define biswriteprotected(t) ((t).mlen <= 0)
#ifdef __cplusplus
}
#endif
#endif

File diff suppressed because it is too large

View File

@ -1,29 +0,0 @@
Copyright (c) 2002-2008 Paul Hsieh
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:
Redistributions of source code must retain the above copyright notice,
this list of conditions and the following disclaimer.
Redistributions in binary form must reproduce the above copyright notice,
this list of conditions and the following disclaimer in the documentation
and/or other materials provided with the distribution.
Neither the name of bstrlib nor the names of its contributors may be used
to endorse or promote products derived from this software without
specific prior written permission.
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
POSSIBILITY OF SUCH DAMAGE.

View File

@ -1,172 +0,0 @@
Better String library Porting Guide
-----------------------------------
by Paul Hsieh
The bstring library is an attempt to provide improved string processing
functionality to the C and C++ language. At the heart of the bstring library
is the management of "bstring"s which are a significant improvement over '\0'
terminated char buffers. See the accompanying documentation file bstrlib.txt
for more information.
===============================================================================
Identifying the Compiler
------------------------
Bstrlib has been tested on the following compilers:
Microsoft Visual C++
Watcom C/C++ (32 bit flat)
Intel's C/C++ compiler (on Windows)
The GNU C/C++ compiler (on Windows/Linux on x86 and PPC64)
Borland C++
Turbo C
There are slight differences in these compilers which requires slight
differences in the implementation of Bstrlib. These are accommodated in the
same sources using #ifdef/#if defined() on compiler specific macros. To
port Bstrlib to a new compiler not listed above, it is recommended that the
same strategy be followed. If you are unaware of the compiler specific
identifying preprocessor macro for your compiler you might find it here:
http://predef.sourceforge.net/precomp.html
Note that Intel C/C++ on Windows sets the Microsoft identifier: _MSC_VER.
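As a hedged sketch of that strategy (these are the standard predefined identifiers
for the compilers listed above, not macros that Bstrlib itself defines), a port
typically branches on the compiler like so:

#if defined (_MSC_VER)
/* Microsoft Visual C++, or Intel C/C++ on Windows */
#elif defined (__WATCOMC__)
/* Watcom C/C++ */
#elif defined (__GNUC__)
/* The GNU C/C++ compiler */
#endif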
16-bit vs. 32-bit vs. 64-bit Systems
------------------------------------
Bstrlib has been architected to deal with strings of length between 0 and
INT_MAX (inclusive). Since the values of int are never higher than size_t
there will be no issue here. Note that on most 64-bit systems int is 32-bit.
Dependency on The C-Library
---------------------------
Bstrlib uses the functions memcpy, memmove, malloc, realloc, free and
vsnprintf. Many free standing C compiler implementations that have a mode in
which the C library is not available will typically not include these
functions which will make porting Bstrlib to it onerous. Bstrlib is not
designed for such bare bones compiler environments. This usually includes
compilers that target ROM environments.
Porting Issues
--------------
Bstrlib has been written completely in ANSI/ISO C and ISO C++, however, there
are still a few porting issues. These are described below.
1. The vsnprintf () function.
Unfortunately, the earlier ANSI/ISO C standards did not include this function.
If the compiler of interest does not support this function then the
BSTRLIB_NOVSNP macro should be defined via something like:
#if !defined (BSTRLIB_VSNP_OK) && !defined (BSTRLIB_NOVSNP)
# if defined (__TURBOC__) || defined (__COMPILERVENDORSPECIFICMACRO__)
# define BSTRLIB_NOVSNP
# endif
#endif
which appears at the top of bstrlib.h. Note that the bformat(a) functions
will not be declared or implemented if the BSTRLIB_NOVSNP macro is set. If
the compiler has renamed vsnprintf() to some other named function, then
search for the definition of the exvsnprintf macro in bstrlib.c file and be
sure its defined appropriately:
#if defined (__COMPILERVENDORSPECIFICMACRO__)
# define exvsnprintf(r,b,n,f,a) {r=__compiler_specific_vsnprintf(b,n,f,a);}
#else
# define exvsnprintf(r,b,n,f,a) {r=vsnprintf(b,n,f,a);}
#endif
Take notice of the return value being captured in the variable r. It is
assumed that r exceeds n if and only if the underlying vsnprintf function has
determined what the true maximal output length would be for output if the
buffer were large enough to hold it. Non-modern implementations must output a
lesser number (the macro can and should be modified to ensure this).
2. Weak C++ compiler.
C++ is a much more complicated language to implement than C. This has led
to varying quality of compiler implementations. The weaknesses isolated in
the initial ports are inclusion of the Standard Template Library,
std::iostream and exception handling. By default it is assumed that the C++
compiler supports all of these things correctly. If your compiler does not
support one or more of these define the corresponding macro:
BSTRLIB_CANNOT_USE_STL
BSTRLIB_CANNOT_USE_IOSTREAM
BSTRLIB_DOESNT_THROW_EXCEPTIONS
The compiler specific detected macro should be defined at the top of
bstrwrap.h in the Configuration defines section. Note that these disabling
macros can be overridden with the associated enabling macro if a subsequent
version of the compiler gains support. (For example, it's possible to rig
up STLport to provide STL support for WATCOM C/C++, so -DBSTRLIB_CAN_USE_STL
can be passed in as a compiler option.)
3. The bsafe module, and reserved words.
The bsafe module is in gross violation of the ANSI/ISO C standard in the
sense that it redefines what could be implemented as reserved words on a
given compiler. The typical problem is that a compiler may inline some of the
functions and thus not be properly overridden by the definitions in the bsafe
module. It is also possible that a compiler may prohibit the redefinitions in
the bsafe module. Compiler specific action will be required to deal with
these situations.
Platform Specific Files
-----------------------
The makefiles for the examples are basically set up for particular
environments for each platform. In general these makefiles are not portable
and should be constructed as necessary from scratch for each platform.
Testing a port
--------------
To test that a port compiles correctly do the following:
1. Build a sample project that includes the bstrlib, bstraux, bstrwrap, and
bsafe modules.
2. Compile bstest against the bstrlib module.
3. Run bstest and ensure that 0 errors are reported.
4. Compile test against the bstrlib and bstrwrap modules.
5. Run test and ensure that 0 errors are reported.
6. Compile each of the examples (except for the "re" example, which may be
complicated and is not a real test of bstrlib and except for the mfcbench
example which is Windows specific.)
7. Run each of the examples.
The builds must have 0 errors, and should have the absolute minimum number of
warnings (in most cases can be reduced to 0.) The result of execution should
be essentially identical on each platform.
Performance
-----------
Different CPU and compilers have different capabilities in terms of
performance. It is possible for Bstrlib to assume performance
characteristics that a platform doesn't have (since it was primarily
developed on just one platform). The goal of Bstrlib is to provide very good
performance on all platforms regardless of this but without resorting to
extreme measures (such as using assembly language, or non-portable intrinsics
or library extensions.)
There are two performance benchmarks that can be found in the example/
directory. They are: cbench.c and cppbench.cpp. These are variations and
expansions of a benchmark for another string library. They don't cover all
string functionality, but do include the most basic functions which will be
common in most string manipulation kernels.
...............................................................................
Feedback
--------
In all cases, you may email issues found to the primary author of Bstrlib at
the email address: websnarf@users.sourceforge.net
===============================================================================

View File

@ -1,221 +0,0 @@
Better String library Security Statement
----------------------------------------
by Paul Hsieh
===============================================================================
Introduction
------------
The Better String library (hereafter referred to as Bstrlib) is an attempt to
provide improved string processing functionality to the C and C++ languages.
At the heart of the Bstrlib is the management of "bstring"s which are a
significant improvement over '\0' terminated char buffers. See the
accompanying documentation file bstrlib.txt for more information.
DISCLAIMER: THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND
CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT
NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS;
OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF
ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
Like any software, there is always a possibility of failure due to a flawed
implementation. Nevertheless a good faith effort has been made to minimize
such flaws in Bstrlib. Also, use of Bstrlib by itself will not make an
application secure or free from implementation failures. However, it is the
author's conviction that use of Bstrlib can greatly facilitate the creation
of software meeting the highest possible standards of security.
Part of the reason why this document has been created, is for the purpose of
security auditing, or the creation of further "Statements on Security" for
software that is created that uses Bstrlib. An auditor may check the claims
below against Bstrlib, and use this as a basis for analysis of software which
uses Bstrlib.
===============================================================================
Statement on Security
---------------------
This is a document intended to give consumers of the Better String Library
who are interested in security an idea of where the Better String Library
stands on various security issues. Any deviation observed in the actual
library itself from the descriptions below should be considered an
implementation error, not a design flaw.
This statement is not an analytical proof of correctness or an outline of one
but rather an assertion similar to a scientific claim or hypothesis. By use,
testing and open independent examination (otherwise known as scientific
falsifiability), the credibility of the claims made below can rise to the
level of an established theory.
Common security issues:
.......................
1. Buffer Overflows
The Bstrlib API allows the programmer a way to deal with strings without
having to deal with the buffers containing them. Ordinary usage of the
Bstrlib API itself makes buffer overflows impossible.
Furthermore, the Bstrlib API has a superset of basic string functionality as
compared to the C library's char * functions, C++'s std::string class and
Microsoft's MFC based CString class. It also has abstracted mechanisms for
dealing with IO. This is important as it gives developers a way of migrating
all their code from a functionality point of view.
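A minimal sketch of this point (only functions declared in bstrlib.h are used;
error handling is omitted for brevity): the string below grows as needed, so
there is no destination buffer for the programmer to overflow.

#include <stdio.h>
#include "bstrlib.h"
int main ()
{
bstring b = bfromcstr ("Hello");
bcatcstr (b, ", world"); /* storage is grown by the library as required */
bconchar (b, '!');
printf ("%s\n", bdata (b));
bdestroy (b);
return 0;
}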
2. Memory size overflow/wrap around attack
Bstrlib is, by design, impervious to memory size overflow attacks. The
reason it is resilient to length overflows is that bstring lengths are
bounded above by INT_MAX, instead of ~(size_t)0. So length addition
overflows cause a wrap around of the integer value making them negative
causing balloc() to fail before an erroneous operation can occur. Attempted
conversions of char * strings which may have lengths greater than INT_MAX are
detected and the conversion is aborted.
It is unknown if this property holds on machines that don't represent
integers as 2s complement. It is recommended that Bstrlib be carefully
audited by anyone using a system which is not 2s complement based.
3. Constant string protection
Bstrlib implements runtime enforced constant and read-only string semantics.
I.e., bstrings which are declared as constant via the bsStatic() macro cannot
be modified or deallocated directly through the Bstrlib API, and this cannot
be subverted by casting or other type coercion. This is independent of the
use of the const_bstring data type.
The Bstrlib C API uses the type const_bstring to specify bstring parameters
whose contents do not change. Although the C language cannot enforce this,
this is nevertheless guaranteed by the implementation of the Bstrlib library
of C functions. The C++ API enforces the const attribute on CBString types
correctly.
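A short sketch of the runtime enforcement described above (the behaviour follows
from the write-protection convention in bstrlib.h, where a non-positive mlen marks
a string as read-only; BSTR_ERR is the error return defined in the same header):

struct tagbstring hw = bsStatic ("Hello, world");
int ret = bcatcstr (&hw, "!"); /* rejected at runtime: returns BSTR_ERR, hw is untouched */
ret = bdestroy (&hw); /* likewise rejected with BSTR_ERR */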
4. Aliased bstring support
Bstrlib detects and supports aliased parameter management throughout the API.
The kind of aliasing that is allowed is the one where pointers of the same
basic type may be pointing to overlapping objects (this is the assumption the
ANSI C99 specification makes.) Each function behaves as if all read-only
parameters were copied to temporaries which are used in their stead before
the function is enacted (it rarely actually does this). No function in the
Bstrlib uses the "restrict" parameter attribute from the ANSI C99
specification.
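For example (a sketch relying on the aliasing guarantee stated above), passing the
same bstring as both the destination and the source behaves as if the source had
been copied to a temporary first:

bstring b = bfromcstr ("abc");
bconcat (b, b); /* b now holds "abcabc" */
bdestroy (b);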
5. Information leaking
In bstraux.h, using the semantically equivalent macros bSecureDestroy() and
bSecureWriteProtect() in place of bdestroy() and bwriteprotect() respectively
will ensure that stale data does not linger in the heap's free space after
strings have been released back to memory. Created bstrings or CBStrings
are not linked to anything external to themselves, and thus cannot expose
deterministic data leaking. If a bstring is resized, the preimage may exist
as a copy that is released to the heap. Thus for sensitive data, the bstring
should be sufficiently presized before it is manipulated so that it is not resized.
bSecureInput() has been supplied in bstraux.c, which can be used to obtain
input securely without any risk of leaving any part of the input image in the
heap except for the allocated bstring that is returned.
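A hedged sketch of that pattern (bSecureInput() and bSecureDestroy() come from
bstraux.h; the mygetc wrapper and read_password are hypothetical names added here
so the callback matches the bNgetc type from bstrlib.h):

#include <stdio.h>
#include "bstraux.h"
static int mygetc (void * f) { return fgetc ((FILE *) f); }
void read_password (void)
{
bstring pw = bSecureInput (64, '\n', mygetc, stdin);
/* ... use pw for authentication ... */
bSecureDestroy (pw); /* wipes the buffer before releasing it to the heap */
}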
6. Memory leaking
Bstrlib can be built using memdbg.h enabled via the BSTRLIB_MEMORY_DEBUG
macro. User generated definitions for malloc, realloc and free can then be
supplied which can implement special strategies for memory corruption
detection or memory leaking. Otherwise, bstrlib does not do anything out of
the ordinary to attempt to deal with the standard problem of memory leaking
(i.e., losing references to allocated memory) when programming in the C and
C++ languages. However, it does not compound the problem any more than exists
either, as it doesn't have any intrinsic inescapable leaks in it. Bstrlib
does not preclude the use of automatic garbage collection mechanisms such as
the Boehm garbage collector.
7. Encryption
Bstrlib does not present any built-in encryption mechanism. However, it
supports full binary contents in its data buffers, so any standard block
based encryption mechanism can make direct use of bstrings/CBStrings for
buffer management.
8. Double freeing
Freeing a pointer that is already free is an extremely rare, but nevertheless
a potentially ruthlessly corrupting operation (it's possible to cause Win 98 to
reboot, by calling free multiple times on already freed data using the WATCOM
CRT.) Bstrlib invalidates the bstring header data before freeing, so that in
many cases a double free will be detected and an error will be reported
(though this behaviour is not guaranteed and should not be relied on).
Using bstrFree pervasively (instead of bdestroy) can lead to somewhat
improved invalid free avoidance (it is completely safe whenever bstring
instances are only stored in unique variables). For example:
struct tagbstring hw = bsStatic ("Hello, world");
bstring cpHw = bstrcpy (&hw);
#ifdef NOT_QUITE_AS_SAFE
bdestroy (cpHw); /* Never fail */
bdestroy (cpHw); /* Error sometimes detected at runtime */
bdestroy (&hw); /* Error detected at run time */
#else
bstrFree (cpHw); /* Never fail */
bstrFree (cpHw); /* Will do nothing */
bstrFree (&hw); /* Will lead to a compile time error */
#endif
9. Resource based denial of service
bSecureInput() has been supplied in bstraux.c. It has an optional upper limit
for input length. But unlike fgets(), it also makes it easy to determine whether
the buffer has been truncated early. In this way, a program can set an upper limit
on input sizes while still allowing for implementing context specific
truncation semantics (i.e., does the program consume but dump the extra
input, or does it consume it in later inputs?)
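One plausible way to act on that (an assumption about usage, not a statement of
bSecureInput()'s exact contract; it reuses the hypothetical mygetc adapter from the
sketch in section 5): if the returned string is exactly as long as the supplied
limit, treat the input as possibly truncated.

bstring line = bSecureInput (256, '\n', mygetc, stdin);
if (blength (line) == 256)
{
/* the limit was reached; decide whether to drain or keep the remainder */
}
bSecureDestroy (line);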
10. Mixing char *'s and bstrings
The bstring and char * representations are not identical. So there is a risk
when converting back and forth that data may be lost. Essentially bstrings can
contain '\0' as a valid non-terminating character, while char * strings
cannot and in fact must use the character as a terminator. The risk of data
loss is very low, since:
A) the simple method of only using bstrings in a char * semantically
compatible way is both easy to achieve and pervasively supported.
B) obtaining '\0' content in a string is either deliberate or indicative
of another, likely more serious problem in the code.
C) the library comes with various functions which deal with this issue
(namely: bfromcstr(), bstr2cstr (), and bSetCstrChar ())
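A short sketch of point (C) (functions as declared in bstrlib.h, assuming
bstr2cstr()'s documented behaviour of substituting embedded '\0' characters with
the supplied character; '?' is an arbitrary choice for this example):

bstring b = blk2bstr ("one\0two", 7); /* an embedded '\0' is legal bstring content */
char * c = bstr2cstr (b, '?'); /* yields "one?two"; the embedded '\0' is substituted */
bcstrfree (c);
bdestroy (b);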
Marginal security issues:
.........................
11. 8-bit versus 9-bit portability
Bstrlib uses CHAR_BIT and other limits.h constants to the maximum extent
possible to avoid portability problems. However, Bstrlib has not been tested
on any system that does not represent char as 8-bits. So whether or not it
works on 9-bit systems is an open question. It is recommended that Bstrlib be
carefully audited by anyone using a system in which CHAR_BIT is not 8.
12. EBCDIC/ASCII/UTF-8 data representation attacks.
Bstrlib uses ctype.h functions to ensure that it remains portable to non-
ASCII systems. It also checks range to make sure it is well defined even for
data that ANSI does not define for the ctype functions.
Obscure issues:
...............
13. Data attributes
There is no support for a Perl-like "taint" attribute; however, an example of
how to do this using C++'s type system is provided.

File diff suppressed because it is too large

View File

@ -1,151 +0,0 @@
#pragma once
#include <set>
#include <map>
#include <utility>
#include <vector>
#include <memory>
#include <stdint.h>
struct Instruction;
class Operand;
namespace HLSLcc
{
using namespace std;
namespace ControlFlow
{
class BasicBlock;
class ControlFlowGraph
{
friend class BasicBlock;
public:
ControlFlowGraph()
: m_BlockMap()
, m_BlockStorage()
{}
typedef std::vector<shared_ptr<BasicBlock> > BasicBlockStorage;
const BasicBlock &Build(const Instruction* firstInstruction, const Instruction* endInstruction);
// Only works for instructions that start the basic block
const BasicBlock *GetBasicBlockForInstruction(const Instruction *instruction) const;
// non-const version for BasicBlock
BasicBlock *GetBasicBlockForInstruction(const Instruction *instruction);
const BasicBlockStorage &AllBlocks() const { return m_BlockStorage; }
private:
// Map for storing the created basic blocks. Map key is the pointer to the first instruction in the block
typedef std::map<const Instruction *, BasicBlock *> BasicBlockMap;
BasicBlockMap m_BlockMap;
// auto_ptr -type storage for multiple BasicBlocks. BlockMap above only has pointers into these
BasicBlockStorage m_BlockStorage;
};
class BasicBlock
{
friend class ControlFlowGraph;
public:
// A set of register indices, one per each vec4 component per register
typedef std::set<uint32_t> RegisterSet;
// The connections (either incoming or outgoing) from this block. The instruction is the same one as the key in ControlFlowGraph to that basic block
typedef std::set<const Instruction *> ConnectionSet;
struct Definition
{
Definition(const Instruction* i = nullptr, const Operand* o = nullptr)
: m_Instruction(i)
, m_Operand(o)
{}
Definition(const Definition& a) = default;
Definition(Definition&& a) = default;
~Definition() = default;
Definition& operator=(const Definition& a) = default;
Definition& operator=(Definition&& a) = default;
bool operator==(const Definition& a) const
{
if (a.m_Instruction != m_Instruction)
return false;
return a.m_Operand == m_Operand;
}
bool operator!=(const Definition& a) const
{
if (a.m_Instruction == m_Instruction)
return false;
return a.m_Operand != m_Operand;
}
bool operator<(const Definition& a) const
{
if (m_Instruction != a.m_Instruction)
return m_Instruction < a.m_Instruction;
return m_Operand < a.m_Operand;
}
const Instruction *m_Instruction;
const Operand *m_Operand;
};
typedef std::set<Definition> ReachableDefinitionsPerVariable; // A set of possibly visible definitions for one component of one vec4 variable
typedef std::map<uint32_t, ReachableDefinitionsPerVariable> ReachableVariables; // A VisibleDefinitionSet for each variable*component.
const Instruction *First() const { return m_First; }
const Instruction *Last() const { return m_Last; }
const RegisterSet &UEVar() const { return m_UEVar; }
const RegisterSet &VarKill() const { return m_VarKill; }
const ConnectionSet &Preceding() const { return m_Preceding; }
const ConnectionSet &Succeeding() const { return m_Succeeding; }
const ReachableVariables &DEDef() const { return m_DEDef; }
const ReachableVariables &Reachable() const { return m_Reachable; }
// Helper function: Do union of 2 ReachableVariables, store result in a.
static void RVarUnion(ReachableVariables &a, const ReachableVariables &b);
private:
// Generate a basic block. Private constructor, can only be constructed from ControlFlowGraph::Build()
BasicBlock(const Instruction *psFirst, ControlFlowGraph &graph, const Instruction *psPrecedingBlockHead, const Instruction* psEnd);
// Walk through the instructions and build UEVar and VarKill sets, create succeeding nodes if they don't exist already.
void Build();
bool RebuildReachable(); // Rebuild m_Reachable from preceding blocks and this one. Returns true if current value changed.
BasicBlock * AddChildBasicBlock(const Instruction *psFirst);
private:
ControlFlowGraph &m_Graph; // The graph object containing this block
const Instruction *m_First; // The first instruction in the basic block
const Instruction *m_Last; // The last instruction in the basic block. Either OPCODE_RET or a branch/jump/loop instruction
const Instruction *m_End; // past-the-end pointer
RegisterSet m_UEVar; // Upwards-exposed variables (temps that need definition from upstream and are used in this basic block)
RegisterSet m_VarKill; // Set of variables that are defined in this block.
ConnectionSet m_Preceding; // Set of blocks that immediately precede this block in the CFG
ConnectionSet m_Succeeding; // Set of blocks that follow this block in the CFG
ReachableVariables m_DEDef; // Downward-exposed definitions from this basic block. Always only one item per set.
ReachableVariables m_Reachable; // The set of variable definitions that are visible at the end of this block.
};
}
}
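The RVarUnion helper declared above merges two reachable-definition maps. A minimal sketch of the set-union semantics it implies, written only against the typedefs shown in this header (an assumption, not the shipped implementation):

// Sketch: for every variable*component key in b, fold its definitions into a.
void HLSLcc::ControlFlow::BasicBlock::RVarUnion(ReachableVariables &a, const ReachableVariables &b)
{
    for (const auto &entry : b)
    {
        // operator[] creates an empty definition set for keys a does not have yet.
        a[entry.first].insert(entry.second.begin(), entry.second.end());
    }
}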

View File

@ -1,30 +0,0 @@
#pragma once
struct Instruction;
namespace HLSLcc
{
namespace ControlFlow
{
class Utils
{
public:
// For a given flow-control instruction, find the corresponding jump location:
// If the input is OPCODE_IF, then find the next same-level ELSE or ENDIF +1
// For ELSE, find same level ENDIF + 1
// For BREAK/BREAKC, find next ENDLOOP or ENDSWITCH + 1
// For SWITCH, find next same-level CASE/DEFAULT (skip multiple consecutive case/default labels) or ENDSWITCH + 1
// For ENDLOOP, find previous same-level LOOP + 1
// For CASE/DEFAULT, find next same-level CASE/DEFAULT or ENDSWITCH + 1, skip multiple consecutive case/default labels
// For CONTINUE/CONTINUEC, find the previous LOOP + 1
// Note that LOOP/ENDSWITCH itself is effectively just a label, but it still starts a new basic block.
// Note that CASE labels fall through.
// Always returns the beginning of the next block, so skip multiple CASE/DEFAULT labels etc.
// If sawEndSwitch != null, it will be set to true if the label skipping walked past ENDSWITCH
// If needConnectToParent != null, will be set to true if sawEndSwitch == true and there are one or more case labels directly before it.
static const Instruction * GetJumpPoint(const Instruction *psStart, bool *sawEndSwitch = 0, bool *needConnectToParent = 0);
static const Instruction *GetNextNonLabelInstruction(const Instruction *psStart, bool *sawEndSwitch = 0);
};
}
}
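As a hedged illustration of how the jump-point rules above might be consumed while building basic blocks (psBranchInst and the surrounding code are hypothetical; the instructions are assumed to live in a contiguous array):

// Hypothetical usage: a conditional branch ends a block; its jump target and
// its fall-through successor each start a new basic block.
bool sawEndSwitch = false;
bool needConnectToParent = false;
const Instruction *jumpTarget =
    HLSLcc::ControlFlow::Utils::GetJumpPoint(psBranchInst, &sawEndSwitch, &needConnectToParent);
const Instruction *fallThrough =
    HLSLcc::ControlFlow::Utils::GetNextNonLabelInstruction(psBranchInst + 1);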

View File

@ -1,15 +0,0 @@
#pragma once
#include "include/ShaderInfo.h"
#include <vector>
class HLSLCrossCompilerContext;
struct Instruction;
namespace HLSLcc
{
namespace DataTypeAnalysis
{
void SetDataTypes(HLSLCrossCompilerContext* psContext, std::vector<Instruction> &instructions, uint32_t ui32TempCount, std::vector<SHADER_VARIABLE_TYPE> &results);
}
}

View File

@ -1,118 +0,0 @@
#pragma once
#include <vector>
#include <set>
#include "internal_includes/tokens.h"
#include "internal_includes/Operand.h"
typedef struct ICBVec4_TAG
{
uint32_t a;
uint32_t b;
uint32_t c;
uint32_t d;
} ICBVec4;
#define ACCESS_FLAG_READ 0x1
#define ACCESS_FLAG_WRITE 0x2
#define ACCESS_FLAG_ATOMIC 0x4
struct Declaration
{
Declaration() :
eOpcode(OPCODE_INVALID),
ui32NumOperands(0),
ui32BufferStride(0),
ui32TableLength(0),
ui32IsShadowTex(0)
{}
OPCODE_TYPE eOpcode;
uint32_t ui32NumOperands;
Operand asOperands[2];
std::vector<ICBVec4> asImmediateConstBuffer;
//The declaration can set one of these
//values depending on the opcode.
union
{
uint32_t ui32GlobalFlags;
uint32_t ui32NumTemps;
RESOURCE_DIMENSION eResourceDimension;
INTERPOLATION_MODE eInterpolation;
PRIMITIVE_TOPOLOGY eOutputPrimitiveTopology;
PRIMITIVE eInputPrimitive;
uint32_t ui32MaxOutputVertexCount;
TESSELLATOR_DOMAIN eTessDomain;
TESSELLATOR_PARTITIONING eTessPartitioning;
TESSELLATOR_OUTPUT_PRIMITIVE eTessOutPrim;
uint32_t aui32WorkGroupSize[3];
uint32_t ui32HullPhaseInstanceCount;
float fMaxTessFactor;
uint32_t ui32IndexRange;
uint32_t ui32GSInstanceCount;
SB_SAMPLER_MODE eSamplerMode; // For sampler declarations, the sampler mode.
struct Interface_TAG
{
uint32_t ui32InterfaceID;
uint32_t ui32NumFuncTables;
uint32_t ui32ArraySize;
} iface;
} value;
uint32_t ui32BufferStride;
struct UAV_TAG
{
UAV_TAG() :
ui32GloballyCoherentAccess(0),
bCounter(0),
Type(RETURN_TYPE_UNORM),
ui32NumComponents(0),
ui32AccessFlags(0)
{
}
uint32_t ui32GloballyCoherentAccess;
uint8_t bCounter;
RESOURCE_RETURN_TYPE Type;
uint32_t ui32NumComponents;
uint32_t ui32AccessFlags;
} sUAV;
struct TGSM_TAG
{
uint32_t ui32Stride;
uint32_t ui32Count;
TGSM_TAG() :
ui32Stride(0),
ui32Count(0)
{
}
} sTGSM;
struct IndexableTemp_TAG
{
uint32_t ui32RegIndex;
uint32_t ui32RegCount;
uint32_t ui32RegComponentSize;
IndexableTemp_TAG() :
ui32RegIndex(0),
ui32RegCount(0),
ui32RegComponentSize(0)
{
}
} sIdxTemp;
uint32_t ui32TableLength;
uint32_t ui32IsShadowTex;
// Set of sampler register numbers used by this declaration.
std::set<uint32_t> samplersUsed;
};
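The union above is interpreted according to eOpcode. A hedged micro-example of reading it (OPCODE_DCL_TEMPS is the temp-count declaration opcode referenced in Shader.h below; the helper itself is hypothetical):

// Hypothetical: which union member is valid depends on the declaration opcode.
uint32_t CountDeclaredTemps(const Declaration *psDecl)
{
    if (psDecl->eOpcode == OPCODE_DCL_TEMPS)
        return psDecl->value.ui32NumTemps;
    return 0;
}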

View File

@ -1,81 +0,0 @@
#pragma once
#include <stdint.h>
#include <string>
#include <set>
#include "bstrlib.h"
class Shader;
class GLSLCrossDependencyData;
class ShaderPhase;
class Translator;
class Operand;
class HLSLccReflection;
class HLSLCrossCompilerContext
{
public:
HLSLCrossCompilerContext(HLSLccReflection &refl) :
glsl(nullptr),
extensions(nullptr),
beforeMain(nullptr),
currentGLSLString(nullptr),
currentPhase(0),
indent(0),
flags(0),
psShader(nullptr),
psDependencies(nullptr),
inputPrefix(nullptr),
outputPrefix(nullptr),
psTranslator(nullptr),
m_Reflection(refl)
{}
bstring glsl;
bstring extensions;
bstring beforeMain;
bstring* currentGLSLString;//either glsl or earlyMain of current phase
uint32_t currentPhase;
int indent;
unsigned int flags;
// Helper functions for checking flags
// Returns true if VULKAN_BINDINGS flag is set
bool IsVulkan() const;
// Helper functions for checking flags
// Returns true if HLSLCC_FLAG_NVN_TARGET flag is set
bool IsSwitch() const;
Shader* psShader;
GLSLCrossDependencyData* psDependencies;
const char *inputPrefix; // Prefix for shader inputs
const char *outputPrefix; // Prefix for shader outputs
void DoDataTypeAnalysis(ShaderPhase *psPhase);
void ReserveFramebufferFetchInputs();
void ClearDependencyData();
void AddIndentation();
// Currently active translator
Translator *psTranslator;
HLSLccReflection &m_Reflection; // Callbacks for bindings and diagnostic info
// Retrieve the name that the input or output is declared as. Takes into account possible redirections.
std::string GetDeclaredInputName(const Operand* psOperand, int *piRebase, int iIgnoreRedirect, uint32_t *puiIgnoreSwizzle) const;
std::string GetDeclaredOutputName(const Operand* psOperand, int* stream, uint32_t *puiIgnoreSwizzle, int *piRebase, int iIgnoreRedirect) const;
bool OutputNeedsDeclaring(const Operand* psOperand, const int count);
bool RequireExtension(const std::string &extName);
bool EnableExtension(const std::string &extName);
private:
std::set<std::string> m_EnabledExtensions;
};
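A minimal sketch of what AddIndentation could look like, assuming it simply emits one tab per indent level into the currently active output buffer (bcatcstr is bstrlib's append-C-string helper); the shipped implementation may differ:

void HLSLCrossCompilerContext::AddIndentation()
{
    // Append `indent` tabs to whichever buffer is currently being written
    // (either glsl or the current phase's earlyMain, per currentGLSLString).
    bstring out = *currentGLSLString;
    for (int i = 0; i < indent; ++i)
        bcatcstr(out, "\t");
}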

View File

@ -1,134 +0,0 @@
#pragma once
#include "hlslcc.h"
#include "bstrlib.h"
#include <vector>
#include <string>
#include <algorithm>
#include "internal_includes/Instruction.h"
#include "internal_includes/Operand.h"
class HLSLCrossCompilerContext;
struct ConstantBuffer;
namespace HLSLcc
{
uint32_t GetNumberBitsSet(uint32_t a);
uint32_t SVTTypeToFlag(const SHADER_VARIABLE_TYPE eType);
SHADER_VARIABLE_TYPE TypeFlagsToSVTType(const uint32_t typeflags);
const char * GetConstructorForType(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision = true);
const char * GetConstructorForTypeGLSL(const HLSLCrossCompilerContext *context, const SHADER_VARIABLE_TYPE eType, const int components, bool useGLSLPrecision);
const char * GetConstructorForTypeMetal(const SHADER_VARIABLE_TYPE eType, const int components);
std::string GetMatrixTypeName(const HLSLCrossCompilerContext *psContext, const SHADER_VARIABLE_TYPE eBaseType, const int columns, const int rows);
void AddSwizzleUsingElementCount(bstring dest, uint32_t count);
int WriteMaskToComponentCount(uint32_t writeMask);
uint32_t BuildComponentMaskFromElementCount(int count);
// Returns true if we can do direct assignment between types (mostly for mediump<->highp floats etc)
bool DoAssignmentDataTypesMatch(SHADER_VARIABLE_TYPE dest, SHADER_VARIABLE_TYPE src);
// Convert resource return type to SVT_ flags
uint32_t ResourceReturnTypeToFlag(const RESOURCE_RETURN_TYPE eType);
SHADER_VARIABLE_TYPE ResourceReturnTypeToSVTType(const RESOURCE_RETURN_TYPE eType, const REFLECT_RESOURCE_PRECISION ePrec);
RESOURCE_RETURN_TYPE SVTTypeToResourceReturnType(SHADER_VARIABLE_TYPE type);
REFLECT_RESOURCE_PRECISION SVTTypeToPrecision(SHADER_VARIABLE_TYPE type);
uint32_t ElemCountToAutoExpandFlag(uint32_t elemCount);
bool IsOperationCommutative(int /* OPCODE_TYPE */ eOpCode);
bool AreTempOperandsIdentical(const Operand * psA, const Operand * psB);
int GetNumTextureDimensions(int /* RESOURCE_DIMENSION */ eResDim);
SHADER_VARIABLE_TYPE SelectHigherType(SHADER_VARIABLE_TYPE a, SHADER_VARIABLE_TYPE b);
// Returns true if the instruction adds 1 to the destination temp register
bool IsAddOneInstruction(const Instruction *psInst);
bool CanDoDirectCast(const HLSLCrossCompilerContext *context, SHADER_VARIABLE_TYPE src, SHADER_VARIABLE_TYPE dest);
bool IsUnityFlexibleInstancingBuffer(const ConstantBuffer* psCBuf);
// Helper function to print floats with full precision
void PrintFloat(bstring b, float f);
bstring GetEarlyMain(HLSLCrossCompilerContext *psContext);
bstring GetPostShaderCode(HLSLCrossCompilerContext *psContext);
// Flags for ForeachOperand
// Process suboperands
#define FEO_FLAG_SUBOPERAND 1
// Process src operands
#define FEO_FLAG_SRC_OPERAND 2
// Process destination operands
#define FEO_FLAG_DEST_OPERAND 4
// Convenience: Process all operands, both src and dest, and all suboperands
#define FEO_FLAG_ALL (FEO_FLAG_SUBOPERAND | FEO_FLAG_SRC_OPERAND | FEO_FLAG_DEST_OPERAND)
// For_each for all operands within a range of instructions. Flags above.
template<typename ItrType, typename F> void ForEachOperand(ItrType _begin, ItrType _end, int flags, F callback)
{
ItrType inst = _begin;
while (inst != _end)
{
uint32_t i, k;
if ((flags & FEO_FLAG_DEST_OPERAND) || (flags & FEO_FLAG_SUBOPERAND))
{
for (i = 0; i < inst->ui32FirstSrc; i++)
{
if (flags & FEO_FLAG_SUBOPERAND)
{
for (k = 0; k < MAX_SUB_OPERANDS; k++)
{
if (inst->asOperands[i].m_SubOperands[k].get())
{
callback(inst, inst->asOperands[i].m_SubOperands[k].get(), FEO_FLAG_SUBOPERAND);
}
}
}
if (flags & FEO_FLAG_DEST_OPERAND)
{
callback(inst, &inst->asOperands[i], FEO_FLAG_DEST_OPERAND);
}
}
}
if ((flags & FEO_FLAG_SRC_OPERAND) || (flags & FEO_FLAG_SUBOPERAND))
{
for (i = inst->ui32FirstSrc; i < inst->ui32NumOperands; i++)
{
if (flags & FEO_FLAG_SUBOPERAND)
{
for (k = 0; k < MAX_SUB_OPERANDS; k++)
{
if (inst->asOperands[i].m_SubOperands[k].get())
{
callback(inst, inst->asOperands[i].m_SubOperands[k].get(), FEO_FLAG_SUBOPERAND);
}
}
}
if (flags & FEO_FLAG_SRC_OPERAND)
{
callback(inst, &inst->asOperands[i], FEO_FLAG_SRC_OPERAND);
}
}
}
inst++;
}
}
}
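The ForEachOperand template above drives most operand traversals. A hedged usage example that counts temp-register reads over an instruction range (the counting helper itself is illustrative, not taken from the sources):

// Count how many source operands in [begin, end) read from temp registers.
size_t CountTempReads(std::vector<Instruction>::iterator begin,
                      std::vector<Instruction>::iterator end)
{
    size_t tempReads = 0;
    HLSLcc::ForEachOperand(begin, end, FEO_FLAG_SRC_OPERAND,
        [&tempReads](std::vector<Instruction>::iterator /*inst*/, Operand *op, uint32_t /*flag*/)
        {
            if (op->eType == OPERAND_TYPE_TEMP)
                ++tempReads;
        });
    return tempReads;
}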

View File

@ -1,184 +0,0 @@
#pragma once
#include "internal_includes/Operand.h"
#include "internal_includes/tokens.h"
#include "include/ShaderInfo.h"
#include <memory>
#define ATOMIC_ADDRESS_BASIC 0
#define ATOMIC_ADDRESS_ARRAY_DYNAMIC 1
#define ATOMIC_ADDRESS_STRUCT_DYNAMIC 2
#define TEXSMP_FLAG_NONE 0x0
#define TEXSMP_FLAG_LOD 0x1 //LOD comes from operand
#define TEXSMP_FLAG_DEPTHCOMPARE 0x2
#define TEXSMP_FLAG_FIRSTLOD 0x4 //LOD is 0
#define TEXSMP_FLAG_BIAS 0x8
#define TEXSMP_FLAG_GRAD 0x10
//Gather specific flags
#define TEXSMP_FLAG_GATHER 0x20
#define TEXSMP_FLAG_PARAMOFFSET 0x40 //Offset comes from operand
struct Instruction
{
Instruction() :
eOpcode(OPCODE_NOP),
eBooleanTestType(INSTRUCTION_TEST_ZERO),
ui32NumOperands(0),
ui32FirstSrc(0),
m_Uses(),
m_SkipTranslation(false),
m_InductorRegister(0),
bSaturate(0),
ui32SyncFlags(0),
ui32PreciseMask(0),
ui32FuncIndexWithinInterface(0),
eResInfoReturnType(RESINFO_INSTRUCTION_RETURN_FLOAT),
bAddressOffset(0),
iUAddrOffset(0),
iVAddrOffset(0),
iWAddrOffset(0),
xType(RETURN_TYPE_UNUSED),
yType(RETURN_TYPE_UNUSED),
zType(RETURN_TYPE_UNUSED),
wType(RETURN_TYPE_UNUSED),
eResDim(RESOURCE_DIMENSION_UNKNOWN),
iCausedSplit(0),
id(0)
{
m_LoopInductors[0] = m_LoopInductors[1] = m_LoopInductors[2] = m_LoopInductors[3] = 0;
}
// For creating unit tests only. Create an instruction with temps (unless reg is 0xffffffff in which case use OPERAND_TYPE_INPUT/OUTPUT)
Instruction(uint64_t _id, OPCODE_TYPE opcode, uint32_t reg1 = 0, uint32_t reg1Mask = 0, uint32_t reg2 = 0, uint32_t reg2Mask = 0, uint32_t reg3 = 0, uint32_t reg3Mask = 0, uint32_t reg4 = 0, uint32_t reg4Mask = 0) :
ui32SyncFlags(0),
bSaturate(0),
ui32PreciseMask(0),
ui32FuncIndexWithinInterface(0),
eResInfoReturnType(RESINFO_INSTRUCTION_RETURN_FLOAT),
bAddressOffset(0),
iUAddrOffset(0),
iVAddrOffset(0),
iWAddrOffset(0),
xType(RETURN_TYPE_UNUSED),
yType(RETURN_TYPE_UNUSED),
zType(RETURN_TYPE_UNUSED),
wType(RETURN_TYPE_UNUSED),
eResDim(RESOURCE_DIMENSION_UNKNOWN),
iCausedSplit(0)
{
id = _id;
eOpcode = opcode;
eBooleanTestType = INSTRUCTION_TEST_ZERO;
ui32FirstSrc = 0;
ui32NumOperands = 0;
m_LoopInductors[0] = m_LoopInductors[1] = m_LoopInductors[2] = m_LoopInductors[3] = 0;
m_SkipTranslation = false;
m_InductorRegister = 0;
if (reg1Mask == 0)
return;
ui32NumOperands++;
asOperands[0].eType = reg1 == 0xffffffff ? OPERAND_TYPE_OUTPUT : OPERAND_TYPE_TEMP;
asOperands[0].ui32RegisterNumber = reg1 == 0xffffffff ? 0 : reg1;
asOperands[0].ui32CompMask = reg1Mask;
asOperands[0].eSelMode = OPERAND_4_COMPONENT_MASK_MODE;
if (reg2Mask == 0)
return;
ui32FirstSrc = 1;
ui32NumOperands++;
asOperands[1].eType = reg2 == 0xffffffff ? OPERAND_TYPE_INPUT : OPERAND_TYPE_TEMP;
asOperands[1].ui32RegisterNumber = reg2 == 0xffffffff ? 0 : reg2;
asOperands[1].ui32CompMask = reg2Mask;
asOperands[1].eSelMode = OPERAND_4_COMPONENT_MASK_MODE;
if (reg3Mask == 0)
return;
ui32NumOperands++;
asOperands[2].eType = reg3 == 0xffffffff ? OPERAND_TYPE_INPUT : OPERAND_TYPE_TEMP;
asOperands[2].ui32RegisterNumber = reg3 == 0xffffffff ? 0 : reg3;
asOperands[2].ui32CompMask = reg3Mask;
asOperands[2].eSelMode = OPERAND_4_COMPONENT_MASK_MODE;
if (reg4Mask == 0)
return;
ui32NumOperands++;
asOperands[3].eType = reg4 == 0xffffffff ? OPERAND_TYPE_INPUT : OPERAND_TYPE_TEMP;
asOperands[3].ui32RegisterNumber = reg4 == 0xffffffff ? 0 : reg4;
asOperands[3].ui32CompMask = reg4Mask;
asOperands[3].eSelMode = OPERAND_4_COMPONENT_MASK_MODE;
}
// Returns true if this instruction is a conditional branch
bool IsConditionalBranchInstruction() const
{
switch (eOpcode)
{
case OPCODE_IF:
case OPCODE_BREAKC:
case OPCODE_CONTINUEC:
case OPCODE_RETC:
return true;
default:
return false;
}
}
bool IsPartialPrecisionSamplerInstruction(const ShaderInfo &info, OPERAND_MIN_PRECISION *pType) const;
// Flags for ChangeOperandTempRegister
#define UD_CHANGE_SUBOPERANDS 1
#define UD_CHANGE_MAIN_OPERAND 2
#define UD_CHANGE_ALL 3
void ChangeOperandTempRegister(Operand *psOperand, uint32_t oldReg, uint32_t newReg, uint32_t compMask, uint32_t flags, uint32_t rebase);
OPCODE_TYPE eOpcode;
INSTRUCTION_TEST_BOOLEAN eBooleanTestType;
uint32_t ui32SyncFlags;
uint32_t ui32NumOperands;
uint32_t ui32FirstSrc;
Operand asOperands[6];
uint32_t bSaturate;
uint32_t ui32PreciseMask;
uint32_t ui32FuncIndexWithinInterface;
RESINFO_RETURN_TYPE eResInfoReturnType;
int bAddressOffset;
int8_t iUAddrOffset;
int8_t iVAddrOffset;
int8_t iWAddrOffset;
RESOURCE_RETURN_TYPE xType, yType, zType, wType;
RESOURCE_DIMENSION eResDim;
int8_t iCausedSplit; // Nonzero if has caused a temp split. Later used by sampler datatype tweaking
struct Use
{
Use() : m_Inst(0), m_Op(0) {}
Use(const Use& a) = default;
Use(Use&& a) = default;
Use(Instruction* inst, Operand* op) : m_Inst(inst), m_Op(op) {}
~Use() = default;
Use& operator=(const Use& a) = default;
Use& operator=(Use&& a) = default;
Instruction* m_Inst; // The instruction that references the result of this instruction
Operand* m_Op; // The operand within the instruction above. Note: can also be suboperand.
};
std::vector<Use> m_Uses; // Array of use sites for the result(s) of this instruction, if any of the results is a temp reg.
Instruction* m_LoopInductors[4]; // If OPCODE_LOOP and is suitable for transforming into for-loop, contains pointers to for initializer, end condition, breakc, and increment.
bool m_SkipTranslation; // If true, don't emit this instruction (currently used by the for loop translation)
uint32_t m_InductorRegister; // If non-zero, the inductor variable can be declared in the for statement, and this register number has been allocated for it
uint64_t id;
};
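A hedged example of the unit-test constructor documented above, building something like t0 = t1 + t2 on all four components (OPCODE_ADD is assumed from the DX opcode enum; OPERAND_4_COMPONENT_MASK_ALL is the full component mask used elsewhere in these headers):

// id 1, opcode ADD, destination t0.xyzw, sources t1.xyzw and t2.xyzw.
Instruction add(1 /*id*/, OPCODE_ADD,
                0, OPERAND_4_COMPONENT_MASK_ALL,   // reg1: destination temp t0
                1, OPERAND_4_COMPONENT_MASK_ALL,   // reg2: source temp t1
                2, OPERAND_4_COMPONENT_MASK_ALL);  // reg3: source temp t2
// After construction: ui32NumOperands == 3, ui32FirstSrc == 1.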

View File

@ -1,8 +0,0 @@
#pragma once
class ShaderPhase;
class HLSLCrossCompilerContext;
namespace HLSLcc
{
void DoLoopTransform(HLSLCrossCompilerContext *psContext, ShaderPhase &phase);
}

View File

@ -1,150 +0,0 @@
#pragma once
#include "internal_includes/tokens.h"
#include <vector>
#include <memory>
enum { MAX_SUB_OPERANDS = 3 };
class Operand;
class HLSLCrossCompilerContext;
struct Instruction;
#if _MSC_VER
// Disable the "array will be default-initialized" warning; that is exactly the behavior we rely on here
#pragma warning(disable: 4351)
#endif
class Operand
{
public:
typedef std::shared_ptr<Operand> SubOperandPtr;
Operand()
:
iExtended(),
eType(),
eModifier(),
eMinPrecision(),
iIndexDims(),
iWriteMask(),
iGSInput(),
iPSInOut(),
iWriteMaskEnabled(),
iArrayElements(),
iNumComponents(),
eSelMode(),
ui32CompMask(),
ui32Swizzle(),
aui32Swizzle(),
aui32ArraySizes(),
ui32RegisterNumber(),
afImmediates(),
adImmediates(),
eSpecialName(),
specialName(),
eIndexRep(),
m_SubOperands(),
aeDataType(),
m_Rebase(0),
m_Size(0),
m_Defines(),
m_ForLoopInductorName(0)
#ifdef _DEBUG
, id(0)
#endif
{}
// Retrieve the mask of all the components this operand accesses (either reads from or writes to).
// Note that destination writemask does affect the effective access mask.
uint32_t GetAccessMask() const;
// Returns the index of the highest accessed component, based on component mask
int GetMaxComponent() const;
bool IsSwizzleReplicated() const;
// Get the number of elements returned by operand, taking additional component mask into account
//e.g.
//.z = 1
//.x = 1
//.yw = 2
uint32_t GetNumSwizzleElements(uint32_t ui32CompMask = OPERAND_4_COMPONENT_MASK_ALL) const;
// When this operand is used as an input declaration, how many components does it have?
int GetNumInputElements(const HLSLCrossCompilerContext *psContext) const;
// Retrieve the operand data type.
SHADER_VARIABLE_TYPE GetDataType(HLSLCrossCompilerContext* psContext, SHADER_VARIABLE_TYPE ePreferredTypeForImmediates = SVT_INT) const;
// Returns 0 if the register used by the operand is per-vertex, or 1 if per-patch
int GetRegisterSpace(const HLSLCrossCompilerContext *psContext) const;
// Same as above but with explicit shader type and phase
int GetRegisterSpace(SHADER_TYPE eShaderType, SHADER_PHASE_TYPE eShaderPhaseType) const;
// Find the operand that contains the dynamic index for this operand (array in constant buffer).
// When isAoS is true, we'll try to find the original index var to avoid additional calculations.
// needsIndexCalcRevert output will tell if we need to divide the value to get the correct index.
Operand* GetDynamicIndexOperand(HLSLCrossCompilerContext *psContext, const ShaderVarType* psVar, bool isAoS, bool *needsIndexCalcRevert) const;
// Maps REFLECT_RESOURCE_PRECISION into OPERAND_MIN_PRECISION as much as possible
static OPERAND_MIN_PRECISION ResourcePrecisionToOperandPrecision(REFLECT_RESOURCE_PRECISION ePrec);
int iExtended;
OPERAND_TYPE eType;
OPERAND_MODIFIER eModifier;
OPERAND_MIN_PRECISION eMinPrecision;
int iIndexDims;
int iWriteMask;
int iGSInput;
int iPSInOut;
int iWriteMaskEnabled;
int iArrayElements;
int iNumComponents;
OPERAND_4_COMPONENT_SELECTION_MODE eSelMode;
uint32_t ui32CompMask;
uint32_t ui32Swizzle;
uint32_t aui32Swizzle[4];
uint32_t aui32ArraySizes[3];
uint32_t ui32RegisterNumber;
//If eType is OPERAND_TYPE_IMMEDIATE32
float afImmediates[4];
//If eType is OPERAND_TYPE_IMMEDIATE64
double adImmediates[4];
SPECIAL_NAME eSpecialName;
std::string specialName;
OPERAND_INDEX_REPRESENTATION eIndexRep[3];
SubOperandPtr m_SubOperands[MAX_SUB_OPERANDS];
//One type for each component.
SHADER_VARIABLE_TYPE aeDataType[4];
uint32_t m_Rebase; // Rebase value, for constant array accesses.
uint32_t m_Size; // Component count, only for constant array access.
struct Define
{
Define() : m_Inst(0), m_Op(0) {}
Define(const Define& a) = default;
Define(Define&& a) = default;
Define(Instruction* inst, Operand* op) : m_Inst(inst), m_Op(op) {}
~Define() = default;
Define& operator=(const Define& other) = default;
Define& operator=(Define&& other) = default;
Instruction* m_Inst; // Instruction that writes to the temp
Operand* m_Op; // The (destination) operand within that instruction.
};
std::vector<Define> m_Defines; // Array of instructions whose results this operand can use. (only if eType == OPERAND_TYPE_TEMP)
uint32_t m_ForLoopInductorName; // If non-zero, this (eType==OPERAND_TYPE_TEMP) is an inductor variable used in for loop, and it has a special number as given here (overrides ui32RegisterNumber)
#ifdef _DEBUG
uint64_t id;
#endif
};
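A minimal sketch of the kind of logic GetAccessMask implies, assuming the usual DX operand selection modes (OPERAND_4_COMPONENT_SELECT_1_MODE and OPERAND_4_COMPONENT_SWIZZLE_MODE are assumed names from the token set; the shipped implementation handles more cases):

uint32_t Operand::GetAccessMask() const
{
    switch (eSelMode)
    {
    case OPERAND_4_COMPONENT_MASK_MODE:
        // Write-mask style access: the mask is stored directly.
        return ui32CompMask;
    case OPERAND_4_COMPONENT_SELECT_1_MODE:
        // A single component is selected.
        return 1u << aui32Swizzle[0];
    case OPERAND_4_COMPONENT_SWIZZLE_MODE:
    default:
    {
        // Swizzle access: OR together the bit of every referenced component.
        uint32_t mask = 0;
        for (int i = 0; i < 4; ++i)
            mask |= 1u << aui32Swizzle[i];
        return mask;
    }
    }
}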

View File

@ -1,255 +0,0 @@
#pragma once
#include <vector>
#include <string>
#include <map>
#include "growing_array.h"
#include "internal_includes/tokens.h"
#include "internal_includes/reflect.h"
#include "include/ShaderInfo.h"
#include "internal_includes/Instruction.h"
#include "internal_includes/Declaration.h"
#include "internal_includes/ControlFlowGraph.h"
#include "bstrlib.h"
struct ConstantArrayChunk
{
ConstantArrayChunk() : m_Size(0), m_AccessMask(0), m_Rebase(0), m_ComponentCount(0) {}
ConstantArrayChunk(uint32_t sz, uint32_t mask, Operand *firstUse)
: m_Size(sz), m_AccessMask(mask), m_Rebase(0), m_ComponentCount(0)
{
m_UseSites.push_back(firstUse);
}
uint32_t m_Size;
uint32_t m_AccessMask;
uint32_t m_Rebase;
uint32_t m_ComponentCount;
std::vector<Operand *> m_UseSites;
};
typedef std::multimap<uint32_t, ConstantArrayChunk> ChunkMap;
struct ConstantArrayInfo
{
ConstantArrayInfo() : m_OrigDeclaration(0), m_Chunks() {}
Declaration *m_OrigDeclaration; // Pointer to the original declaration of the const array
ChunkMap m_Chunks; // map of <starting offset, chunk info>, same start offset might have multiple entries for different access masks
};
class ShaderPhase
{
public:
ShaderPhase()
:
ePhase(MAIN_PHASE),
ui32InstanceCount(0),
postShaderCode(),
hasPostShaderCode(0),
earlyMain(),
ui32OrigTemps(0),
ui32TotalTemps(0),
psTempDeclaration(NULL),
pui32SplitInfo(),
peTempTypes(),
acInputNeedsRedirect(),
acOutputNeedsRedirect(),
acPatchConstantsNeedsRedirect(),
m_CFG(),
m_CFGInitialized(false),
m_NextFreeTempRegister(1),
m_NextTexCoordTemp(0)
{}
void ResolveUAVProperties(const ShaderInfo& sInfo);
void UnvectorizeImmMoves(); // Transform MOV tX.xyz, (0, 1, 2) into MOV tX.x, 0; MOV tX.y, 1; MOV tX.z, 2 to make datatype analysis easier
void PruneConstArrays(); // Walk through everything that accesses a const array to see if we could make it smaller
void ExpandSWAPCs(); // Expand all SWAPC opcodes into a bunch of MOVCs. Must be done first!
ConstantArrayInfo m_ConstantArrayInfo;
std::vector<Declaration> psDecl;
std::vector<Instruction> psInst;
SHADER_PHASE_TYPE ePhase;
uint32_t ui32InstanceCount; // In case of hull shaders, how many instances this phase needs to have. Defaults to 1.
bstring postShaderCode;//End of main or before emit()
int hasPostShaderCode;
bstring earlyMain;//Code to be inserted at the start of phase
uint32_t ui32OrigTemps; // The number of temporaries this phase originally declared
uint32_t ui32TotalTemps; // The number of temporaries this phase has now
Declaration *psTempDeclaration; // Shortcut to the OPCODE_DCL_TEMPS opcode
// The split table contains, for each register, the index of the original register it was split out from, or 0xffffffff.
// Format: lowest 16 bits: original register; bits 16-23: rebase (e.g. a value of 1 means .yzw was changed to .xyz); bits 24-31: component count
std::vector<uint32_t> pui32SplitInfo;
std::vector<SHADER_VARIABLE_TYPE> peTempTypes;
// These are needed in cases where we have 2 vec2 texcoords combined into one vec4 and they are accessed together.
std::vector<unsigned char> acInputNeedsRedirect; // If 0xff, requires re-routing all reads via a combined vec4. If 0xfe, the same but the vec4 has already been declared.
std::vector<unsigned char> acOutputNeedsRedirect; // Same for outputs
std::vector<unsigned char> acPatchConstantsNeedsRedirect; // Same for patch constants
// Get the Control Flow Graph for this phase, build it if necessary.
HLSLcc::ControlFlow::ControlFlowGraph &GetCFG();
uint32_t m_NextFreeTempRegister; // A counter for creating new temporaries for for-loops.
uint32_t m_NextTexCoordTemp; // A counter for creating tex coord temps for driver issue workarounds
private:
bool m_CFGInitialized;
HLSLcc::ControlFlow::ControlFlowGraph m_CFG;
};
class Shader
{
public:
Shader()
:
ui32MajorVersion(0),
ui32MinorVersion(0),
eShaderType(INVALID_SHADER),
eTargetLanguage(LANG_DEFAULT),
extensions(0),
fp64(0),
ui32ShaderLength(0),
aui32FuncTableToFuncPointer(),
aui32FuncBodyToFuncTable(),
funcTable(),
funcPointer(),
ui32NextClassFuncName(),
pui32FirstToken(NULL),
asPhases(),
sInfo(),
abScalarInput(),
abScalarOutput(),
aIndexedInput(),
aIndexedOutput(),
aIndexedInputParents(),
aeResourceDims(),
acInputDeclared(),
acOutputDeclared(),
aiOpcodeUsed(NUM_OPCODES, 0),
ui32CurrentVertexOutputStream(0),
textureSamplers(),
m_DummySamplerDeclared(false),
maxSemanticIndex(0)
{
}
// Retrieve the number of components the temp register has.
uint32_t GetTempComponentCount(SHADER_VARIABLE_TYPE eType, uint32_t ui32Reg) const;
//Hull shaders have multiple phases.
//Each phase has its own temps.
//Convert from per-phase temps to global temps.
void ConsolidateHullTempVars();
// Detect temp registers per data type that are actually used.
void PruneTempRegisters();
// Check if inputs and outputs are accessed across semantic boundaries
// as in, 2x texcoord vec2's are packed together as vec4 but still accessed together.
void AnalyzeIOOverlap();
// Compute maxSemanticIndex based on the results of AnalyzeIOOverlap
void SetMaxSemanticIndex();
// Change all references to vertex position to always be highp; having them be mediump causes problems on Metal and Vivante GPUs.
void ForcePositionToHighp();
void FindUnusedGlobals(uint32_t flags); // Finds the DCL_CONSTANT_BUFFER named "$Globals", searches through all usages of each of its members, and marks whether they are actually ever used.
void ExpandSWAPCs();
uint32_t ui32MajorVersion;
uint32_t ui32MinorVersion;
SHADER_TYPE eShaderType;
GLLang eTargetLanguage;
const struct GlExtensions *extensions;
int fp64;
//DWORDs in program code, including version and length tokens.
uint32_t ui32ShaderLength;
//Instruction* functions;//non-main subroutines
HLSLcc::growing_vector<uint32_t> aui32FuncTableToFuncPointer; // dynamic alloc?
HLSLcc::growing_vector<uint32_t> aui32FuncBodyToFuncTable;
struct FuncTableEntry
{
HLSLcc::growing_vector<uint32_t> aui32FuncBodies;
};
HLSLcc::growing_vector<FuncTableEntry> funcTable;
struct FuncPointerEntry
{
HLSLcc::growing_vector<uint32_t> aui32FuncTables;
uint32_t ui32NumBodiesPerTable;
};
HLSLcc::growing_vector<FuncPointerEntry> funcPointer;
HLSLcc::growing_vector<uint32_t> ui32NextClassFuncName;
const uint32_t* pui32FirstToken;//Reference for calculating current position in token stream.
std::vector<ShaderPhase> asPhases;
ShaderInfo sInfo;
// There are 2 input/output register spaces in DX bytecode: one for per-patch data and one for per-vertex.
// Which one is used depends on the context:
// per-vertex space is used in vertex/pixel/geom shaders always
// hull shader control point phase uses per-vertex by default, other phases are per-patch by default (can access per-vertex with OPERAND_TYPE_I/O_CONTROL_POINT)
// domain shader is per-patch by default, can access per-vertex with OPERAND_TYPE_I/O_CONTROL_POINT
// Below, the [2] is accessed with 0 == per-vertex, 1 == per-patch
// Note that these ints are component masks
HLSLcc::growing_vector<int> abScalarInput[2];
HLSLcc::growing_vector<int> abScalarOutput[2];
HLSLcc::growing_vector<int> aIndexedInput[2];
HLSLcc::growing_vector<bool> aIndexedOutput[2];
HLSLcc::growing_vector<int> aIndexedInputParents[2];
HLSLcc::growing_vector<RESOURCE_DIMENSION> aeResourceDims;
HLSLcc::growing_vector<char> acInputDeclared[2];
HLSLcc::growing_vector<char> acOutputDeclared[2];
std::vector<int> aiOpcodeUsed; // Initialized to NUM_OPCODES elements above.
uint32_t ui32CurrentVertexOutputStream;
TextureSamplerPairs textureSamplers;
std::vector<char> psIntTempSizes; // Array for whether this temp register needs declaration as int temp
std::vector<char> psInt16TempSizes; // min16ints
std::vector<char> psInt12TempSizes; // min12ints
std::vector<char> psUIntTempSizes; // Same for uints
std::vector<char> psUInt16TempSizes; // ... and for uint16's
std::vector<char> psFloatTempSizes; // ...and for floats
std::vector<char> psFloat16TempSizes; // ...and for min16floats
std::vector<char> psFloat10TempSizes; // ...and for min10floats
std::vector<char> psDoubleTempSizes; // ...and for doubles
std::vector<char> psBoolTempSizes; // ... and for bools
bool m_DummySamplerDeclared; // If true, the shader doesn't declare any samplers but uses texelFetch and we have added a dummy sampler for Vulkan for that.
uint32_t maxSemanticIndex; // Highest semantic index found by SignatureAnalysis
private:
void DoIOOverlapOperand(ShaderPhase *psPhase, Operand *psOperand);
};
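The pui32SplitInfo packing documented above can be captured in two small helpers; the names are hypothetical and only restate the comment's bit layout:

// Pack/unpack one pui32SplitInfo entry:
// low 16 bits = original register, bits 16-23 = rebase, bits 24-31 = component count.
static inline uint32_t PackSplitInfo(uint32_t origReg, uint32_t rebase, uint32_t compCount)
{
    return (origReg & 0xffffu) | ((rebase & 0xffu) << 16) | ((compCount & 0xffu) << 24);
}

static inline void UnpackSplitInfo(uint32_t entry, uint32_t &origReg, uint32_t &rebase, uint32_t &compCount)
{
    origReg   = entry & 0xffffu;
    rebase    = (entry >> 16) & 0xffu;
    compCount = (entry >> 24) & 0xffu;
}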

View File

@ -1,32 +0,0 @@
#pragma once
#include "HLSLCrossCompilerContext.h"
#include "Shader.h"
struct Declaration;
// Base class for translator backend implementations.
class Translator
{
protected:
HLSLCrossCompilerContext *psContext;
public:
explicit Translator(HLSLCrossCompilerContext *ctx) : psContext(ctx) {}
virtual ~Translator() {}
virtual bool Translate() = 0;
virtual void TranslateDeclaration(const Declaration *psDecl) = 0;
// Translate system value type to name, return true if succeeded and no further translation is necessary
virtual bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig, std::string &result, uint32_t *pui32IgnoreSwizzle, bool isIndexed, bool isInput, bool *outSkipPrefix = NULL, int *iIgnoreRedirect = NULL) = 0;
// In GLSL, the input and output names cannot clash.
// Also, the output name of the previous stage must match the input name of the next stage.
// So, do some gymnastics depending on which shader stage we're translating and which other shaders exist in this program.
//
virtual void SetIOPrefixes() = 0;
void SetExtensions(const struct GlExtensions *ext)
{
psContext->psShader->extensions = ext;
}
};
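For orientation, a hypothetical skeleton of a new backend deriving from Translator; only the override set shown above is implied, the class name and bodies are made up:

class SketchBackend : public Translator
{
public:
    explicit SketchBackend(HLSLCrossCompilerContext *ctx) : Translator(ctx) {}

    bool Translate() override;
    void TranslateDeclaration(const Declaration *psDecl) override;
    bool TranslateSystemValue(const Operand *psOperand, const ShaderInfo::InOutSignature *sig,
                              std::string &result, uint32_t *pui32IgnoreSwizzle,
                              bool isIndexed, bool isInput,
                              bool *outSkipPrefix, int *iIgnoreRedirect) override;
    void SetIOPrefixes() override;
};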

View File

@ -1,138 +0,0 @@
#pragma once
#include <set>
#include <map>
#include <list>
#include <vector>
#include <algorithm>
#include <stdint.h>
#include <string.h>
struct DefineUseChainEntry;
struct UseDefineChainEntry;
typedef std::set<DefineUseChainEntry *> DefineSet;
typedef std::set<UseDefineChainEntry *> UsageSet;
struct Instruction;
class Operand;
class ShaderInfo;
namespace HLSLcc
{
namespace ControlFlow
{
class ControlFlowGraph;
}
}
// Def-Use chain per temp component
struct DefineUseChainEntry
{
DefineUseChainEntry()
: psInst(0)
, psOp(0)
, usages()
, writeMask(0)
, index(0)
, isStandalone(0)
{
memset(psSiblings, 0, 4 * sizeof(DefineUseChainEntry *));
}
Instruction *psInst; // The declaration (write to this temp component)
Operand *psOp; // The operand within this instruction for the write target
UsageSet usages; // List of usages that are dependent on this write
uint32_t writeMask; // Access mask: all the components that were written in the same op
uint32_t index; // For which component was this definition created for?
uint32_t isStandalone; // A shortcut for analysis: if nonzero, all usages (and all their siblings) of this definition and of all its siblings only see this definition
struct DefineUseChainEntry *psSiblings[4]; // In case of vectorized op, contains pointer to this define's corresponding entries for the other components.
#if _DEBUG
bool operator==(const DefineUseChainEntry &a) const
{
if (psInst != a.psInst)
return false;
if (psOp != a.psOp)
return false;
if (writeMask != a.writeMask)
return false;
if (index != a.index)
return false;
if (isStandalone != a.isStandalone)
return false;
// Just check that each one has the same amount of usages
if (usages.size() != a.usages.size())
return false;
return true;
}
#endif
};
typedef std::list<DefineUseChainEntry> DefineUseChain;
struct UseDefineChainEntry
{
UseDefineChainEntry()
: psInst(0)
, psOp(0)
, defines()
, accessMask(0)
, index(0)
{
memset(psSiblings, 0, 4 * sizeof(UseDefineChainEntry *));
}
Instruction *psInst; // The use (read from this temp component)
Operand *psOp; // The operand within this instruction for the read
DefineSet defines; // List of writes that are visible to this read
uint32_t accessMask; // All the components that were read together with this one
uint32_t index; // For which component was this usage created for?
struct UseDefineChainEntry *psSiblings[4]; // In case of vectorized op, contains pointer to this usage's corresponding entries for the other components.
#if _DEBUG
bool operator==(const UseDefineChainEntry &a) const
{
if (psInst != a.psInst)
return false;
if (psOp != a.psOp)
return false;
if (accessMask != a.accessMask)
return false;
if (index != a.index)
return false;
// Just check that each one has the same amount of usages
if (defines.size() != a.defines.size())
return false;
return true;
}
#endif
};
typedef std::list<UseDefineChainEntry> UseDefineChain;
typedef std::map<uint32_t, UseDefineChain> UseDefineChains;
typedef std::map<uint32_t, DefineUseChain> DefineUseChains;
typedef std::vector<DefineUseChainEntry *> ActiveDefinitions;
// Do flow control analysis on the instructions and build the define-use and use-define chains
void BuildUseDefineChains(std::vector<Instruction> &instructions, uint32_t ui32NumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, HLSLcc::ControlFlow::ControlFlowGraph &cfg);
// Do temp splitting based on use-define chains
void UDSplitTemps(uint32_t *psNumTemps, DefineUseChains &psDUChains, UseDefineChains &psUDChains, std::vector<uint32_t> &pui32SplitTable);
// Based on the sampler precisions, downgrade the definitions if possible.
void UpdateSamplerPrecisions(const ShaderInfo &psContext, DefineUseChains &psDUChains, uint32_t ui32NumTemps);
// Optimization pass for successive passes: Mark Operand->isStandalone for definitions that are "standalone": all usages (and all their siblings) of this and all its siblings only see this definition.
void CalculateStandaloneDefinitions(DefineUseChains &psDUChains, uint32_t ui32NumTemps);
// Write the uses and defines back to Instruction and Operand member lists.
void WriteBackUsesAndDefines(DefineUseChains &psDUChains);
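A hedged sketch of how the passes declared above might be sequenced for one shader phase; the ordering and the wrapper function are assumptions based on the comments, not taken from the sources:

void RunTempSplitSketch(ShaderPhase &phase, const ShaderInfo &info, uint32_t &numTemps)
{
    DefineUseChains duChains;
    UseDefineChains udChains;

    // Build both chain directions from the phase's instructions and its CFG.
    BuildUseDefineChains(phase.psInst, numTemps, duChains, udChains, phase.GetCFG());

    // Mark "standalone" definitions to simplify the later passes.
    CalculateStandaloneDefinitions(duChains, numTemps);

    // Downgrade definition precision where the sampler precisions allow it.
    UpdateSamplerPrecisions(info, duChains, numTemps);

    // Split temps and record the mapping for later phases.
    UDSplitTemps(&numTemps, duChains, udChains, phase.pui32SplitInfo);

    // Push the computed uses/defines back onto Instruction/Operand members.
    WriteBackUsesAndDefines(duChains);
}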

View File

@ -1,21 +0,0 @@
#ifndef DEBUG_H_
#define DEBUG_H_
#ifdef _DEBUG
#include "assert.h"
#define ASSERT(expr) CustomAssert(expr)
static void CustomAssert(int expression)
{
if (!expression)
{
assert(0);
}
}
#else
#define UNUSED(EXPR_) \
do { if (false) (void)(EXPR_); } while(0)
#define ASSERT(expr) UNUSED(expr)
#endif
#endif

View File

@ -1,10 +0,0 @@
#ifndef DECODE_H
#define DECODE_H
#include "internal_includes/Shader.h"
Shader* DecodeDXBC(uint32_t* data, uint32_t decodeFlags);
void UpdateOperandReferences(Shader* psShader, SHADER_PHASE_TYPE eShaderPhaseType, Instruction* psInst);
#endif

View File

@ -1,328 +0,0 @@
#ifndef LANGUAGES_H
#define LANGUAGES_H
#include "hlslcc.h"
#include "HLSLCrossCompilerContext.h"
#include "Shader.h"
static int InOutSupported(const GLLang eLang)
{
if (eLang == LANG_ES_100 || eLang == LANG_120)
{
return 0;
}
return 1;
}
static int WriteToFragData(const GLLang eLang)
{
if (eLang == LANG_ES_100 || eLang == LANG_120)
{
return 1;
}
return 0;
}
static int ShaderBitEncodingSupported(const GLLang eLang)
{
if (eLang != LANG_ES_300 &&
eLang != LANG_ES_310 &&
eLang < LANG_330)
{
return 0;
}
return 1;
}
static int HaveOverloadedTextureFuncs(const GLLang eLang)
{
if (eLang == LANG_ES_100 || eLang == LANG_120)
{
return 0;
}
return 1;
}
static bool IsMobileTarget(const HLSLCrossCompilerContext *psContext)
{
if ((psContext->flags & HLSLCC_FLAG_MOBILE_TARGET) != 0)
return true;
return false;
}
//Only enable for ES, Vulkan and Switch.
//Not present in 120, ignored in other desktop languages. Specifically enabled on Vulkan.
static int HavePrecisionQualifiers(const HLSLCrossCompilerContext *psContext)
{
if ((psContext->flags & HLSLCC_FLAG_VULKAN_BINDINGS) != 0 || (psContext->flags & HLSLCC_FLAG_NVN_TARGET) != 0)
return 1;
const GLLang eLang = psContext->psShader->eTargetLanguage;
if (eLang >= LANG_ES_100 && eLang <= LANG_ES_310)
{
return 1;
}
return 0;
}
static int EmitLowp(const HLSLCrossCompilerContext *psContext)
{
const GLLang eLang = psContext->psShader->eTargetLanguage;
return eLang == LANG_ES_100 ? 1 : 0;
}
static int HaveCubemapArray(const GLLang eLang)
{
if (eLang >= LANG_400 && eLang <= LANG_GL_LAST)
return 1;
return 0;
}
static bool IsESLanguage(const GLLang eLang)
{
return (eLang >= LANG_ES_FIRST && eLang <= LANG_ES_LAST);
}
static bool IsDesktopGLLanguage(const GLLang eLang)
{
return (eLang >= LANG_GL_FIRST && eLang <= LANG_GL_LAST);
}
//Only on vertex inputs and pixel outputs.
static int HaveLimitedInOutLocationQualifier(const GLLang eLang, const struct GlExtensions *extensions)
{
if (eLang >= LANG_330 || eLang == LANG_ES_300 || eLang == LANG_ES_310 || (extensions && ((struct GlExtensions*)extensions)->ARB_explicit_attrib_location))
{
return 1;
}
return 0;
}
static int HaveInOutLocationQualifier(const GLLang eLang)
{
if (eLang >= LANG_410 || eLang == LANG_ES_310)
{
return 1;
}
return 0;
}
//layout(binding = X) uniform {uniformA; uniformB;}
//layout(location = X) uniform uniform_name;
static int HaveUniformBindingsAndLocations(const GLLang eLang, const struct GlExtensions *extensions, unsigned int flags)
{
if (flags & HLSLCC_FLAG_DISABLE_EXPLICIT_LOCATIONS)
return 0;
if (eLang >= LANG_430 || eLang == LANG_ES_310 ||
(extensions && ((struct GlExtensions*)extensions)->ARB_explicit_uniform_location && ((struct GlExtensions*)extensions)->ARB_shading_language_420pack))
{
return 1;
}
return 0;
}
static int DualSourceBlendSupported(const GLLang eLang)
{
if (eLang >= LANG_330)
{
return 1;
}
return 0;
}
static int SubroutinesSupported(const GLLang eLang)
{
if (eLang >= LANG_400)
{
return 1;
}
return 0;
}
//Before 430, flat/smooth/centroid/noperspective must match
//between fragment and its previous stage.
//HLSL bytecode only tells us the interpolation in pixel shader.
static int PixelInterpDependency(const GLLang eLang)
{
if (eLang < LANG_430)
{
return 1;
}
return 0;
}
static int HaveUnsignedTypes(const GLLang eLang)
{
switch (eLang)
{
case LANG_ES_100:
case LANG_120:
return 0;
default:
break;
}
return 1;
}
static int HaveBitEncodingOps(const GLLang eLang)
{
switch (eLang)
{
case LANG_ES_100:
case LANG_120:
return 0;
default:
break;
}
return 1;
}
static int HaveNativeBitwiseOps(const GLLang eLang)
{
switch (eLang)
{
case LANG_ES_100:
case LANG_120:
return 0;
default:
break;
}
return 1;
}
static int HaveDynamicIndexing(HLSLCrossCompilerContext *psContext, const Operand* psOperand = NULL)
{
// WebGL only allows dynamic indexing with constant expressions, loop indices or a combination.
// The only exception is for uniform access in vertex shaders, which can be indexed using any expression.
switch (psContext->psShader->eTargetLanguage)
{
case LANG_ES_100:
case LANG_120:
if (psOperand != NULL)
{
if (psOperand->m_ForLoopInductorName)
return 1;
if (psContext->psShader->eShaderType == VERTEX_SHADER && psOperand->eType == OPERAND_TYPE_CONSTANT_BUFFER)
return 1;
}
return 0;
default:
break;
}
return 1;
}
static int HaveGather(const GLLang eLang)
{
if (eLang >= LANG_400 || eLang == LANG_ES_310)
{
return 1;
}
return 0;
}
static int HaveGatherNonConstOffset(const GLLang eLang)
{
if (eLang >= LANG_420 || eLang == LANG_ES_310)
{
return 1;
}
return 0;
}
static int HaveQueryLod(const GLLang eLang)
{
if (eLang >= LANG_400)
{
return 1;
}
return 0;
}
static int HaveQueryLevels(const GLLang eLang)
{
if (eLang >= LANG_430)
{
return 1;
}
return 0;
}
static int HaveFragmentCoordConventions(const GLLang eLang)
{
if (eLang >= LANG_150)
{
return 1;
}
return 0;
}
static int HaveGeometryShaderARB(const GLLang eLang)
{
if (eLang >= LANG_150)
{
return 1;
}
return 0;
}
static int HaveAtomicCounter(const GLLang eLang)
{
if (eLang >= LANG_420 || eLang == LANG_ES_310)
{
return 1;
}
return 0;
}
static int HaveAtomicMem(const GLLang eLang)
{
if (eLang >= LANG_430 || eLang == LANG_ES_310)
{
return 1;
}
return 0;
}
static int HaveImageAtomics(const GLLang eLang)
{
if (eLang >= LANG_420)
{
return 1;
}
return 0;
}
static int HaveCompute(const GLLang eLang)
{
if (eLang >= LANG_430 || eLang == LANG_ES_310)
{
return 1;
}
return 0;
}
static int HaveImageLoadStore(const GLLang eLang)
{
if (eLang >= LANG_420 || eLang == LANG_ES_310)
{
return 1;
}
return 0;
}
static int HavePreciseQualifier(const GLLang eLang)
{
if (eLang >= LANG_400) // TODO: Add for ES when we're adding 3.2 lang
{
return 1;
}
return 0;
}
#endif
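A small hedged usage example of the capability helpers above: gate a precision qualifier on the target language (bformata is bstrlib's printf-style append; psContext and the emitted snippet are assumed from the surrounding context):

// Emit "mediump float tmp;" only where precision qualifiers exist,
// falling back to lowp on the one language that wants it.
const char *prec = "";
if (HavePrecisionQualifiers(psContext))
    prec = EmitLowp(psContext) ? "lowp " : "mediump ";
bformata(psContext->glsl, "%sfloat tmp;\n", prec);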

View File

@ -1,26 +0,0 @@
#ifndef REFLECT_H
#define REFLECT_H
#include "hlslcc.h"
struct ShaderPhase_TAG;
typedef struct
{
uint32_t* pui32Inputs;
uint32_t* pui32Outputs;
uint32_t* pui32Resources;
uint32_t* pui32Interfaces;
uint32_t* pui32Inputs11;
uint32_t* pui32Outputs11;
uint32_t* pui32OutputsWithStreams;
uint32_t* pui32PatchConstants;
uint32_t* pui32PatchConstants11;
} ReflectionChunks;
void LoadShaderInfo(const uint32_t ui32MajorVersion,
const uint32_t ui32MinorVersion,
const ReflectionChunks* psChunks,
ShaderInfo* psInfo, uint32_t decodeFlags);
#endif

Some files were not shown because too many files have changed in this diff.