diff --git a/.gitignore b/.gitignore
index dfa80ea..21be2a1 100644
--- a/.gitignore
+++ b/.gitignore
@@ -3,3 +3,13 @@
 /.idea
 /scripts/shader_paths.txt
 /cache/shader_loader.h
+#>fips
+# this area is managed by fips, do not edit
+.fips-*
+fips-files/build/
+fips-files/deploy/
+*.pyc
+.vscode/
+.idea/
+CMakeUserPresets.json
+#
 setup_surface(&window, false);
-    while (!mirage_window::get_windows().empty()) {
-        mirage_window::poll_events();
-    }
-
-    app.init();
+    app.run();
     return 0;
 }
diff --git a/scripts/compile_shaders.py b/scripts/compile_shaders.py
deleted file mode 100644
index 8d27ed8..0000000
--- a/scripts/compile_shaders.py
+++ /dev/null
@@ -1,470 +0,0 @@
-#!/usr/bin/env python3
-# -*- coding: utf-8 -*-
-
-from typing import List, Tuple, Iterator
-from pathlib import Path
-import argparse
-import subprocess
-import sys
-import re
-
-# Set the console output encoding
-# if sys.platform.startswith('win'):
-#     # Reconfigure stdout/stderr on Windows
-#     sys.stdout.reconfigure(encoding='utf-8')
-#     sys.stderr.reconfigure(encoding='utf-8')
-
-def print_utf8(message: str):
-    """Print a message as UTF-8."""
-    print(message)
-
-# Shader type and extension definitions
-SHADER_TYPES = {
-    'vertex': 'LLGL::ShaderType::Vertex',
-    'pixel': 'LLGL::ShaderType::Fragment',
-    'fragment': 'LLGL::ShaderType::Fragment',
-    'compute': 'LLGL::ShaderType::Compute',
-    'geometry': 'LLGL::ShaderType::Geometry',
-    'tess_control': 'LLGL::ShaderType::TessControl',
-    'tess_evaluation': 'LLGL::ShaderType::TessEvaluation'
-}
-
-SHADER_EXTENSIONS = {
-    'glsl': 'glsl',
-    'spirv': 'spirv',
-    'dxil': 'dxil',
-    'dxbc': 'dxbc',
-    'metallib': 'metallib',
-    'metal': 'metal',
-    'wgsl': 'wgsl'
-}
-
-# Compile configurations for the different targets
-TARGET_PROFILES = {
-    'glsl': ['-profile', 'glsl_460'],
-    'spirv': ['-profile', 'spirv_1_6'],
-    'dxbc': ['-profile', 'sm_5_0'],
-    'dxil': ['-profile', 'sm_6_6'],
-    'metallib': ['-capability', 'metallib'],
-    'metal': ['-capability', 'metal'],
-}
-
-class ShaderEntry:
-    """Shader entry point information."""
-    def __init__(self, name: str, shader_type: str):
-        self.name = name
-        self.shader_type = shader_type
-
-class CompiledShaderInfo:
-    """Information about a compiled shader."""
-    def __init__(self, output_file: str, base: str, entry: ShaderEntry):
-        self.output_file = output_file
-        self.base = base
-        self.entry = entry
-        if entry.shader_type == 'pixel':
-            self.entry.shader_type = 'fragment'
-
-# Holds every successfully compiled shader
-compiled_shaders: List[CompiledShaderInfo] = []
-
-def find_shader_files(input_dir: Path, extensions: List[str]) -> Iterator[Path]:
-    """Recursively find shader files below the given directory."""
-    for file_path in Path(input_dir).rglob('*'):
-        if file_path.suffix in extensions:
-            yield file_path
-
-def find_slang_entries(input_file: Path) -> List[ShaderEntry]:
-    """Extract entry point function names and types from a shader file."""
-    # Match [shader("xxx")] shader type declarations and the function name that follows
-    pattern = re.compile(
-        r'\[\s*shader\s*\(\s*"([^"]+)"\s*\)\s*\]\s*'  # match [shader("xxx")]
-        r'(?:\[\s*[^\]]+\])*'  # optional: match other attributes such as [numthreads(8,8,1)]
-        r'\s*\w+\s+(\w+)\s*\('  # match the function declaration: return type, name, opening parenthesis
-    )
-    try:
-        content = input_file.read_text(encoding='utf-8')
-        matches = pattern.findall(content)
-        print_utf8(f"**debug**: matches found in {input_file}: {matches}")
-
-        entries = []
-        for shader_type, name in matches:
-            if shader_type in SHADER_TYPES:
-                entries.append(ShaderEntry(name, shader_type))
-            else:
-                print_utf8(f"**warning**: unknown shader type {shader_type}")
-        return entries
-    except Exception as e:
-        print_utf8(f"**error**: failed to parse {input_file}: {e}")
-        return []
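A hedged aside, not part of the deleted script: the entry-point pattern above, transliterated verbatim into C++ std::regex to show what it extracts from typical Slang source (the sample source and expected output are illustrative):

    #include <iostream>
    #include <regex>
    #include <string>

    int main() {
        // the same pattern as in find_slang_entries() above, in ECMAScript regex syntax
        const std::regex pattern(
            R"(\[\s*shader\s*\(\s*"([^"]+)"\s*\)\s*\]\s*(?:\[\s*[^\]]+\])*\s*\w+\s+(\w+)\s*\()");
        const std::string src =
            "[shader(\"vertex\")]\nPSInput vertex_main(VSInput input)\n"
            "[shader(\"compute\")]\n[numthreads(8, 8, 1)]\nvoid cs_main()\n";
        for (auto it = std::sregex_iterator(src.begin(), src.end(), pattern);
             it != std::sregex_iterator(); ++it) {
            std::cout << (*it)[1] << " -> " << (*it)[2] << "\n"; // shader type -> entry point
        }
        // expected output: "vertex -> vertex_main" then "compute -> cs_main"
    }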
-
-
-def get_shader_extension(build_type: str) -> str:
-    """Map a build type to its shader file extension."""
-    return SHADER_EXTENSIONS.get(build_type, 'dat')
-
-def create_compiler_command(
-        input_file: Path,
-        entry: ShaderEntry,
-        output_file: Path,
-        target_type: str,
-        args: argparse.Namespace
-) -> List[str]:
-    """Build the shader compile command line."""
-    cmd = [args.slangc,
-           str(input_file),
-           "-entry", entry.name,
-           "-o", str(output_file),
-           "-target", target_type,
-           "-g3" if args.debug else "-O3",
-           ]
-
-    if target_type in TARGET_PROFILES:
-        cmd.extend(TARGET_PROFILES[target_type])
-
-    return cmd
-
-def needs_recompile(input_file: Path, output_file: Path) -> bool:
-    """Check whether a shader needs to be recompiled."""
-    if not output_file.exists():
-        return True
-    try:
-        return input_file.stat().st_mtime > output_file.stat().st_mtime
-    except OSError:
-        return True
-
-def compile_shader(
-        input_file: Path,
-        target_types: List[Tuple[str, bool]],
-        output_dir: Path,
-        args: argparse.Namespace
-) -> bool:
-    """Compile a single shader file."""
-    try:
-        entries = find_slang_entries(input_file)
-        if not entries:
-            print_utf8(f"**skip**: {input_file} - no shader entry points found")
-            return True
-
-        output_dir.mkdir(parents=True, exist_ok=True)
-        base = input_file.stem
-        success = True
-
-
-        for entry in entries:
-            compiled_shaders.append(CompiledShaderInfo(f"{base}_{entry.name}", base, entry))
-
-        for target_type, enabled in target_types:
-            if not enabled:
-                continue
-
-            for entry in entries:
-                output_file = output_dir / f"{base}_{entry.name}.{get_shader_extension(target_type)}"
-
-                if not needs_recompile(input_file, output_file):
-                    print_utf8(f"**skip**: {output_file} - already up to date")
-                else:
-                    cmd = create_compiler_command(input_file, entry, output_file, target_type, args)
-                    try:
-                        subprocess.run(cmd, check=True, capture_output=True, text=True)
-                        print_utf8(f"**success**: compiled {input_file}:{entry.name} -> {output_file}")
-                    except subprocess.CalledProcessError as e:
-                        print_utf8(f"**error**: failed to compile {input_file}:{entry.name}")
-                        print_utf8(e.stderr)
-                        success = False
-                        continue
-
-        return success
-    except Exception as e:
-        print_utf8(f"**error**: exception while processing {input_file}: {e}")
-        return False
-
-def generate_pipeline_header_preamble() -> List[str]:
-    """Generate the header file preamble"""
-
-    return [
-        "#pragma once",
-        "",
-        "#include \"mirage.h\"",
-        "#include \"misc/mirage_type.h\"",
-        "#include ",
-        "#include ",
-        "#include ",
-        "#include ",
-        "#include ",
-        "#include ",
-        "#include ",
-        "",
-        "namespace generated_pipelines {",
-        "",
-        "// helper function: load a shader",
-        "inline auto LoadShader(",
-        "    LLGL::RenderSystem* renderer,",
-        "    const std::string& filename,",
-        "    const LLGL::ShaderType type,",
-        "    const char* entryPoint,",
-        "    const LLGL::ShaderDescriptor& shaderDesc = {}) {",
-        "",
-        "    // pick the shader file suffix from the renderer type",
-        "    auto rendererID = renderer->GetRendererID();",
-        "    std::string ext;",
-        "    // choose the matching file extension",
-        "    if (rendererID == LLGL::RendererID::OpenGL) {",
-        "        ext = \".glsl\";",
-        "    } else if (rendererID == LLGL::RendererID::Vulkan) {",
-        "        ext = \".spirv\";",
-        "    } else if (rendererID == LLGL::RendererID::Direct3D11) {",
-        "        ext = \".dxbc\";",
-        "    } else if (rendererID == LLGL::RendererID::Direct3D12) {",
-        "        ext = \".dxil\";",
-        "    } else if (rendererID == LLGL::RendererID::Metal) {",
-        "        ext = \".metallib\";",
-        "    } else {",
-        "        ext = \".dat\";",
-        "    }",
-        "",
-        "    // build the final file name",
-        "    std::string finalFilename = filename;",
-        "    size_t pos = finalFilename.find_last_of('.');",
-        "    if (pos != std::string::npos) {",
-        "        finalFilename = finalFilename.substr(0, pos) + ext;",
-        "    } else {",
-        "        finalFilename += ext;",
-        "    }",
-        "    finalFilename = (mirage::get_shader_path() / finalFilename).string();",
-        "",
-        "    // read the shader file",
-        "    std::vector<char> shaderData;",
-        "    try {",
-        "        std::ifstream file(finalFilename, std::ios::binary | std::ios::ate);",
-        "        if (!file.is_open()) {",
throw std::runtime_error(\"Failed to open shader file: \" + finalFilename);", - " }", - " size_t fileSize = static_cast(file.tellg());", - " shaderData.resize(fileSize);", - " file.seekg(0);", - " file.read(shaderData.data(), fileSize);", - " } catch (const std::exception& e) {", - " throw std::runtime_error(\"Failed to read shader file: \" + std::string(e.what()));", - " }", - "", - " if (rendererID == LLGL::RendererID::OpenGL) {", - " // 添加终止符", - " shaderData.push_back('\\0');", - " }", - " // 创建着色器", - " LLGL::ShaderDescriptor desc = shaderDesc;", - " desc.source = shaderData.data();", - " desc.sourceSize = shaderData.size();", - " desc.entryPoint = rendererID == LLGL::RendererID::OpenGL ? \"main\" : entryPoint;", - " desc.type = type;", - " desc.profile = \"460\";", - " desc.sourceType = rendererID == LLGL::RendererID::OpenGL ? LLGL::ShaderSourceType::CodeString : LLGL::ShaderSourceType::BinaryBuffer;", - "", - " auto shader = renderer->CreateShader(desc);", - " if (auto report = shader->GetReport()) {", - " spdlog::error(\"Shader compilation report: {}\", report->GetText());", - " }", - " return mirage::shader_ptr(shader, mirage::llgl_deleter);", - "}", - "" - "inline auto create_pipeline_layout(LLGL::RenderSystem* renderer, const LLGL::PipelineLayoutDescriptor& in_desc) {", - " auto pipelineLayout = renderer->CreatePipelineLayout(in_desc);", - " return mirage::pipeline_layout_ptr(pipelineLayout, mirage::llgl_deleter);", - "}", - ] - -def generate_compute_pipeline(header_lines: List[str], shader: CompiledShaderInfo): - """Generate compute pipeline creation function""" - func_name = f"create_{shader.base}_{shader.entry.name}_pipeline" - - header_lines.extend([ - f"// 计算管线: {shader.base} - {shader.entry.name}", - f"inline auto {func_name}(LLGL::RenderSystem* renderer) {{", - " // 加载计算着色器", - f" auto computeShader = LoadShader(renderer, \"{shader.output_file.name}\", {SHADER_TYPES['compute']}, \"{shader.entry.name}\");", - "", - " // 创建管线布局", - " LLGL::PipelineLayoutDescriptor layoutDesc;", - " auto pipelineLayout = create_pipeline_layout(renderer, layoutDesc);", - "", - " // 创建计算管线", - " LLGL::ComputePipelineDescriptor pipelineDesc;", - " pipelineDesc.computeShader = computeShader.get();", - " pipelineDesc.pipelineLayout = pipelineLayout.get();", - "", - " auto pipeline = renderer->CreatePipelineState(pipelineDesc);", - " mirage::pipeline_info info;", - " info.pipeline_state = mirage::pipeline_state_ptr(pipeline, mirage::llgl_deleter);", - " info.pipeline_layout = pipelineLayout;", - " info.shaders = {computeShader};", - " return info;", - "}", - "" - ]) - -def generate_graphics_pipeline(header_lines: List[str], base: str, shaders: List[CompiledShaderInfo]): - """Generate graphics pipeline creation function""" - func_name = f"create_{base}_pipeline" - - header_lines.extend([ - f"// 图形管线: {base}", - f"inline auto {func_name}(LLGL::RenderSystem* renderer, const LLGL::RenderPass* render_pass, const LLGL::PipelineLayoutDescriptor& in_pipeline_layout_desc, const LLGL::VertexFormat& vertex_format = mirage::create_vertex_format()) {{", - f" // 加载各个阶段的着色器", - f" LLGL::ShaderDescriptor vertexShaderDesc, fragShaderDesc;", - f" vertexShaderDesc.vertex.inputAttribs = vertex_format.attributes;", - ]) - - SHADER_DESC_NAME = { - 'vertex': 'vertexShaderDesc', - 'pixel': 'fragShaderDesc', - 'fragment': 'fragShaderDesc', - } - - # Load all shader stages - for shader in shaders: - shader_type = shader.entry.shader_type - if shader_type in SHADER_TYPES: - header_lines.append( - f" auto {shader_type}Shader 
-def generate_compute_pipeline(header_lines: List[str], shader: CompiledShaderInfo):
-    """Generate compute pipeline creation function"""
-    func_name = f"create_{shader.base}_{shader.entry.name}_pipeline"
-
-    header_lines.extend([
-        f"// compute pipeline: {shader.base} - {shader.entry.name}",
-        f"inline auto {func_name}(LLGL::RenderSystem* renderer) {{",
-        "    // load the compute shader",
-        f"    auto computeShader = LoadShader(renderer, \"{shader.output_file.name}\", {SHADER_TYPES['compute']}, \"{shader.entry.name}\");",
-        "",
-        "    // create the pipeline layout",
-        "    LLGL::PipelineLayoutDescriptor layoutDesc;",
-        "    auto pipelineLayout = create_pipeline_layout(renderer, layoutDesc);",
-        "",
-        "    // create the compute pipeline",
-        "    LLGL::ComputePipelineDescriptor pipelineDesc;",
-        "    pipelineDesc.computeShader = computeShader.get();",
-        "    pipelineDesc.pipelineLayout = pipelineLayout.get();",
-        "",
-        "    auto pipeline = renderer->CreatePipelineState(pipelineDesc);",
-        "    mirage::pipeline_info info;",
-        "    info.pipeline_state = mirage::pipeline_state_ptr(pipeline, mirage::llgl_deleter);",
-        "    info.pipeline_layout = pipelineLayout;",
-        "    info.shaders = {computeShader};",
-        "    return info;",
-        "}",
-        ""
-    ])
-
-def generate_graphics_pipeline(header_lines: List[str], base: str, shaders: List[CompiledShaderInfo]):
-    """Generate graphics pipeline creation function"""
-    func_name = f"create_{base}_pipeline"
-
-    header_lines.extend([
-        f"// graphics pipeline: {base}",
-        f"inline auto {func_name}(LLGL::RenderSystem* renderer, const LLGL::RenderPass* render_pass, const LLGL::PipelineLayoutDescriptor& in_pipeline_layout_desc, const LLGL::VertexFormat& vertex_format = mirage::create_vertex_format()) {{",
-        f"    // load the shaders for each stage",
-        f"    LLGL::ShaderDescriptor vertexShaderDesc, fragShaderDesc;",
-        f"    vertexShaderDesc.vertex.inputAttribs = vertex_format.attributes;",
-    ])
-
-    SHADER_DESC_NAME = {
-        'vertex': 'vertexShaderDesc',
-        'pixel': 'fragShaderDesc',
-        'fragment': 'fragShaderDesc',
-    }
-
-    # Load all shader stages
-    for shader in shaders:
-        shader_type = shader.entry.shader_type
-        if shader_type in SHADER_TYPES:
-            header_lines.append(
-                f"    auto {shader_type}Shader = LoadShader(renderer, "
-                f"\"{shader.output_file}\", {SHADER_TYPES[shader_type]}, "
-                f"\"{shader.entry.name}\", {SHADER_DESC_NAME[shader_type]});"
-            )
-
-    # Create pipeline layout and descriptor
-    header_lines.extend([
-        "    // create the pipeline layout",
-        "    auto pipelineLayout = create_pipeline_layout(renderer, in_pipeline_layout_desc);",
-        "",
-        "    // create the graphics pipeline",
-        "    LLGL::GraphicsPipelineDescriptor pipelineDesc;",
-    ])
-
-    # Set all shader stages
-    for shader in shaders:
-        shader_type = shader.entry.shader_type
-        if shader_type in SHADER_TYPES:
-            header_lines.append(f"    pipelineDesc.{shader_type}Shader = {shader_type}Shader.get();")
-
-    # Set basic render states
-    header_lines.extend([
-        "    pipelineDesc.pipelineLayout = pipelineLayout.get();",
-        "",
-        "    // set the basic render state",
-        "    pipelineDesc.renderPass = render_pass;",
-        "    pipelineDesc.rasterizer.multiSampleEnabled = true;",
-        "    pipelineDesc.blend.targets[0].blendEnabled = true;",
-        "    pipelineDesc.depth.testEnabled = false;",
-        "    pipelineDesc.depth.writeEnabled = false;",
-        "",
-        "    auto pipeline = renderer->CreatePipelineState(pipelineDesc);",
-    ])
-
-    # Finish function
-    header_lines.extend([
-        "    mirage::pipeline_info info;",
-        "    info.pipeline_state = mirage::pipeline_state_ptr(pipeline, mirage::llgl_deleter);",
-        "    info.pipeline_layout = pipelineLayout;",
-        "    info.shaders = {",
-    ])
-
-    for shader in shaders:
-        shader_type = shader.entry.shader_type
-        if shader_type in SHADER_TYPES:
-            header_lines.append(f"        {shader_type}Shader,")
-
-    header_lines.append("    };")
-
-    header_lines.extend([
-        "    return info;",
-        "}",
-        ""
-    ])
-
-def generate_pipeline_header(header_path: Path):
-    """Generate the complete pipeline header file"""
-    header_lines = generate_pipeline_header_preamble()
-
-    # Group shaders by base name
-    shader_groups = {}
-    for shader in compiled_shaders:
-        if shader.base not in shader_groups:
-            shader_groups[shader.base] = []
-        shader_groups[shader.base].append(shader)
-
-    # Generate pipeline functions
-    for base, shaders in shader_groups.items():
-        has_compute = any(s.entry.shader_type == "compute" for s in shaders)
-        if has_compute:
-            for shader in shaders:
-                if shader.entry.shader_type == "compute":
-                    generate_compute_pipeline(header_lines, shader)
-        else:
-            generate_graphics_pipeline(header_lines, base, shaders)
-
-    # Close namespace
-    header_lines.extend([
-        "} // namespace generated_pipelines",
-        ""
-    ])
-
-    try:
-        header_path.write_text("\n".join(header_lines), encoding="utf-8")
-        print_utf8(f"**success**: generated pipeline C++ header {header_path}")
-    except Exception as e:
-        print_utf8(f"**error**: failed to write header file {header_path}: {e}")
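A hedged C++ analogue, illustrative only, of the grouping step above: entry points are bucketed by shader base name, so one .slang file with a vertex/pixel pair yields a single graphics pipeline function, while a compute entry yields a compute pipeline function:

    #include <array>
    #include <iostream>
    #include <map>
    #include <string>
    #include <vector>

    int main() {
        // (base, entry point) pairs as the compile step would record them
        const std::vector<std::array<std::string, 2>> compiled = {
            { "mirage_rounded_rect", "vertex_main" },
            { "mirage_rounded_rect", "pixel_main" },
            { "test", "vertex_main" },
        };
        std::map<std::string, std::vector<std::string>> groups;
        for (const auto& s : compiled) groups[s[0]].push_back(s[1]); // bucket by base
        for (const auto& [base, entries] : groups)
            std::cout << base << ": " << entries.size() << " entry point(s)\n";
    }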
-
-def main():
-    """Main entry point: parse the command line and run the compile pipeline."""
-    if sys.platform.startswith('win'):
-        subprocess.run(['chcp', '65001'], shell=True)
-
-    parser = argparse.ArgumentParser(description="Compile shaders with slangc and generate an LLGL render pipeline C++ header")
-    parser.add_argument("--shader-list", help="path to the shader list file")
-    parser.add_argument("--output-dir", help="output directory")
-    parser.add_argument("--slangc", default="/Users/nanako/Documents/Slang/bin/slangc", help="path to the slangc compiler")
-    parser.add_argument("--debug", action="store_true", help="compile in debug mode")
-    parser.add_argument("--opengl", action="store_true", help="compile OpenGL shaders")
-    parser.add_argument("--vulkan", action="store_true", help="compile Vulkan shaders")
-    parser.add_argument("--d3d11", action="store_true", help="compile D3D11 shaders")
-    parser.add_argument("--d3d12", action="store_true", help="compile D3D12 shaders")
-    parser.add_argument("--metal", action="store_true", help="compile Metal shaders")
-    parser.add_argument("--header", help="path of the generated header file")
-
-    args = parser.parse_args()
-
-    target_types = [
-        ['glsl', args.opengl],
-        ['spirv', args.vulkan],
-        ['dxbc', args.d3d11],
-        ['dxil', args.d3d12],
-        ['metallib', args.metal],
-        ['metal', args.metal],
-    ]
-
-    output_dir = Path(args.output_dir or "shaders")
-    shader_list = Path(args.shader_list or "shader_paths.txt")
-
-    try:
-        shader_paths = shader_list.read_text(encoding="utf-8").splitlines()
-    except Exception as e:
-        print_utf8(f"**error**: failed to read shader list file {shader_list}: {e}")
-        sys.exit(1)
-
-    all_success = True
-    for shader_path in shader_paths:
-        shader_path = shader_path.strip()
-        if not shader_path:
-            continue
-        for file in find_shader_files(Path(shader_path), ['.slang']):
-            if not compile_shader(file, target_types, output_dir, args):
-                all_success = False
-
-    # By default, write the header next to the shader list
-    header_file = Path(args.header or shader_list.parent / "generated_pipelines.h")
-    generate_pipeline_header(header_file)
-
-    if not all_success:
-        sys.exit(1)
-
-if __name__ == "__main__":
-    main()
diff --git a/src/CMakeLists.txt b/src/CMakeLists.txt
index 506476c..c6f1a60 100644
--- a/src/CMakeLists.txt
+++ b/src/CMakeLists.txt
@@ -2,17 +2,6 @@ cmake_minimum_required(VERSION 3.15)
 project(mirage_core LANGUAGES C CXX)
 set(CMAKE_CXX_STANDARD 26)
-if (MSVC)
-    # select the C++ standard for the MSVC compiler
-    add_compile_options(/std:c++latest)
-    # use UTF-8 source encoding
-    add_compile_options(/utf-8)
-endif ()
-if (WIN32)
-    # define the Windows version macros
-    add_definitions(-DWIN32_LEAN_AND_MEAN)
-    add_definitions(-DUNICODE -D_UNICODE)
-endif()
 find_package(Freetype REQUIRED)
 find_package(Eigen3 REQUIRED)
@@ -23,14 +12,14 @@ retrieve_files(${CMAKE_CURRENT_SOURCE_DIR} SRC_FILES)
 add_library(${PROJECT_NAME} STATIC ${SRC_FILES})
 target_link_libraries(${PROJECT_NAME} PUBLIC Freetype::Freetype Eigen3::Eigen)
 target_include_directories(${PROJECT_NAME} PUBLIC ${CMAKE_CURRENT_SOURCE_DIR})
-configure_glfw_native(${PROJECT_NAME})
 add_os_definitions(${PROJECT_NAME})
+# add the custom commands that compile the shaders
+add_mirage_shader_directory(${CMAKE_CURRENT_SOURCE_DIR}/shaders)
+add_shader_dependencies(${PROJECT_NAME})
+
 if (WIN32)
-    target_compile_definitions(${PROJECT_NAME} PUBLIC SOKOL_D3D11=1)
-    target_link_libraries(${PROJECT_NAME} PRIVATE d3d11 dxgi)
-elseif (APPLE)
-    target_compile_definitions(${PROJECT_NAME} PUBLIC SOKOL_METAL=1)
+    target_compile_definitions(${PROJECT_NAME} PUBLIC -DSOKOL_D3D11)
 elseif (UNIX)
-    target_compile_definitions(${PROJECT_NAME} PUBLIC SOKOL_GLCORE=1)
+    target_compile_definitions(${PROJECT_NAME} PUBLIC -DSOKOL_GLCORE33)
 endif ()
diff --git a/src/core/render_context.h b/src/core/render_context.h
index 2e30194..1d86154 100644
--- a/src/core/render_context.h
+++ b/src/core/render_context.h
@@ -1,18 +1,22 @@
 #pragma once
 #include "sokol/sokol_gfx.h"
-#include 
+
+#include "misc/mirage_type.h"
+#include 
+#include 
 class mirage_window;
+
 class mirage_render_context {
 public:
     virtual ~mirage_render_context() = default;
     virtual bool init() { return false; }
+    virtual void cleanup() { }
+    virtual void tick(const duration_type& in_delta) = 0;
     virtual sg_environment get_environment() = 0;
     virtual bool setup_surface(mirage_window* in_window, bool in_hdr) { return false; }
-private:
-
 };
-std::unique_ptr<mirage_render_context> mirage_create_render_context();
+mirage_render_context* mirage_create_render_context();
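A hedged sketch, not part of the patch: the reworked interface above can be satisfied by a do-nothing backend, which shows what a platform port now has to implement; the class name is hypothetical:

    #include "core/render_context.h"

    class null_render_context final : public mirage_render_context {
    public:
        bool init() override { return true; }                     // nothing to set up
        void cleanup() override {}                                // nothing to release
        void tick(const duration_type&) override {}               // no per-frame work
        sg_environment get_environment() override { return {}; } // empty environment
    };

    // a platform without a real backend could have mirage_create_render_context()
    // return `new null_render_context()`.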
diff --git a/src/core/render_window.cpp b/src/core/render_window.cpp
index 9379138..a6b52f5 100644
--- a/src/core/render_window.cpp
+++ b/src/core/render_window.cpp
@@ -1,3 +1,29 @@
 #include "render_window.h"
+Eigen::Matrix4f mirage_window::create_screen_to_dci_matrix(float in_screen_width, float in_screen_height) {
+    // start from an identity matrix
+    Eigen::Matrix4f matrix = Eigen::Matrix4f::Identity();
+    // scale factors
+    const float scale_x = 2.0f / in_screen_width;
+    const float scale_y = -2.0f / in_screen_height; // flip the Y axis, since window coordinates have Y pointing down
+
+    // translation factors
+    constexpr float translate_x = -1.0f;
+    constexpr float translate_y = 1.0f;
+
+    // apply the scale
+    matrix(0, 0) = scale_x;
+    matrix(1, 1) = scale_y;
+
+    // apply the translation
+    matrix(0, 3) = translate_x;
+    matrix(1, 3) = translate_y;
+
+    return matrix;
+}
+
+Eigen::Matrix4f mirage_window::create_screen_to_dci_matrix() const {
+    const auto size = get_window_frame_size();
+    return create_screen_to_dci_matrix(size.x(), size.y());
+}
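A hedged numeric check, not part of the patch, that the matrix above does what its comments claim: pixel (0,0) maps to clip space (-1,+1) and (width,height) to (+1,-1):

    #include <Eigen/Core>
    #include <iostream>

    int main() {
        const float w = 800.f, h = 600.f;
        Eigen::Matrix4f m = Eigen::Matrix4f::Identity();
        m(0, 0) = 2.f / w;  m(1, 1) = -2.f / h; // scale, with the Y flip
        m(0, 3) = -1.f;     m(1, 3) = 1.f;      // shift the origin to the top-left
        std::cout << (m * Eigen::Vector4f(0, 0, 0, 1)).transpose() << "\n"; // -1  1  0  1
        std::cout << (m * Eigen::Vector4f(w, h, 0, 1)).transpose() << "\n"; //  1 -1  0  1
    }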
diff --git a/src/core/render_window.h b/src/core/render_window.h
index edd11d4..6ae0641 100644
--- a/src/core/render_window.h
+++ b/src/core/render_window.h
@@ -2,6 +2,23 @@
 #include "windows/windows_render_context.h"
 #include 
+class mirage_window;
+
+struct mirage_window_state {
+    virtual ~mirage_window_state() {
+        clear();
+    }
+    sg_buffer buffer;
+    sg_swapchain swapchain;
+    sg_bindings bindings;
+    sg_pipeline pipeline;
+    bool vsync = true;
+
+    virtual void clear() {}
+    virtual void present() {}
+    virtual void resize(const Eigen::Vector2i& size) {}
+};
+
 class mirage_window {
 public:
     bool create_window(int width, int height, const wchar_t* title);
@@ -18,6 +35,12 @@ public:
     void move(const Eigen::Vector2i& pos) { move(pos.x(), pos.y()); }
     [[nodiscard]] Eigen::Vector2i get_window_size() const;
+    [[nodiscard]] Eigen::Vector2i get_window_position() const;
+    [[nodiscard]] Eigen::Vector2i get_window_frame_size() const;
+    // build the orthographic projection from screen coordinates to DCI coordinates
+    [[nodiscard]] static Eigen::Matrix4f create_screen_to_dci_matrix(float in_screen_width, float in_screen_height);
+    [[nodiscard]] Eigen::Matrix4f create_screen_to_dci_matrix() const;
+
     [[nodiscard]] void* get_window_handle() const;
     // style functions
@@ -34,6 +57,10 @@ public:
     [[nodiscard]] bool close_requested() const { return close_request; }
     static bool poll_events();
     static const std::vector<mirage_window*>& get_windows();
+
+    void on_resize(int width, int height);
+
+    std::unique_ptr<mirage_window_state> state;
 private:
     void* window_handle{};
     bool close_request = false;
diff --git a/src/core/windows/windows_render_context.cpp b/src/core/windows/windows_render_context.cpp
index c814f17..6727d19 100644
--- a/src/core/windows/windows_render_context.cpp
+++ b/src/core/windows/windows_render_context.cpp
@@ -5,141 +5,216 @@
 #include "windows_render_context.h"
 #include 
+#include 
 #include "core/render_window.h"
+#include 
-mirage_windows_render_context::~mirage_windows_render_context() {
-    cleanup();
-}
+#include "windows_window_state.h"
+#include "misc/scope_exit.h"
+#include "shaders/test.hlsl.h"
+#include "shaders/mirage_rounded_rect.hlsl.h"
-bool mirage_windows_render_context::init() {
-    try {
-        // supported feature levels (highest first)
-        D3D_FEATURE_LEVEL feature_levels[] = {
-            D3D_FEATURE_LEVEL_11_1,
-            D3D_FEATURE_LEVEL_11_0,
-            D3D_FEATURE_LEVEL_10_1,
-            D3D_FEATURE_LEVEL_10_0
-        };
+windows_mirage_render_context::~windows_mirage_render_context() { cleanup(); }
-        // device creation flags
-        UINT device_flags = 0;
-        #if DEBUG
-        device_flags |= D3D11_CREATE_DEVICE_DEBUG; // enable the debug layer in debug builds
-        #endif
+bool windows_mirage_render_context::init() {
+    try {
+        // supported feature levels (highest first)
+        D3D_FEATURE_LEVEL feature_levels[] = {
+            D3D_FEATURE_LEVEL_11_1,
+            D3D_FEATURE_LEVEL_11_0,
+            D3D_FEATURE_LEVEL_10_1,
+            D3D_FEATURE_LEVEL_10_0
+        };
-        // the driver types to try
-        constexpr D3D_DRIVER_TYPE driver_types[] = {
-            D3D_DRIVER_TYPE_HARDWARE, // prefer hardware acceleration
-            D3D_DRIVER_TYPE_WARP, // then the WARP software renderer
-            D3D_DRIVER_TYPE_REFERENCE // finally the reference software renderer
-        };
+        // device creation flags
+        UINT device_flags = D3D11_CREATE_DEVICE_BGRA_SUPPORT | D3D11_CREATE_DEVICE_SINGLETHREADED; // BGRA support and single-threaded mode
+#if DEBUG
+        device_flags |= D3D11_CREATE_DEVICE_DEBUG; // enable the debug layer in debug builds
+#endif
-        // try to create the device in priority order
-        HRESULT hr = E_FAIL;
-        D3D_DRIVER_TYPE used_driver_type = D3D_DRIVER_TYPE_UNKNOWN;
+        // the driver types to try
+        constexpr D3D_DRIVER_TYPE driver_types[] = {
+            D3D_DRIVER_TYPE_HARDWARE,
+            // prefer hardware acceleration
+            D3D_DRIVER_TYPE_WARP,
+            // then the WARP software renderer
+            D3D_DRIVER_TYPE_REFERENCE // finally the reference software renderer
+        };
-        for (const auto& driver_type : driver_types) {
-            hr = D3D11CreateDevice(
-                nullptr, // use the default adapter
-                driver_type, // driver type
-                nullptr, // software rasterizer module handle (software devices only)
-                device_flags, // device creation flags
-                feature_levels, // feature level array
-                ARRAYSIZE(feature_levels), // number of feature levels
-                D3D11_SDK_VERSION, // SDK version
-                &device, // out: device
-                &feature_level, // out: selected feature level
-                &device_context // out: device context
-            );
+        // try to create the device in priority order
+        HRESULT hr = E_FAIL;
+        D3D_DRIVER_TYPE used_driver_type = D3D_DRIVER_TYPE_UNKNOWN;
+        // use DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE_V2 (recommended)
+        if (SetProcessDpiAwarenessContext(DPI_AWARENESS_CONTEXT_PER_MONITOR_AWARE_V2)) {
+            std::cout << "mirage: " << "DPI awareness set to per-monitor aware v2" << std::endl;
+        }
-            if (SUCCEEDED(hr)) {
-                used_driver_type = driver_type;
-                break;
-            }
-        }
+        for (const auto& driver_type: driver_types) {
+            hr = D3D11CreateDevice(
+                nullptr,
+                // use the default adapter
+                driver_type,
+                // driver type
+                nullptr,
+                // software rasterizer module handle (software devices only)
+                device_flags,
+                // device creation flags
+                feature_levels,
+                // feature level array
+                ARRAYSIZE(feature_levels),
+                // number of feature levels
+                D3D11_SDK_VERSION,
+                // SDK version
+                &device,
+                // out: device
+                &feature_level,
+                // out: selected feature level
+                &device_context // out: device context
+            );
-        // check whether device creation succeeded
-        if (FAILED(hr)) {
-            std::cerr << "Failed to create D3D11 device with any driver type. HRESULT: 0x"
-                      << std::hex << hr << std::dec << std::endl;
-            cleanup();
-            return false;
-        }
+            if (SUCCEEDED(hr)) {
+                used_driver_type = driver_type;
+                break;
+            }
+        }
+        // check whether device creation succeeded
+        if (FAILED(hr)) {
+            std::cerr << "mirage: " << "Failed to create D3D11 device with any driver type. HRESULT: 0x"
+                << std::hex << hr << std::dec << std::endl;
+            cleanup();
+            return false;
+        }
HRESULT: 0x" + << std::hex << hr << std::dec << std::endl; + cleanup(); + return false; + } - // 获取DXGI工厂以便后续创建交换链 + // 输出使用设备 + DXGI_ADAPTER_DESC adapter_desc; + + // 获取DXGI工厂以便后续创建交换链 IDXGIAdapter* dxgi_adapter = nullptr; - IDXGIDevice* dxgi_device = nullptr; - hr = device->QueryInterface(__uuidof(IDXGIDevice), reinterpret_cast(&dxgi_device)); - if (SUCCEEDED(hr)) { - hr = dxgi_device->GetAdapter(&dxgi_adapter); - if (SUCCEEDED(hr)) { - hr = dxgi_adapter->GetDesc(&adapter_desc); - if (FAILED(hr)) { - std::wcerr << L"Failed to get DXGI Adapter description" << std::endl; - } - hr = dxgi_adapter->GetParent(__uuidof(IDXGIFactory), reinterpret_cast(&dxgi_factory)); - if (FAILED(hr)) { - dxgi_factory = nullptr; - } - dxgi_adapter->Release(); - } - dxgi_device->Release(); - } + IDXGIDevice* dxgi_device = nullptr; + hr = device->QueryInterface(__uuidof(IDXGIDevice), reinterpret_cast(&dxgi_device)); + if (SUCCEEDED(hr)) { + hr = dxgi_device->GetAdapter(&dxgi_adapter); + if (SUCCEEDED(hr)) { + hr = dxgi_adapter->GetDesc(&adapter_desc); + if (FAILED(hr)) { std::wcerr << L"mirage: " << L"Failed to get DXGI Adapter description" << std::endl; } + hr = dxgi_adapter->GetParent(__uuidof(IDXGIFactory), reinterpret_cast(&dxgi_factory)); + if (FAILED(hr)) { dxgi_factory = nullptr; } + dxgi_adapter->Release(); + } + dxgi_device->Release(); + } - if (!dxgi_factory) { - std::cerr << "Failed to get DXGI Factory" << std::endl; - cleanup(); - return false; - } + if (!dxgi_factory) { + std::cerr << "Failed to get DXGI Factory" << std::endl; + cleanup(); + return false; + } - // 输出初始化成功信息 - std::cout << "D3D11 device created successfully" << std::endl; - std::wcout << L"Using adapter: " << adapter_desc.Description << std::endl; + // 输出初始化成功信息 + std::cout << "mirage: " << "D3D11 device created successfully" << std::endl; + std::wcout << L"mirage: " << L"Using adapter: " << adapter_desc.Description << std::endl; - // 输出驱动类型信息 - auto driver_type_str = "Unknown"; - switch (used_driver_type) { - case D3D_DRIVER_TYPE_HARDWARE: driver_type_str = "Hardware"; break; - case D3D_DRIVER_TYPE_WARP: driver_type_str = "WARP"; break; - case D3D_DRIVER_TYPE_REFERENCE: driver_type_str = "Reference"; break; - default: ; - } - std::cout << "Using driver type: " << driver_type_str << std::endl; + // 输出驱动类型信息 + auto driver_type_str = "Unknown"; + switch (used_driver_type) { + case D3D_DRIVER_TYPE_HARDWARE: + driver_type_str = "Hardware"; + break; + case D3D_DRIVER_TYPE_WARP: + driver_type_str = "WARP"; + break; + case D3D_DRIVER_TYPE_REFERENCE: + driver_type_str = "Reference"; + break; + default: ; + } + std::cout << "mirage: " << "Using driver type: " << driver_type_str << std::endl; - // 输出特性级别信息 - auto feature_level_str = "Unknown"; - switch (feature_level) { - case D3D_FEATURE_LEVEL_11_1: feature_level_str = "11.1"; break; - case D3D_FEATURE_LEVEL_11_0: feature_level_str = "11.0"; break; - case D3D_FEATURE_LEVEL_10_1: feature_level_str = "10.1"; break; - case D3D_FEATURE_LEVEL_10_0: feature_level_str = "10.0"; break; - default: ; - } - std::cout << "Using feature level: " << feature_level_str << std::endl; + // 输出特性级别信息 + auto feature_level_str = "Unknown"; + switch (feature_level) { + case D3D_FEATURE_LEVEL_11_1: + feature_level_str = "11.1"; + break; + case D3D_FEATURE_LEVEL_11_0: + feature_level_str = "11.0"; + break; + case D3D_FEATURE_LEVEL_10_1: + feature_level_str = "10.1"; + break; + case D3D_FEATURE_LEVEL_10_0: + feature_level_str = "10.0"; + break; + default: ; + } + std::cout << "mirage: " << "Using feature level: " << 
+        std::cout << "mirage: " << "Using feature level: " << feature_level_str << std::endl;
-        return true;
-    }
-    catch (const std::exception& e) {
-        std::cerr << "Exception during D3D11 initialization: " << e.what() << std::endl;
-        cleanup();
-        return false;
-    }
+        return true;
+    } catch (const std::exception& e) {
+        std::cerr << "mirage: " << "Exception during D3D11 initialization: " << e.what() << std::endl;
+        cleanup();
+        return false;
+    }
 }
 // resource cleanup
-void mirage_windows_render_context::cleanup() {
-    // safely release the COM interfaces
-    if (device_context) { device_context->Release(); device_context = nullptr; }
-    if (device) { device->Release(); device = nullptr; }
-    if (dxgi_factory) { dxgi_factory->Release(); dxgi_factory = nullptr; }
+void windows_mirage_render_context::cleanup() {
+    mirage_render_context::cleanup();
+    // safely release the COM interfaces
+    sg_shutdown();
+    if (device_context) {
+        device_context->Release();
+        device_context = nullptr;
+    }
+    if (device) {
+        device->Release();
+        device = nullptr;
+    }
+    if (dxgi_factory) {
+        dxgi_factory->Release();
+        dxgi_factory = nullptr;
+    }
 }
+void windows_mirage_render_context::tick(const duration_type& in_delta) {
+    const auto& windows = mirage_window::get_windows();
+    for (const auto& window: windows) {
+        auto& window_state = window->state;
+        sg_pass pass{};
+        pass.action.colors[0].load_action = SG_LOADACTION_CLEAR;
+        pass.action.colors[0].store_action = SG_STOREACTION_STORE;
+        pass.action.colors[0].clear_value = { 0.f, 0.f, 0.f, 1.0f };
-sg_environment mirage_windows_render_context::get_environment() {
+        pass.action.depth.load_action = SG_LOADACTION_CLEAR;
+        pass.action.depth.store_action = SG_STOREACTION_DONTCARE;
+        pass.action.depth.clear_value = 1.0f;
+        pass.swapchain = window_state->swapchain;
+
+        sg_begin_pass(pass);
+        sg_apply_pipeline(pip);
+
+        sg_apply_viewport(0, 0, window_state->swapchain.width, window_state->swapchain.height, true);
+
+        sg_bindings bindings{};
+        bindings.vertex_buffers[0] = vertex_buffer;
+        bindings.index_buffer = index_buffer;
+        sg_apply_bindings(bindings);
+
+        const auto& matrix = window->create_screen_to_dci_matrix();
+
+        sg_apply_uniforms(0, SG_RANGE(matrix));
+
+        sg_draw(0, 12, 2);
+        sg_end_pass();
+        sg_commit();
+
+        window_state->present();
+    }
+}
+
+sg_environment windows_mirage_render_context::get_environment() {
     return {
         .d3d11 = {
             .device = device,
@@ -148,35 +223,80 @@ sg_environment mirage_windows_render_context::get_environment() {
     };
 }
-bool mirage_windows_render_context::setup_surface(mirage_window* in_window, bool in_hdr) {
-    const auto& window_size = in_window->get_window_size();
+bool windows_mirage_render_context::setup_surface(mirage_window* in_window, bool in_hdr) {
+    auto state = std::make_unique<windows_window_state>();
+    if (!state->init(device, dxgi_factory, in_window, in_hdr)) { return false; }
-    const auto format = in_hdr ? DXGI_FORMAT_R16G16B16A16_FLOAT : DXGI_FORMAT_R8G8B8A8_UNORM;
+    shader = sg_make_shader(get_mirage_rounded_rect_shader_desc());
-    // create the D3D11 render target view
-    IDXGISwapChain* swap_chain = nullptr;
-    DXGI_SWAP_CHAIN_DESC swap_chain_desc = {
-        .BufferDesc = {
-            .Width = static_cast<UINT>(window_size.x()),
-            .Height = static_cast<UINT>(window_size.y()),
-            .RefreshRate = {
-                .Numerator = 60,
-                .Denominator = 1
-            },
-            .Format = format,
-        },
-        .SampleDesc = {
-            .Count = 1,
-            .Quality = 0
-        },
-        .BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT,
-        .BufferCount = 1,
-        .OutputWindow = nullptr,
-        .Windowed = TRUE
+    sg_pipeline_desc pipeline_desc = get_mirage_rounded_rect_pipeline_desc(shader, state->swapchain);
+    pip = sg_make_pipeline(pipeline_desc);
+
+    mirage_triangle_t triangles[4];
+
+    // create the vertex buffer (a rectangle)
+    std::vector<mirage_vertex_t> vertices = {
+        // x, y, r, g, b, a
+        { { 0.f, 0.f }, { 0.f, 0.f }, { 1.0f, 1.0f, 1.0f, 1.0f } },
+        // top-left - yellow
+        { { 100.f, 0.f }, { 1.f, 0.f }, { 1.0f, 1.0f, 1.0f, 1.0f } },
+        // top-right - blue
+        { { 0.f, 100.f }, { 0.f, 1.f }, { 1.0f, 1.0f, 1.0f, 1.0f } },
+        // bottom-left - red
+        { { 100.f, 100.f }, { 1.f, 1.f }, { 1.0f, 1.0f, 1.0f, 1.0f } },
+        // bottom-right - green
+        { { 200.f, 200.f }, { 0.f, 0.f }, { 0.0f, 1.0f, 1.0f, 1.0f } },
+        // top-left - yellow
+        { { 300.f, 200.f }, { 1.f, 0.f }, { 1.0f, 0.0f, 1.0f, 1.0f } },
+        // top-right - blue
+        { { 200.f, 300.f }, { 0.f, 1.f }, { 1.0f, 0.0f, 1.0f, 1.0f } },
+        // bottom-left - red
+        { { 300.f, 300.f }, { 1.f, 1.f }, { 0.0f, 1.0f, 0.0f, 1.0f } },
+        // bottom-right - green
     };
+    for (auto& v: vertices) {
+        v.param_a.a = 100;
+        v.param_a.b = 100;
+
+        v.param_b.a = 0;
+        v.param_b.b = 5;
+        v.param_b.c = 10;
+        v.param_b.d = 20;
+    }
+
+    // the triangle indices are unchanged
+    triangles[0].indices[0] = 0;
+    triangles[0].indices[1] = 1;
+    triangles[0].indices[2] = 2;
+    triangles[1].indices[0] = 1;
+    triangles[1].indices[1] = 3;
+    triangles[1].indices[2] = 2;
+
+    triangles[2].indices[0] = 4;
+    triangles[2].indices[1] = 5;
+    triangles[2].indices[2] = 6;
+    triangles[3].indices[0] = 5;
+    triangles[3].indices[1] = 7;
+    triangles[3].indices[2] = 6;
+
+    std::span<mirage_vertex_t> vertex_span{ vertices.data(), vertices.size() };
+    sg_buffer_desc vertex_buffer_desc{
+        .size = vertex_span.size_bytes() * 2,
+        .type = SG_BUFFERTYPE_VERTEXBUFFER,
+        .usage = SG_USAGE_DYNAMIC,
+    };
+    vertex_buffer = sg_make_buffer(vertex_buffer_desc);
+    std::span<mirage_vertex_t> vertex_span2{ vertices.data(), vertices.size() };
+    sg_update_buffer(vertex_buffer, sg_range{ vertex_span2.data(), vertex_span2.size_bytes() });
+
+    index_buffer = sg_make_buffer(sg_buffer_desc{
+        .type = SG_BUFFERTYPE_INDEXBUFFER,
+        .usage = SG_USAGE_IMMUTABLE,
+        .data = SG_RANGE(triangles),
+    });
+
+    in_window->state = std::move(state);
     return true;
 }
-std::unique_ptr<mirage_render_context> mirage_create_render_context() {
-    return std::make_unique<mirage_windows_render_context>();
-}
+mirage_render_context* mirage_create_render_context() { return new windows_mirage_render_context(); }
diff --git a/src/core/windows/windows_render_context.h b/src/core/windows/windows_render_context.h
index 6db84fc..0144b65 100644
--- a/src/core/windows/windows_render_context.h
+++ b/src/core/windows/windows_render_context.h
@@ -3,21 +3,26 @@
 #include "core/render_context.h"
-class mirage_windows_render_context : public mirage_render_context {
+class windows_mirage_render_context : public mirage_render_context {
 public:
-    mirage_windows_render_context() = default;
-    virtual ~mirage_windows_render_context() override;
+    windows_mirage_render_context() = default;
+    virtual ~windows_mirage_render_context() override;
     bool init() override;
-
-    void cleanup();
+    void cleanup() override;
+    virtual void tick(const duration_type& in_delta) override;
     sg_environment get_environment() override;
-    virtual bool setup_surface(mirage_window* in_window, bool in_hdr) override;
+    bool setup_surface(mirage_window* in_window, bool in_hdr) override;
 private:
-    ID3D11Device* device = nullptr;
-    ID3D11DeviceContext* device_context = nullptr;
+    ID3D11Device* device{};
+    ID3D11DeviceContext* device_context{};
-    IDXGIFactory* dxgi_factory = nullptr;
+    IDXGIFactory* dxgi_factory{};
     D3D_FEATURE_LEVEL feature_level;
+
+    sg_shader shader{};
+    sg_pipeline pip{};
+    sg_buffer vertex_buffer{};
+    sg_buffer index_buffer{};
 };
diff --git a/src/core/windows/windows_render_window.cpp b/src/core/windows/windows_render_window.cpp
index 28ce484..87442d5 100644
--- a/src/core/windows/windows_render_window.cpp
+++ b/src/core/windows/windows_render_window.cpp
@@ -13,21 +13,25 @@ std::vector<mirage_window*> windows;
 LRESULT CALLBACK WindowProc(HWND hwnd, UINT uMsg, WPARAM wParam, LPARAM lParam) {
     switch (uMsg) {
         case WM_CLOSE:
-            // mark the close request, but don't destroy the window yet
-            // the actual destruction is handled in the main loop
-            for (const auto& window : windows) {
+            for (const auto& window: windows) {
                 if (window->get_window_handle() == hwnd) {
                     window->close();
                     break;
                 }
             }
-            std::erase_if(windows, [hwnd](const mirage_window* window) {
-                return window->get_window_handle() == hwnd;
-            });
+            std::erase_if(windows, [hwnd](const mirage_window* window) { return window->get_window_handle() == hwnd; });
             return 0;
         case WM_DESTROY:
             PostQuitMessage(0);
             return 0;
+        case WM_SIZE:
+            for (const auto& window: windows) {
+                if (window->get_window_handle() == hwnd) {
+                    window->on_resize(LOWORD(lParam), HIWORD(lParam));
+                    break;
+                }
+            }
+            return 0;
         default:
             return DefWindowProc(hwnd, uMsg, wParam, lParam);
     }
@@ -37,21 +41,23 @@ bool mirage_window::create_window(int width, int height, const wchar_t* title) {
     WNDCLASS wc = {};
     wc.lpfnWndProc = WindowProc;
     wc.hInstance = GetModuleHandle(nullptr);
-    wc.lpszClassName = L"mirage_window";
-    wc.style = CS_HREDRAW | CS_VREDRAW;
+    wc.lpszClassName = L"mirage_window_class";
+    wc.style = CS_HREDRAW | CS_VREDRAW | CS_OWNDC;
+    wc.hCursor = LoadCursor(NULL, IDC_ARROW);
+    wc.hIcon = LoadIcon(NULL, IDI_WINLOGO);
     RegisterClass(&wc);
-    window_handle = (void*) CreateWindow(wc.lpszClassName,
-                                         title,
-                                         WS_OVERLAPPEDWINDOW,
-                                         0,
-                                         0,
-                                         width,
-                                         height,
-                                         nullptr,
-                                         nullptr,
-                                         wc.hInstance,
-                                         nullptr);
+    RECT rect = { 0, 0, width, height };
+    AdjustWindowRect(&rect, WS_OVERLAPPEDWINDOW, FALSE);
+
+    window_handle = (void*)CreateWindowW(
+        L"mirage_window_class", title,
+        WS_OVERLAPPEDWINDOW | WS_VISIBLE,
+        CW_USEDEFAULT, CW_USEDEFAULT,
+        rect.right - rect.left, rect.bottom - rect.top,
+        NULL, NULL, GetModuleHandleW(NULL), NULL
+    );
+
     if (!window_handle) {
         std::cerr << "Failed to create window" << std::endl;
         return false;
@@ -63,7 +69,12 @@ bool mirage_window::create_window(int width, int height, const wchar_t* title) {
 void mirage_window::show() { ShowWindow(WINDOW_HANDLE, SW_SHOW); }
 void mirage_window::hide() { ShowWindow(WINDOW_HANDLE, SW_HIDE); }
-void mirage_window::close() { close_request = true; DestroyWindow(WINDOW_HANDLE); }
+
+void mirage_window::close() {
+    close_request = true;
+    DestroyWindow(WINDOW_HANDLE);
+}
+
 void mirage_window::maximize() { ShowWindow(WINDOW_HANDLE, SW_MAXIMIZE); }
 void mirage_window::minimize() { ShowWindow(WINDOW_HANDLE, SW_MINIMIZE); }
@@ -80,9 +91,31 @@ void mirage_window::resize(int width, int height) {
 }
 Eigen::Vector2i mirage_window::get_window_size() const {
+    // window size including border and title bar
     RECT rect;
-    if (!GetClientRect(WINDOW_HANDLE, &rect)) { return {}; }
-    return { rect.right - rect.left, rect.bottom - rect.top };
+    if (GetWindowRect(WINDOW_HANDLE, &rect)) {
+        int width = rect.right - rect.left;
+        int height = rect.bottom - rect.top;
+        return Eigen::Vector2i(width, height);
+    }
+    return Eigen::Vector2i(0, 0);
+}
+
+Eigen::Vector2i mirage_window::get_window_frame_size() const {
+    // window size excluding border and title bar (the client area)
+    RECT rect;
+    if (GetClientRect(WINDOW_HANDLE, &rect)) {
+        int width = rect.right - rect.left;
+        int height = rect.bottom - rect.top;
+        return Eigen::Vector2i(width, height);
+    }
+    return Eigen::Vector2i(0, 0);
+}
+
+Eigen::Vector2i mirage_window::get_window_position() const {
+    RECT rect;
+    if (!GetWindowRect(WINDOW_HANDLE, &rect)) { return {}; }
+    return { rect.left, rect.top };
 }
 void* mirage_window::get_window_handle() const { return window_handle; }
@@ -153,3 +186,9 @@ bool mirage_window::poll_events() {
 }
 const std::vector<mirage_window*>& mirage_window::get_windows() { return windows; }
+
+void mirage_window::on_resize(int width, int height) {
+    state->swapchain.width = width;
+    state->swapchain.height = height;
+    state->resize(Eigen::Vector2i(width, height));
+}
diff --git a/src/core/windows/windows_window_state.cpp b/src/core/windows/windows_window_state.cpp
new file mode 100644
index 0000000..1ef4ae7
--- /dev/null
+++ b/src/core/windows/windows_window_state.cpp
@@ -0,0 +1,132 @@
+#include "windows_window_state.h"
+
+#include <iostream>
+
+#include "misc/scope_exit.h"
+
+bool windows_window_state::init(ID3D11Device* in_device, IDXGIFactory* in_factory, mirage_window* in_window,
+                                bool in_hdr) {
+    dx_device = in_device;
+    dxgi_factory = in_factory;
+
+    const auto size = in_window->get_window_frame_size();
+    const auto window_handle = in_window->get_window_handle();
+
+    const auto format = in_hdr ? DXGI_FORMAT_R16G16B16A16_FLOAT : DXGI_FORMAT_R8G8B8A8_UNORM;
+
+    // describe the D3D11 swap chain
+    DXGI_SWAP_CHAIN_DESC swap_chain_desc = {
+        .BufferDesc = {
+            .Width = static_cast<UINT>(size.x()),
+            .Height = static_cast<UINT>(size.y()),
+            .RefreshRate = {
+                .Numerator = 240,
+                .Denominator = 1
+            },
+            .Format = format,
+        },
+        .SampleDesc = {
+            .Count = 1,
+            .Quality = 0
+        },
+        .BufferUsage = DXGI_USAGE_RENDER_TARGET_OUTPUT,
+        .BufferCount = 1,
+        .OutputWindow = (HWND) window_handle,
+        .Windowed = TRUE
+    };
+
+    // create the swap chain
+    HRESULT hr = dxgi_factory->CreateSwapChain(dx_device, &swap_chain_desc, &dx_swap_chain);
+    if (FAILED(hr)) {
+        std::cerr << "mirage: " << "Failed to create DXGI Swap Chain. HRESULT: 0x" << std::hex << hr << std::dec <<
+            std::endl;
+        return false;
+    }
+
+    // fetch the back buffer
+    ID3D11Texture2D* back_buffer = nullptr;
+    hr = dx_swap_chain->GetBuffer(0, __uuidof(ID3D11Texture2D), (void**) &back_buffer);
+    if (FAILED(hr)) {
+        std::cerr << "mirage: " << "Failed to get back buffer from DXGI Swap Chain. HRESULT: 0x" << std::hex << hr <<
+            std::dec << std::endl;
+        return false;
+    }
+    ON_SCOPE_EXIT { back_buffer->Release(); };
+
+    // create the render target view
+    ID3D11RenderTargetView* render_target_view = nullptr;
+    hr = dx_device->CreateRenderTargetView(back_buffer, nullptr, &render_target_view);
+    if (FAILED(hr)) {
+        std::cerr << "mirage: " << "Failed to create render target view. HRESULT: 0x" << std::hex << hr << std::dec <<
+            std::endl;
+        return false;
+    }
+
+    swapchain.d3d11.render_view = render_target_view;
+    swapchain.width = static_cast<int>(size.x());
+    swapchain.height = static_cast<int>(size.y());
+    swapchain.color_format = in_hdr ? SG_PIXELFORMAT_RGBA16F : SG_PIXELFORMAT_RGBA8;
+    swapchain.depth_format = SG_PIXELFORMAT_NONE;
+    swapchain.sample_count = 1;
+    swapchain.d3d11.resolve_view = nullptr;
+    swapchain.d3d11.depth_stencil_view = nullptr;
+    return true;
+}
+
+void windows_window_state::clear() {
+    dx_swap_chain->Release();
+    get_dx_render_target_view()->Release();
+}
+
+void windows_window_state::present() { dx_swap_chain->Present(vsync ? 1 : 0, 0); }
+
+void windows_window_state::resize(const Eigen::Vector2i& size) {
+    if (size.x() == 0 || size.y() == 0) { return; }
+
+    // remember the current format
+    D3D11_RENDER_TARGET_VIEW_DESC rtv_desc = {};
+    auto dx_render_target_view = get_dx_render_target_view();
+    if (dx_render_target_view) {
+        dx_render_target_view->GetDesc(&rtv_desc);
+
+        // release the old render target view
+        dx_render_target_view->Release();
+    }
+
+    // resize the swap chain buffers
+    HRESULT hr = dx_swap_chain->ResizeBuffers(
+        1,
+        // buffer count
+        size.x(),
+        // new width
+        size.y(),
+        // new height
+        rtv_desc.Format,
+        // keep the original format
+        0 // flags
+    );
+
+    if (FAILED(hr)) {
+        // resizing failed
+        std::cerr << "failed to resize the swap chain buffers" << std::endl;
+        return;
+    }
+
+    // fetch the new back buffer
+    ID3D11Texture2D* back_buffer = nullptr;
+    hr = dx_swap_chain->GetBuffer(0, __uuidof(ID3D11Texture2D), (void**) &back_buffer);
+    if (FAILED(hr)) {
+        std::cerr << "failed to get the back buffer" << std::endl;
+        return;
+    }
+    ON_SCOPE_EXIT { back_buffer->Release(); };
+
+    // create a new render target view
+    hr = dx_device->CreateRenderTargetView(back_buffer,
+                                           nullptr,
+                                           (ID3D11RenderTargetView**) &swapchain.d3d11.render_view);
+    if (FAILED(hr)) {
+        std::cerr << "failed to create a new render target view" << std::endl;
+        return;
+    }
+}
diff --git a/src/core/windows/windows_window_state.h b/src/core/windows/windows_window_state.h
new file mode 100644
index 0000000..67b1f82
--- /dev/null
+++ b/src/core/windows/windows_window_state.h
@@ -0,0 +1,21 @@
+#pragma once
+#include "core/render_window.h"
+#include <d3d11.h>
+
+struct windows_window_state final : mirage_window_state {
+    ID3D11RenderTargetView* get_dx_render_target_view() const {
+        return (ID3D11RenderTargetView*)swapchain.d3d11.render_view;
+    }
+
+    bool init(ID3D11Device* in_device, IDXGIFactory* in_factory, mirage_window* in_window, bool in_hdr);
+
+    virtual void clear() override;
+
+    virtual void present() override;
+
+    virtual void resize(const Eigen::Vector2i& size) override;
+private:
+    ID3D11Device* dx_device{};
+    IDXGIFactory* dxgi_factory{};
+    IDXGISwapChain* dx_swap_chain{};
+};
diff --git a/src/mirage.cpp b/src/mirage.cpp
index 9a9a020..1b4809c 100644
--- a/src/mirage.cpp
+++ b/src/mirage.cpp
@@ -1,11 +1,15 @@
 #include "mirage.h"
-
-#include 
-
 #define SOKOL_IMPL
 #include "sokol/sokol_gfx.h"
+#include <iostream>
+#include <thread>
+
+#include "core/render_window.h"
+#include "misc/mirage_scoped_duration_timer.h"
+
+
 void mirage_log(const char* tag, uint32_t log_level, uint32_t log_item_id, const char* message_or_null, uint32_t line_nr, const char* filename_or_null, void* user_data) {
     if (log_level == 0) // panic
@@ -18,21 +22,35 @@
     std::clog << "sg: " << message_or_null << std::endl;
 }
-mirage_app::mirage_app() {
-}
 void mirage_app::init() {
-    render_context = mirage_create_render_context();
-    render_context->init();
-    sg_desc desc = {
-        .logger = {
-            .func = mirage_log,
-            .user_data = nullptr
-        },
-        .environment = render_context->get_environment(),
-    };
-    sg_setup(desc);
+    duration_type duration;
+    {
+        mirage_scoped_duration_timer timer(duration);
+
+        last_time = get_current_time();
+        render_context = mirage_create_render_context();
+        render_context->init();
+        const sg_desc desc = {
+            .logger = {
+                .func = mirage_log,
+                .user_data = nullptr
+            },
+            .environment = render_context->get_environment(),
+        };
+        sg_setup(desc);
+    }
+    // log the initialization time
+    std::cout << "mirage: " << "Initialization took " << std::chrono::duration_cast<std::chrono::milliseconds>(duration).count() << "ms" << std::endl;
 }
 void mirage_app::run() {
+    while (!mirage_window::get_windows().empty()) {
+        duration_type delta_time = get_current_time() - last_time;
+        mirage_window::poll_events();
+        render_context->tick(delta_time);
+
+        last_time = get_current_time();
+        std::this_thread::sleep_for(std::chrono::milliseconds(1));
+    }
+    delete render_context;
 }
diff --git a/src/mirage.h b/src/mirage.h
index 9b996d9..856d828 100644
--- a/src/mirage.h
+++ b/src/mirage.h
@@ -1,16 +1,17 @@
 #pragma once
-#include 
 #include "core/render_context.h"
 class mirage_render_context;
 class mirage_app {
 public:
-    mirage_app();
-
     void init();
     void run();
+
+    [[nodiscard]] mirage_render_context* get_render_context() const {
+        return render_context;
+    }
 private:
-    std::unique_ptr<mirage_render_context> render_context;
+    mirage_render_context* render_context{};
+    time_type last_time = {};
 };
diff --git a/src/misc/mirage_scoped_duration_timer.h b/src/misc/mirage_scoped_duration_timer.h
new file mode 100644
index 0000000..14e8f46
--- /dev/null
+++ b/src/misc/mirage_scoped_duration_timer.h
@@ -0,0 +1,15 @@
+#pragma once
+#include "mirage_type.h"
+
+class mirage_scoped_duration_timer {
+public:
+    explicit mirage_scoped_duration_timer(duration_type& out_duration) : duration(out_duration) {
+        start_time = get_current_time();
+    }
+    ~mirage_scoped_duration_timer() {
+        duration = get_current_time() - start_time;
+    }
+private:
+    time_type start_time;
+    duration_type& duration;
+};
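A hedged usage sketch, not from the patch, of the scoped timer above; the destructor writes the elapsed time into the referenced duration when the block ends, which is exactly how mirage_app::init() uses it:

    #include <chrono>
    #include <iostream>
    #include "misc/mirage_scoped_duration_timer.h"

    int main() {
        duration_type elapsed{};
        {
            mirage_scoped_duration_timer timer(elapsed); // starts timing here
            volatile long sink = 0;
            for (long i = 0; i < 1000000; ++i) sink += i; // stand-in workload
        } // timer destructor fires; elapsed is now valid
        std::cout << std::chrono::duration_cast<std::chrono::microseconds>(elapsed).count()
                  << " us\n";
    }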
diff --git a/src/misc/mirage_type.h b/src/misc/mirage_type.h
new file mode 100644
index 0000000..2e6a5a8
--- /dev/null
+++ b/src/misc/mirage_type.h
@@ -0,0 +1,49 @@
+#pragma once
+#include <chrono>
+#include <Eigen/Core>
+
+using time_type = decltype(std::chrono::high_resolution_clock::now());
+using duration_type = decltype(std::chrono::high_resolution_clock::now() - std::chrono::high_resolution_clock::now());
+
+inline time_type get_current_time() {
+    return std::chrono::high_resolution_clock::now();
+}
+
+// struct VSInput {
+//     float2 position : POSITION;
+//     float2 uv : TEXCOORD0;
+//     float4 color : COLOR0;
+//     float4 param_a : TEXCOORD1;
+//     float4 param_b : TEXCOORD2;
+//     float4 param_c : TEXCOORD3;
+// };
+struct mirage_vertex_param_t {
+    union {
+        float a;
+        float x;
+    };
+    union {
+        float b;
+        float y;
+    };
+    union {
+        float c;
+        float z;
+    };
+    union {
+        float d;
+        float w;
+    };
+};
+struct mirage_vertex_t {
+    Eigen::Vector2f position;
+    Eigen::Vector2f uv;
+    Eigen::Vector4f color;
+    mirage_vertex_param_t param_a;
+    mirage_vertex_param_t param_b;
+    mirage_vertex_param_t param_c;
+};
+
+struct mirage_triangle_t {
+    uint32_t indices[3];
+};
diff --git a/src/misc/scope_exit.h b/src/misc/scope_exit.h
new file mode 100644
index 0000000..8d87603
--- /dev/null
+++ b/src/misc/scope_exit.h
@@ -0,0 +1,28 @@
+#pragma once
+
+template <typename FuncType>
+class scope_exit_guard {
+public:
+    scope_exit_guard(FuncType&& in_func) : func((FuncType&&)in_func) {
+    }
+    virtual ~scope_exit_guard() {
+        func();
+    }
+private:
+    FuncType func;
+};
+
+struct scope_exit_syntax_support {
+    virtual ~scope_exit_syntax_support() = default;
+    template <typename FuncType>
+    scope_exit_guard<FuncType> operator+(FuncType&& InFunc)
+    {
+        return scope_exit_guard<FuncType>((FuncType&&)InFunc);
+    }
+};
+
+#define PRIVATE_CONCATENATE_DETAIL(x, y) x##y
+#define PRIVATE_CONCATENATE(x, y) PRIVATE_CONCATENATE_DETAIL(x, y)
+
+#define ON_SCOPE_EXIT const auto PRIVATE_CONCATENATE(scope_exit, __LINE__) = scope_exit_syntax_support() + [&]()
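A hedged usage sketch, not from the patch, of the ON_SCOPE_EXIT macro above; the guard's lambda runs when the enclosing scope unwinds, on every exit path, which is how windows_window_state uses it to release the back buffer:

    #include <cstdio>
    #include "misc/scope_exit.h"

    bool read_magic(const char* path) { // `path` is a hypothetical input
        FILE* f = std::fopen(path, "rb");
        if (!f) return false;
        ON_SCOPE_EXIT { std::fclose(f); }; // always closes the file
        char magic[4];
        if (std::fread(magic, 1, 4, f) != 4) return false; // guard still fires here
        return magic[0] == 'M';
    }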
diff --git a/src/shaders/mirage_rounded_rect.slang b/src/shaders/mirage_rounded_rect.slang
new file mode 100644
index 0000000..fbddc5b
--- /dev/null
+++ b/src/shaders/mirage_rounded_rect.slang
@@ -0,0 +1,85 @@
+#include "mirage_util.slang"
+
+cbuffer ParamBuffer
+{
+    matrix transform;
+};
+
+struct PSInput {
+    float4 position : SV_POSITION;
+    float2 uv : TEXCOORD0;
+    float4 color : COLOR;
+    float2 size : TEXCOORD1; // rectangle size (in pixels)
+    float4 radius : TEXCOORD3; // corner radii (in pixels): [top-left, top-right, bottom-left, bottom-right]
+};
+
+[shader("vertex")]
+PSInput vertex_main(VSInput input)
+{
+    PSInput output;
+
+    // the transform is passed in through a constant buffer or structured buffer
+    output.position = mul(float4(input.position, 0.0, 1.0), transform);
+    output.uv = input.uv;
+    output.color = input.color;
+
+    // pass the size and radius data through
+    output.size = input.param_a.xy;
+    output.radius = input.param_b;
+
+    return output;
+}
+
+// improved distance function
+float distance_from_rect_uv(float2 p, float2 size, float corner_radius) {
+    // guard against division by zero
+    corner_radius = max(corner_radius, 0.001);
+
+    // convert the corner radius into UV space
+    float2 corner_radius_uv = corner_radius / size;
+
+    // compute the inner rectangle bounds (the rectangle inscribed by the corners)
+    float2 inner_rect = float2(1.0, 1.0) - corner_radius_uv;
+
+    // distance to the inner rectangle
+    float2 q = abs(p) - inner_rect;
+
+    // convert back to pixel space for the distance computation
+    q *= size;
+
+    // evaluate the SDF (signed distance field)
+    return length(max(q, 0.0)) + min(max(q.x, q.y), 0.0) - corner_radius;
+}
+
+[shader("pixel")]
+float4 pixel_main(PSInput input) : SV_Target
+{
+    // map the UV coordinates to normalized device coordinates (-1,1) around the center
+    float2 p = uv_to_ndc(input.uv);
+
+    // determine which quadrant the current pixel is in, to pick the right corner radius
+    // 0=top-left (x<0,y<0), 1=top-right (x>0,y<0), 2=bottom-left (x<0,y>0), 3=bottom-right (x>0,y>0)
+    int idx = (p.x < 0 ? 0 : 1) + (p.y < 0 ? 0 : 2);
+
+    // fetch the corner radius of the current quadrant * 2
+    float r = input.radius[idx];
+
+    // signed distance to the rounded rectangle's boundary
+    float d = distance_from_rect_uv(p, input.size, r);
+
+    // improved anti-aliasing - derive an explicit pixel width
+    float pixelWidth = 1.0;
+    float2 ddx_p = ddx(p * input.size);
+    float2 ddy_p = ddy(p * input.size);
+
+    // edge width based on the pixel gradients
+    float edge_width = 0.5 * length(float2(length(ddx_p), length(ddy_p)));
+
+    // apply a smooth transition - improves anti-aliasing quality
+    float alpha_factor = 1.0 - smoothstep(-edge_width, edge_width, d);
+
+    // write out the final color
+    float4 final_color = input.color;
+    final_color.a *= alpha_factor;
+    return final_color;
+}
\ No newline at end of file
diff --git a/src/shaders/mirage_util.slang b/src/shaders/mirage_util.slang
new file mode 100644
index 0000000..8cfc746
--- /dev/null
+++ b/src/shaders/mirage_util.slang
@@ -0,0 +1,13 @@
+struct VSInput {
+    float2 position : POSITION;
+    float2 uv : TEXCOORD0;
+    float4 color : COLOR0;
+    float4 param_a : TEXCOORD1;
+    float4 param_b : TEXCOORD2;
+    float4 param_c : TEXCOORD3;
+};
+
+// map the UV coordinate system from [0, 0]-[1, 1] to [-1, -1]-[1, 1]
+float2 uv_to_ndc(float2 uv) {
+    return uv * 2 - 1;
+}
diff --git a/src/shaders/test.slang b/src/shaders/test.slang
new file mode 100644
index 0000000..18440a6
--- /dev/null
+++ b/src/shaders/test.slang
@@ -0,0 +1,27 @@
+#include "mirage_util.slang"
+
+struct ParamBuffer
+{
+    matrix transform;
+};
+ParameterBlock<ParamBuffer> param_buffer : register(b1);
+
+struct PSInput {
+    float4 position : SV_POSITION; // clip-space position
+    float4 color : COLOR; // color
+};
+
+[shader("vertex")]
+PSInput vertex_main(VSInput input)
+{
+    PSInput output;
+    output.position = mul(float4(input.position, 0, 1.0), param_buffer.transform);
+    output.color = input.color;
+    return output;
+}
+
+[shader("pixel")]
+float4 pixel_main(PSInput input) : SV_TARGET
+{
+    return input.color;
+}
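A hedged CPU-side check, not part of the patch, transliterated from the Slang above into C++: for a 100x100 rectangle with radius 10, the center should come out far inside the shape (negative distance) and the exact corner slightly outside the rounding (positive distance):

    #include <algorithm>
    #include <cmath>
    #include <iostream>

    // mirrors distance_from_rect_uv(); p is in [-1,1] across the rect, size/radius in pixels
    float distance_from_rect_uv(float px, float py, float sx, float sy, float r) {
        r = std::max(r, 0.001f);
        const float qx = (std::fabs(px) - (1.0f - r / sx)) * sx;
        const float qy = (std::fabs(py) - (1.0f - r / sy)) * sy;
        const float outside = std::hypot(std::max(qx, 0.0f), std::max(qy, 0.0f));
        return outside + std::min(std::max(qx, qy), 0.0f) - r;
    }

    int main() {
        std::cout << distance_from_rect_uv(0.f, 0.f, 100.f, 100.f, 10.f) << "\n"; // -100 (inside)
        std::cout << distance_from_rect_uv(1.f, 1.f, 100.f, 100.f, 10.f) << "\n"; // ~4.14 (outside)
    }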
diff --git a/src/sokol/sokol_gfx.h b/src/sokol/sokol_gfx.h
index 19b8272..68ba78d 100644
--- a/src/sokol/sokol_gfx.h
+++ b/src/sokol/sokol_gfx.h
@@ -122,11 +122,11 @@
     sg_pipeline sg_make_pipeline(const sg_pipeline_desc*)
     sg_attachments sg_make_attachments(const sg_attachments_desc*)
-    --- start a render pass:
+    --- start a render- or compute-pass:
         sg_begin_pass(const sg_pass* pass);
-    Typically, passes render into an externally provided swapchain which
+    Typically, render passes render into an externally provided swapchain which
     presents the rendering result on the display. Such a 'swapchain pass'
     is started like this:
@@ -137,23 +137,29 @@
     a specific color), and .swapchain is an sg_swapchain
     struct all the required information to render into the swapchain's surfaces.
-    To start an 'offscreen pass' into sokol-gfx image objects, an sg_attachment
+    To start an 'offscreen render pass' into sokol-gfx image objects, an sg_attachment
     object handle is required instead of an sg_swapchain struct. An offscreen
     pass is started like this (assuming attachments is an sg_attachments handle):
         sg_begin_pass(&(sg_pass){ .action = { ... }, .attachments = attachments });
-    --- set the render pipeline state for the next draw call with:
+    To start a compute-pass, just set the .compute item to true:
+
+        sg_begin_pass(&(sg_pass){ .compute = true });
+
+    --- set the pipeline state for the next draw call with:
         sg_apply_pipeline(sg_pipeline pip)
     --- fill an sg_bindings struct with the resource bindings for the next
-        draw call (0..N vertex buffers, 0 or 1 index buffer, 0..N images,
+        draw- or dispatch-call (0..N vertex buffers, 0 or 1 index buffer, 0..N images,
         samplers and storage-buffers), and call:
         sg_apply_bindings(const sg_bindings* bindings)
-        to update the resource bindings
+        to update the resource bindings. Note that in a compute pass, no vertex-
+        or index-buffer bindings are allowed and will be rejected by the validation
+        layer.
    --- optionally update shader uniform data with:
@@ -176,7 +182,14 @@
        containing per-instance data must be bound, and the num_instances
        parameter must be > 1.
-   --- finish the current rendering pass with:
+   --- ...or kick off a dispatch call to invoke a compute shader workload:
+
+       sg_dispatch(int num_groups_x, int num_groups_y, int num_groups_z)
+
+       The dispatch args define the number of 'compute workgroups' processed
+       by the currently applied compute shader.
+
+   --- finish the current pass with:
        sg_end_pass()
@@ -197,7 +210,7 @@
        sg_destroy_pipeline(sg_pipeline pip)
        sg_destroy_attachments(sg_attachments atts)
-   --- to set a new viewport rectangle, call
+   --- to set a new viewport rectangle, call:
        sg_apply_viewport(int x, int y, int width, int height, bool origin_top_left)
@@ -214,7 +227,7 @@
        sg_apply_scissor_rectf(float x, float y, float width, float height, bool origin_top_left)
    Both sg_apply_viewport() and sg_apply_scissor_rect() must be called
-   inside a rendering pass
+   inside a rendering pass (e.g. not in a compute pass, or outside a pass)
    Note that sg_begin_default_pass() and sg_begin_pass() will reset both the
    viewport and scissor rectangles to cover the entire framebuffer.
@@ -432,14 +445,18 @@
    passes as textures (it is invalid to use the same image both as render target
    and as texture in the same pass).
-    The following sokol-gfx functions must only be called inside a render pass:
+    The following sokol-gfx functions must only be called inside a render-pass:
+
+        sg_apply_viewport[f]
+        sg_apply_scissor_rect[f]
+        sg_draw
+
+    The following functions may be called inside a render- or compute-pass, but
+    not outside a pass:
-        sg_apply_viewport(f)
-        sg_apply_scissor_rect(f)
         sg_apply_pipeline
         sg_apply_bindings
         sg_apply_uniforms
-        sg_draw
    A frame must have at least one 'swapchain render pass' which renders into an
    externally provided swapchain provided as an sg_swapchain struct to the
@@ -618,8 +635,6 @@
    must be 'resolved' into a separate 'resolve image', before that image can
    be used as texture.
-   NOTE: currently multisample-images cannot be bound as textures.
    Creating a simple attachments object for multisampled rendering requires
    3 attachment images: the color attachment image which has a sample
    count > 1, a resolve attachment image of the same size and pixel format
@@ -683,13 +698,89 @@
    texture would result in a validation error).
+   ON COMPUTE PASSES
+   =================
+   Compute passes are used to update the content of storage resources
+   (currently only storage buffers) by running compute shader code on
+   the GPU. This will almost always be more efficient than computing
+   that same data on the CPU and uploading the data via `sg_update_buffer()`.
+
+   NOTE: compute passes are only supported on the following platforms and
+   backends:
+
+   - macOS and iOS with Metal
+   - Windows with D3D11 and OpenGL
+   - Linux with OpenGL
+   - web with WebGPU
+
+   ...this means compute shaders can't be used on the following platform/backend
+   combos (the same restrictions apply to using storage buffers without compute
+   shaders):
+
+   - macOS with GL
+   - iOS with GLES3
+   - Android
+   - web with WebGL2
+
+   A compute pass is started with:
+
+       sg_begin_pass(&(sg_pass){ .compute = true });
+
+   ...and finished with:
+
+       sg_end_pass();
+
+   Typically the following functions will be called inside a compute pass:
+
+       sg_apply_pipeline
+       sg_apply_bindings
+       sg_apply_uniforms
+       sg_dispatch
+
+   The following functions are disallowed inside a compute pass
+   and will cause validation layer errors:
+
+       sg_apply_viewport[f]
+       sg_apply_scissor_rect[f]
+       sg_draw
+
+   Only special 'compute shaders' and 'compute pipelines' can be used in
+   compute passes. A compute shader only has a compute-function instead
+   of a vertex- and fragment-function pair, and it doesn't accept vertex-
+   and index-buffers as input, only storage-buffers, textures and non-filtering
+   samplers (more details on compute shaders in the following section).
+
+   A compute pipeline is created by providing a compute shader object,
+   setting the .compute creation parameter to true and not defining any
+   'render state':
+
+       sg_pipeline pip = sg_make_pipeline(&(sg_pipeline_desc){
+           .compute = true,
+           .shader = compute_shader,
+       });
+
+   The sg_apply_bindings and sg_apply_uniforms calls are the same as in
+   render passes, with the exception that no vertex- and index-buffers
+   can be bound in the sg_apply_bindings call.
+
+   Finally to kick off a compute workload, call sg_dispatch with the
+   number of workgroups in the x, y and z-dimension:
+
+       sg_dispatch(int num_groups_x, int num_groups_y, int num_groups_z)
+
+   Also see the following compute-shader samples:
+
+       - https://floooh.github.io/sokol-webgpu/instancing-compute-sapp.html
+       - https://floooh.github.io/sokol-webgpu/computeboids-sapp.html
+
+
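A hedged end-to-end sketch (an editor's illustration, not part of the sokol documentation) of the compute-pass call order described above, written against the C API from C++; the pipeline, bindings and element count are assumed to have been created elsewhere:

    // assumes sokol-gfx is initialized, compute_pip was created with .compute = true,
    // and compute_bindings references only storage buffers
    void run_compute(sg_pipeline compute_pip, sg_bindings compute_bindings, int num_elems) {
        sg_pass pass = {};
        pass.compute = true;                  // no attachments or swapchain needed
        sg_begin_pass(&pass);
        sg_apply_pipeline(compute_pip);
        sg_apply_bindings(&compute_bindings); // no vertex- or index-buffer bindings allowed
        sg_dispatch(num_elems / 64, 1, 1);    // workgroup count; local size 64 assumed in shader
        sg_end_pass();
    }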
+ + Finally, to kick off a compute workload, call sg_dispatch with the + number of workgroups in the x, y and z-dimension: + + sg_dispatch(int num_groups_x, int num_groups_y, int num_groups_z) + + Also see the following compute-shader samples: + + - https://floooh.github.io/sokol-webgpu/instancing-compute-sapp.html + - https://floooh.github.io/sokol-webgpu/computeboids-sapp.html + + ON SHADER CREATION ================== sokol-gfx doesn't come with an integrated shader cross-compiler, instead backend-specific shader sources or binary blobs need to be provided when - creating a shader object, along with information about the shader resource - binding interface needed to bind sokol-gfx resources to the proper - shader inputs. + creating a shader object, along with reflection information about the + shader resource binding interface needed to bind sokol-gfx resources to the + proper shader inputs. The easiest way to provide all this shader creation data is to use the sokol-shdc shader compiler tool to compile shaders from a common @@ -718,16 +809,17 @@ To create shaders with backend-specific shader code or binary blobs, the sg_make_shader() function requires the following information: - - Shader code or shader binary blobs for the vertex- and fragment- shader-stage: + - Shader code or shader binary blobs for the vertex- and fragment-, or the + compute-shader-stage: - for the desktop GL backend, source code can be provided in '#version 410' or - '#version 430', version 430 is required for storage buffer support, but note - that this is not available on macOS + '#version 430', version 430 is required when using storage buffers and + compute shaders, but note that this is not available on macOS - for the GLES3 backend, source code must be provided in '#version 300 es' syntax - - for the D3D11 backend, shaders can be provided as source or binary blobs, the - source code should be in HLSL4.0 (for best compatibility) or alternatively - in HLSL5.0 syntax (other versions may work but are not tested), NOTE: when - shader source code is provided for the D3D11 backend, sokol-gfx will dynamically - load 'd3dcompiler_47.dll' + - for the D3D11 backend, shaders can be provided as source or binary + blobs, the source code should be in HLSL4.0 (for compatibility with old + low-end GPUs) or preferably in HLSL5.0 syntax, note that when + shader source code is provided for the D3D11 backend, sokol-gfx will + dynamically load 'd3dcompiler_47.dll' - for the Metal backends, shaders can be provided as source or binary blobs, the MSL version should be in 'metal-1.1' (other versions may work but are not tested) - for the WebGPU backend, shaders must be provided as WGSL source code @@ -751,8 +843,22 @@ NOTE that vertex attributes currently must not have gaps. This requirement may be relaxed in the future. + - Specifically for Metal compute shaders, the 'number of threads per threadgroup' + must be provided. Normally this is extracted by sokol-shdc from the GLSL + shader source code.
For instance the following statement in the input + GLSL: + + layout(local_size_x=64, local_size_y=1, local_size_z=1) in; + + ...will be communicated to the sokol-gfx Metal backend in the + code-generated sg_shader_desc struct: + + (sg_shader_desc){ + .mtl_threads_per_threadgroup = { .x = 64, .y = 1, .z = 1 }, + } + - Information about each uniform block used in the shader: - - the shader stage of the uniform block (vertex or fragment) + - the shader stage of the uniform block (vertex, fragment or compute) - the size of the uniform block in number of bytes - a memory layout hint (currently 'native' or 'std140') where 'native' defines a backend-specific memory layout which shouldn't be used for cross-platform code. @@ -769,11 +875,20 @@ - A description of each storage buffer used in the shader: - the shader stage of the storage buffer - - a boolean 'readonly' flag, note that currently only - readonly storage buffers are supported + - a boolean 'readonly' flag, this is used for validation and hazard + tracking in some 3D backends. Note that in render passes, only + readonly storage buffer bindings are allowed. In compute passes, any + read/write storage buffer binding is assumed to be written to by the + compute shader. - a backend-specific bind slot: - - D3D11/HLSL: the texture register N (`register(tN)`) where N is 0..23 - (in HLSL, storage buffers and texture share the same bind space) + - D3D11/HLSL: + - for readonly storage buffer bindings: the texture register N + (`register(tN)`) where N is 0..23 (in HLSL, readonly storage + buffers and textures share the same bind space for + 'shader resource views') + - for read/write storage buffer bindings: the UAV register N + (`register(uN)`) where N is 0..7 (in HLSL, readwrite storage + buffers use their own bind space for 'unordered access views') - Metal/MSL: the buffer bind slot N (`[[buffer(N)]]`) where N is 8..15 - WebGPU/WGSL: the binding N in `@group(0) @binding(N)` where N is 0..127 - GL/GLSL: the buffer binding N in `layout(binding=N)` where N is 0..7 @@ -781,7 +896,7 @@ and platforms - A description of each texture/image used in the shader: - - the shader stage of the texture (vertex or fragment) + - the shader stage of the texture (vertex, fragment or compute) - the expected image type: - SG_IMAGETYPE_2D - SG_IMAGETYPE_CUBE @@ -794,16 +909,14 @@ - SG_IMAGESAMPLETYPE_UINT - SG_IMAGESAMPLETYPE_UNFILTERABLE_FLOAT - a flag whether the texture is expected to be multisampled - (currently it's not supported to fetch data from multisampled - textures in shaders, but this is planned for a later time) - a backend-specific bind slot: - D3D11/HLSL: the texture register N (`register(tN)`) where N is 0..23 - (in HLSL, storage buffers and texture share the same bind space) + (in HLSL, readonly storage buffers and textures share the same bind space) - Metal/MSL: the texture bind slot N (`[[texture(N)]]`) where N is 0..15 - WebGPU/WGSL: the binding N in `@group(0) @binding(N)` where N is 0..127 - A description of each sampler used in the shader: - - the shader stage of the sampler (vertex or fragment) + - the shader stage of the sampler (vertex, fragment or compute) - the expected sampler type: - SG_SAMPLERTYPE_FILTERING, - SG_SAMPLERTYPE_NONFILTERING, @@ -835,7 +948,8 @@ - D3D11/HLSL: - separate bindslot space per shader stage - uniform blocks (as cbuffer): `register(b0..b7)` - - textures and storage buffers: `register(t0..t23)` + - textures and readonly storage buffers: `register(t0..t23)` + - read/write storage buffers: `register(u0..u7)` -
samplers: `register(s0..s15)` - Metal/MSL: - separate bindslot space per shader stage @@ -1012,6 +1126,11 @@ ON STORAGE BUFFERS ================== + The two main purposes of storage buffers are: + + - to be populated by compute shaders with dynamically generated data + - to provide random-access data to all shader stages + Storage buffers can be used to pass large amounts of random access structured data from the CPU side to the shaders. They are similar to data textures, but are more convenient to use both on the CPU and shader side since they can be accessed @@ -1023,21 +1142,23 @@ - all GLES3 platforms (WebGL2, iOS, Android - with the option that support on Android may be added at a later point) - Currently only 'readonly' storage buffers are supported (meaning it's not possible - to write to storage buffers from shaders). - To use storage buffers, the following steps are required: - - write a shader which uses storage buffers (also see the example links below) + - write a shader which uses storage buffers (vertex- and fragment-shaders + can only read from storage buffers, while compute-shaders can both read + and write storage buffers) - create one or more storage buffers via sg_make_buffer() with the buffer type SG_BUFFERTYPE_STORAGEBUFFER - when creating a shader via sg_make_shader(), populate the sg_shader_desc struct with binding info (when using sokol-shdc, this step will be taken care of automatically) - - which storage buffer bind slots on the vertex- and fragment-stage + - which storage buffer bind slots on the vertex-, fragment- or compute-stage are occupied - - whether the storage buffer on that bind slot is readonly (this is currently required - to be true) + - whether the storage buffer on that bind slot is readonly (readonly + bindings are required for vertex- and fragment-shaders, and in compute + shaders the readonly flag is used to control hazard tracking in some + 3D backends) + - when calling sg_apply_bindings(), apply the matching bind slots with the previously created storage buffers - ...and that's it.
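Expressed in code, those steps boil down to something like the following sketch (illustrative only; `vertices` is a placeholder CPU-side data array, and the shader is assumed to read from storage buffer bind slot 0):

    // create an immutable storage buffer with initial data:
    sg_buffer sbuf = sg_make_buffer(&(sg_buffer_desc){
        .type = SG_BUFFERTYPE_STORAGEBUFFER,
        .data = SG_RANGE(vertices),
    });
    // ...later, inside a pass and after sg_apply_pipeline():
    sg_apply_bindings(&(sg_bindings){
        .storage_buffers[0] = sbuf, // matches layout(binding=0) in the shader
    });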
@@ -1056,6 +1177,12 @@ - the Ozz animation sample rewritten to pull all rendering data from storage buffers: - C code: https://github.com/floooh/sokol-samples/blob/master/sapp/ozz-storagebuffer-sapp.cc - shader: https://github.com/floooh/sokol-samples/blob/master/sapp/ozz-storagebuffer-sapp.glsl + - the instancing sample modified to use compute shaders: + - C code: https://github.com/floooh/sokol-samples/blob/master/sapp/instancing-compute-sapp.c + - shader: https://github.com/floooh/sokol-samples/blob/master/sapp/instancing-compute-sapp.glsl + - the Compute Boids sample ported to sokol-gfx: + - C code: https://github.com/floooh/sokol-samples/blob/master/sapp/computeboids-sapp.c + - shader: https://github.com/floooh/sokol-samples/blob/master/sapp/computeboids-sapp.glsl ...also see the following backend-specific vertex pulling samples (those also don't use sokol-shdc): @@ -1064,16 +1191,25 @@ - Metal: https://github.com/floooh/sokol-samples/blob/master/metal/vertexpulling-metal.c - WebGPU: https://github.com/floooh/sokol-samples/blob/master/wgpu/vertexpulling-wgpu.c + ...and the backend-specific compute shader samples: + + - D3D11: https://github.com/floooh/sokol-samples/blob/master/d3d11/instancing-compute-d3d11.c + - desktop GL: https://github.com/floooh/sokol-samples/blob/master/glfw/instancing-compute-glfw.c + - Metal: https://github.com/floooh/sokol-samples/blob/master/metal/instancing-compute-metal.c + - WebGPU: https://github.com/floooh/sokol-samples/blob/master/wgpu/instancing-compute-wgpu.c + Storage buffer shader authoring caveats when using sokol-shdc: - - declare a storage buffer interface block with `layout(binding=N) readonly buffer [name] { ... }` + - declare a read-only storage buffer interface block with `layout(binding=N) readonly buffer [name] { ... }` (where 'N' is the index in `sg_bindings.storage_buffers[N]`) + - ...or a read/write storage buffer interface block with `layout(binding=N) buffer [name] { ... }` - declare a struct which describes a single array item in the storage buffer interface block - only put a single flexible array member into the storage buffer interface block - E.g. a complete example in 'sokol-shdc GLSL': + E.g. a complete example in 'sokol-shdc GLSL': ```glsl + @vs // declare a struct: struct sb_vertex { vec3 pos; @@ -1088,17 +1224,43 @@ vec3 pos = vtx[gl_VertexIndex].pos; ... } + @end ``` + In a compute shader you can read and write the same item in the same + storage buffer (but you'll have to be careful with random access since + many threads of the same compute function run in parallel): + + @cs + struct sb_item { + vec3 pos; + vec3 vel; + } + layout(binding=0) buffer items_ssbo { + sb_item items[]; + } + layout(local_size_x=64, local_size_y=1, local_size_z=1) in; + void main() { + uint idx = gl_GlobalInvocationID.x; + vec3 pos = items[idx].pos; + ...
+ items[idx].pos = pos; + } + @end + Backend-specific storage-buffer caveats (not relevant when using sokol-shdc): D3D11: - storage buffers are created as 'raw' Byte Address Buffers (https://learn.microsoft.com/en-us/windows/win32/direct3d11/overviews-direct3d-11-resources-intro#raw-views-of-buffers) - - in HLSL, use a ByteAddressBuffer to access the buffer content + - in HLSL, use a ByteAddressBuffer for readonly access to the buffer content: (https://learn.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-object-byteaddressbuffer) - - in D3D11, storage buffers and textures share the same bind slots (declared as - `register(tN)` in HLSL), where N must be in the range 0..23) + - ...or RWByteAddressBuffer for read/write access: + (https://learn.microsoft.com/en-us/windows/win32/direct3dhlsl/sm5-object-rwbyteaddressbuffer) + - readonly storage buffers and textures are both bound as 'shader-resource-view' and + share the same bind slots (declared as `register(tN)` in HLSL, where N must be in the range 0..23) + - read/write storage buffers are bound as 'unordered-access-view' (declared as `register(uN)` in HLSL + where N is in the range 0..7) Metal: - in Metal there is no internal difference between vertex-, uniform- and @@ -1866,7 +2028,7 @@ typedef struct sg_features { bool image_clamp_to_border; // border color and clamp-to-border uv-wrap mode is supported bool mrt_independent_blend_state; // multiple-render-target rendering can use per-render-target blend state bool mrt_independent_write_mask; // multiple-render-target rendering can use per-render-target color write masks - bool storage_buffer; // storage buffers are supported + bool compute; // storage buffers and compute shaders are supported bool msaa_image_bindings; // if true, multisampled images can be bound as texture resources } sg_features; @@ -2248,7 +2410,7 @@ typedef enum sg_uniform_type { only relevant for the GL backend where the internal layout of uniform blocks must be known to sokol-gfx. For all other backends the internal memory layout of uniform blocks doesn't matter, sokol-gfx - will just pass uniform data as a single memory blob to the + will just pass uniform data as an opaque memory blob to the 3D backend. SG_UNIFORMLAYOUT_NATIVE (default) @@ -2332,7 +2494,7 @@ typedef enum sg_face_winding { .compare .stencil .front.compare - .back.compar + .back.compare sg_sampler_desc .compare @@ -2673,33 +2835,33 @@ typedef struct sg_swapchain { The sg_pass structure is passed as argument into the sg_begin_pass() function. - For an offscreen rendering pass, an sg_pass_action struct and sg_attachments - object must be provided, and for swapchain passes, an sg_pass_action and - an sg_swapchain struct. It is an error to provide both an sg_attachments - handle and an initialized sg_swapchain struct in the same sg_begin_pass(). - - An sg_begin_pass() call for an offscreen pass would look like this (where - `attachments` is an sg_attachments handle): - - sg_begin_pass(&(sg_pass){ - .action = { ... }, - .attachments = attachments, - }); - - ...and a swapchain render pass would look like this (using the sokol_glue.h - helper function sglue_swapchain() which gets the swapchain properties from - sokol_app.h): + For a swapchain render pass, provide an sg_pass_action and sg_swapchain + struct (for instance via the sglue_swapchain() helper function from + sokol_glue.h): sg_begin_pass(&(sg_pass){ .action = { ...
}, .swapchain = sglue_swapchain(), }); + For an offscreen render pass, provide an sg_pass_action struct and + an sg_attachments handle: + + sg_begin_pass(&(sg_pass){ + .action = { ... }, + .attachments = attachments, + }); + You can also omit the .action object to get default pass action behaviour (clear to color=grey, depth=1 and stencil=0). + + For a compute pass, just set the sg_pass.compute boolean to true: + + sg_begin_pass(&(sg_pass){ .compute = true }); */ typedef struct sg_pass { uint32_t _start_canary; + bool compute; sg_pass_action action; sg_attachments attachments; sg_swapchain swapchain; @@ -2736,6 +2898,9 @@ typedef struct sg_pass { - SG_MAX_SAMPLER_BINDSLOTS - SG_MAX_STORAGEBUFFER_BINDSLOTS + Note that inside compute passes, vertex- and index-buffer bindings are + disallowed. + When using sokol-shdc for shader authoring, the `layout(binding=N)` annotation in the shader code directly maps to the slot index for that resource type in the bindings struct, for instance the following vertex- @@ -2822,9 +2987,13 @@ typedef struct sg_bindings { keep the .size item zero-initialized, and set the size together with the pointer to the initial data in the .data item. - For mutable buffers without initial data, keep the .data item + For immutable or mutable buffers without initial data, keep the .data item zero-initialized, and set the buffer size in the .size item instead. + NOTE: Immutable buffers without initial data are guaranteed to be + zero-initialized. For mutable (dynamic or streaming) buffers, the + initial content is undefined. + You can also set both size values, but currently both size values must be identical (this may change in the future when the dynamic resource management may become more flexible). @@ -3017,11 +3186,26 @@ typedef struct sg_sampler_desc { - for D3D11: an optional compile target when source code is provided (the defaults are "vs_4_0" and "ps_4_0") - - vertex attributes required by some backends: + - ...or alternatively, a compute function: + - the shader source or bytecode + - an optional entry point name + - for D3D11: an optional compile target when source code is provided + (the default is "cs_5_0") + + - vertex attributes required by some backends (not for compute shaders): - for the GL backend: optional vertex attribute names used for name lookup - for the D3D11 backend: semantic names and indices + - only for compute shaders on the Metal backend: + - the workgroup size aka 'threads per thread-group' + + In other 3D APIs this is declared in the shader code: + - GLSL: `layout(local_size_x=x, local_size_y=y, local_size_z=z) in;` + - HLSL: `[numthreads(x, y, z)]` + - WGSL: `@workgroup_size(x, y, z)` + ...but in Metal the workgroup size is declared on the CPU side + - reflection information for each uniform block used by the shader: - the shader stage the uniform block appears in (SG_SHADERSTAGE_*) - the size in bytes of the uniform block @@ -3059,7 +3243,9 @@ typedef struct sg_sampler_desc { - whether the storage buffer is readonly (currently this must always be true) - backend specific bindslots: - - HLSL: the texture(sic) register `register(t0..23)` + - HLSL: + - for readonly storage buffer bindings: `register(t0..23)` + - for read/write storage buffer bindings: `register(u0..7)` - MSL: the buffer attribute `[[buffer(8..15)]]` - WGSL: the binding in `@group(1) @binding(0..127)` - GL: the binding in `layout(binding=0..7)` @@ -3107,6 +3293,7 @@ typedef enum sg_shader_stage { SG_SHADERSTAGE_NONE, SG_SHADERSTAGE_VERTEX, SG_SHADERSTAGE_FRAGMENT, +
SG_SHADERSTAGE_COMPUTE, } sg_shader_stage; typedef struct sg_shader_function { @@ -3159,7 +3346,8 @@ typedef struct sg_shader_sampler { typedef struct sg_shader_storage_buffer { sg_shader_stage stage; bool readonly; - uint8_t hlsl_register_t_n; // HLSL register(tn) bind slot + uint8_t hlsl_register_t_n; // HLSL register(tn) bind slot (for readonly access) + uint8_t hlsl_register_u_n; // HLSL register(un) bind slot (for read/write access) uint8_t msl_buffer_n; // MSL [[buffer(n)]] bind slot uint8_t wgsl_group1_binding_n; // WGSL @group(1) @binding(n) bind slot uint8_t glsl_binding_n; // GLSL layout(binding=n) @@ -3172,16 +3360,22 @@ typedef struct sg_shader_image_sampler_pair { const char* glsl_name; // glsl name binding required because of GL 4.1 and WebGL2 } sg_shader_image_sampler_pair; +typedef struct sg_mtl_shader_threads_per_threadgroup { + int x, y, z; +} sg_mtl_shader_threads_per_threadgroup; + typedef struct sg_shader_desc { uint32_t _start_canary; sg_shader_function vertex_func; sg_shader_function fragment_func; + sg_shader_function compute_func; sg_shader_vertex_attr attrs[SG_MAX_VERTEX_ATTRIBUTES]; sg_shader_uniform_block uniform_blocks[SG_MAX_UNIFORMBLOCK_BINDSLOTS]; sg_shader_storage_buffer storage_buffers[SG_MAX_STORAGEBUFFER_BINDSLOTS]; sg_shader_image images[SG_MAX_IMAGE_BINDSLOTS]; sg_shader_sampler samplers[SG_MAX_SAMPLER_BINDSLOTS]; sg_shader_image_sampler_pair image_sampler_pairs[SG_MAX_IMAGE_SAMPLER_PAIRS]; + sg_mtl_shader_threads_per_threadgroup mtl_threads_per_threadgroup; const char* label; uint32_t _end_canary; } sg_shader_desc; @@ -3192,6 +3386,15 @@ typedef struct sg_shader_desc { The sg_pipeline_desc struct defines all creation parameters for an sg_pipeline object, used as argument to the sg_make_pipeline() function: + Pipeline objects come in two flavours: + + - render pipelines for use in render passes + - compute pipelines for use in compute passes + + A compute pipeline only requires a compute shader object but no + 'render state', while a render pipeline requires a vertex/fragment shader + object and additional render state declarations: + - the vertex layout for all input vertex buffers - a shader object - the 3D primitive type (points, lines, triangles, ...) @@ -3207,6 +3410,7 @@ typedef struct sg_shader_desc { The default configuration is as follows: + .compute: false (must be set to true for a compute pipeline) .shader: 0 (must be initialized with a valid sg_shader id!) 
.layout: .buffers[]: vertex buffer layouts @@ -3315,6 +3519,7 @@ typedef struct sg_color_target_state { typedef struct sg_pipeline_desc { uint32_t _start_canary; + bool compute; sg_shader shader; sg_vertex_layout_state layout; sg_depth_state depth; @@ -3415,6 +3620,7 @@ typedef struct sg_trace_hooks { void (*apply_bindings)(const sg_bindings* bindings, void* user_data); void (*apply_uniforms)(int ub_index, const sg_range* data, void* user_data); void (*draw)(int base_element, int num_elements, int num_instances, void* user_data); + void (*dispatch)(int num_groups_x, int num_groups_y, int num_groups_z, void* user_data); void (*end_pass)(void* user_data); void (*commit)(void* user_data); void (*alloc_buffer)(sg_buffer result, void* user_data); @@ -3472,7 +3678,7 @@ typedef struct sg_trace_hooks { sg_query_sampler_info() sg_query_shader_info() sg_query_pipeline_info() - sg_query_pass_info() + sg_query_attachments_info() */ typedef struct sg_slot_info { sg_resource_state state; // the current state of this resource slot @@ -3531,6 +3737,7 @@ typedef struct sg_frame_stats_gl { uint32_t num_enable_vertex_attrib_array; uint32_t num_disable_vertex_attrib_array; uint32_t num_uniform; + uint32_t num_memory_barriers; } sg_frame_stats_gl; typedef struct sg_frame_stats_d3d11_pass { @@ -3550,15 +3757,20 @@ typedef struct sg_frame_stats_d3d11_pipeline { uint32_t num_vs_set_constant_buffers; uint32_t num_ps_set_shader; uint32_t num_ps_set_constant_buffers; + uint32_t num_cs_set_shader; + uint32_t num_cs_set_constant_buffers; } sg_frame_stats_d3d11_pipeline; typedef struct sg_frame_stats_d3d11_bindings { uint32_t num_ia_set_vertex_buffers; uint32_t num_ia_set_index_buffer; uint32_t num_vs_set_shader_resources; - uint32_t num_ps_set_shader_resources; uint32_t num_vs_set_samplers; + uint32_t num_ps_set_shader_resources; uint32_t num_ps_set_samplers; + uint32_t num_cs_set_shader_resources; + uint32_t num_cs_set_samplers; + uint32_t num_cs_set_unordered_access_views; } sg_frame_stats_d3d11_bindings; typedef struct sg_frame_stats_d3d11_uniforms { @@ -3605,11 +3817,15 @@ typedef struct sg_frame_stats_metal_bindings { uint32_t num_set_fragment_buffer; uint32_t num_set_fragment_texture; uint32_t num_set_fragment_sampler_state; + uint32_t num_set_compute_buffer; + uint32_t num_set_compute_texture; + uint32_t num_set_compute_sampler_state; } sg_frame_stats_metal_bindings; typedef struct sg_frame_stats_metal_uniforms { uint32_t num_set_vertex_buffer_offset; uint32_t num_set_fragment_buffer_offset; + uint32_t num_set_compute_buffer_offset; } sg_frame_stats_metal_uniforms; typedef struct sg_frame_stats_metal { @@ -3655,6 +3871,7 @@ typedef struct sg_frame_stats { uint32_t num_apply_bindings; uint32_t num_apply_uniforms; uint32_t num_draw; + uint32_t num_dispatch; uint32_t num_update_buffer; uint32_t num_append_buffer; uint32_t num_update_image; @@ -3697,6 +3914,7 @@ typedef struct sg_frame_stats { _SG_LOGITEM_XMACRO(GL_FRAMEBUFFER_STATUS_UNKNOWN, "framebuffer completeness check failed (unknown reason) (gl)") \ _SG_LOGITEM_XMACRO(D3D11_CREATE_BUFFER_FAILED, "CreateBuffer() failed (d3d11)") \ _SG_LOGITEM_XMACRO(D3D11_CREATE_BUFFER_SRV_FAILED, "CreateShaderResourceView() failed for storage buffer (d3d11)") \ + _SG_LOGITEM_XMACRO(D3D11_CREATE_BUFFER_UAV_FAILED, "CreateUnorderedAccessView() failed for storage buffer (d3d11)") \ _SG_LOGITEM_XMACRO(D3D11_CREATE_DEPTH_TEXTURE_UNSUPPORTED_PIXEL_FORMAT, "pixel format not supported for depth-stencil texture (d3d11)") \ _SG_LOGITEM_XMACRO(D3D11_CREATE_DEPTH_TEXTURE_FAILED, 
"CreateTexture2D() failed for depth-stencil texture (d3d11)") \ _SG_LOGITEM_XMACRO(D3D11_CREATE_2D_TEXTURE_UNSUPPORTED_PIXEL_FORMAT, "pixel format not supported for 2d-, cube- or array-texture (d3d11)") \ @@ -3709,6 +3927,7 @@ typedef struct sg_frame_stats { _SG_LOGITEM_XMACRO(D3D11_CREATE_SAMPLER_STATE_FAILED, "CreateSamplerState() failed (d3d11)") \ _SG_LOGITEM_XMACRO(D3D11_UNIFORMBLOCK_HLSL_REGISTER_B_OUT_OF_RANGE, "uniform block 'hlsl_register_b_n' is out of range (must be 0..7)") \ _SG_LOGITEM_XMACRO(D3D11_STORAGEBUFFER_HLSL_REGISTER_T_OUT_OF_RANGE, "storage buffer 'hlsl_register_t_n' is out of range (must be 0..23)") \ + _SG_LOGITEM_XMACRO(D3D11_STORAGEBUFFER_HLSL_REGISTER_U_OUT_OF_RANGE, "storage buffer 'hlsl_register_u_n' is out of range (must be 0..7)") \ _SG_LOGITEM_XMACRO(D3D11_IMAGE_HLSL_REGISTER_T_OUT_OF_RANGE, "image 'hlsl_register_t_n' is out of range (must be 0..23)") \ _SG_LOGITEM_XMACRO(D3D11_SAMPLER_HLSL_REGISTER_S_OUT_OF_RANGE, "sampler 'hlsl_register_s_n' is out of rang (must be 0..15)") \ _SG_LOGITEM_XMACRO(D3D11_LOAD_D3DCOMPILER_47_DLL_FAILED, "loading d3dcompiler_47.dll failed (d3d11)") \ @@ -3736,6 +3955,8 @@ typedef struct sg_frame_stats { _SG_LOGITEM_XMACRO(METAL_STORAGEBUFFER_MSL_BUFFER_SLOT_OUT_OF_RANGE, "storage buffer 'msl_buffer_n' is out of range (must be 8..15)") \ _SG_LOGITEM_XMACRO(METAL_IMAGE_MSL_TEXTURE_SLOT_OUT_OF_RANGE, "image 'msl_texture_n' is out of range (must be 0..15)") \ _SG_LOGITEM_XMACRO(METAL_SAMPLER_MSL_SAMPLER_SLOT_OUT_OF_RANGE, "sampler 'msl_sampler_n' is out of range (must be 0..15)") \ + _SG_LOGITEM_XMACRO(METAL_CREATE_CPS_FAILED, "failed to create compute pipeline state (metal)") \ + _SG_LOGITEM_XMACRO(METAL_CREATE_CPS_OUTPUT, "") \ _SG_LOGITEM_XMACRO(METAL_CREATE_RPS_FAILED, "failed to create render pipeline state (metal)") \ _SG_LOGITEM_XMACRO(METAL_CREATE_RPS_OUTPUT, "") \ _SG_LOGITEM_XMACRO(METAL_CREATE_DSS_FAILED, "failed to create depth stencil state (metal)") \ @@ -3755,8 +3976,8 @@ typedef struct sg_frame_stats { _SG_LOGITEM_XMACRO(WGPU_SAMPLER_WGSL_GROUP1_BINDING_OUT_OF_RANGE, "sampler 'wgsl_group1_binding_n' is out of range (must be 0..127)") \ _SG_LOGITEM_XMACRO(WGPU_CREATE_PIPELINE_LAYOUT_FAILED, "wgpuDeviceCreatePipelineLayout() failed") \ _SG_LOGITEM_XMACRO(WGPU_CREATE_RENDER_PIPELINE_FAILED, "wgpuDeviceCreateRenderPipeline() failed") \ + _SG_LOGITEM_XMACRO(WGPU_CREATE_COMPUTE_PIPELINE_FAILED, "wgpuDeviceCreateComputePipeline() failed") \ _SG_LOGITEM_XMACRO(WGPU_ATTACHMENTS_CREATE_TEXTURE_VIEW_FAILED, "wgpuTextureCreateView() failed in create attachments") \ - _SG_LOGITEM_XMACRO(DRAW_REQUIRED_BINDINGS_OR_UNIFORMS_MISSING, "call to sg_apply_bindings() and/or sg_apply_uniforms() missing after sg_apply_pipeline()") \ _SG_LOGITEM_XMACRO(IDENTICAL_COMMIT_LISTENER, "attempting to add identical commit listener") \ _SG_LOGITEM_XMACRO(COMMIT_LISTENER_ARRAY_FULL, "commit listener array full") \ _SG_LOGITEM_XMACRO(TRACE_HOOKS_NOT_ENABLED, "sg_install_trace_hooks() called, but SOKOL_TRACE_HOOKS is not defined") \ @@ -3791,12 +4012,13 @@ typedef struct sg_frame_stats { _SG_LOGITEM_XMACRO(PIPELINE_POOL_EXHAUSTED, "pipeline pool exhausted") \ _SG_LOGITEM_XMACRO(PASS_POOL_EXHAUSTED, "pass pool exhausted") \ _SG_LOGITEM_XMACRO(BEGINPASS_ATTACHMENT_INVALID, "sg_begin_pass: an attachment was provided that no longer exists") \ + _SG_LOGITEM_XMACRO(APPLY_BINDINGS_STORAGE_BUFFER_TRACKER_EXHAUSTED, "sg_apply_bindings: too many read/write storage buffers in pass (bump sg_desc.max_dispatch_calls_per_pass") \ 
_SG_LOGITEM_XMACRO(DRAW_WITHOUT_BINDINGS, "attempting to draw without resource bindings") \ _SG_LOGITEM_XMACRO(VALIDATE_BUFFERDESC_CANARY, "sg_buffer_desc not initialized") \ - _SG_LOGITEM_XMACRO(VALIDATE_BUFFERDESC_SIZE, "sg_buffer_desc.size and .data.size cannot both be 0") \ - _SG_LOGITEM_XMACRO(VALIDATE_BUFFERDESC_DATA, "immutable buffers must be initialized with data (sg_buffer_desc.data.ptr and sg_buffer_desc.data.size)") \ - _SG_LOGITEM_XMACRO(VALIDATE_BUFFERDESC_DATA_SIZE, "immutable buffer data size differs from buffer size") \ - _SG_LOGITEM_XMACRO(VALIDATE_BUFFERDESC_NO_DATA, "dynamic/stream usage buffers cannot be initialized with data") \ + _SG_LOGITEM_XMACRO(VALIDATE_BUFFERDESC_EXPECT_NONZERO_SIZE, "sg_buffer_desc.size must be greater than zero") \ + _SG_LOGITEM_XMACRO(VALIDATE_BUFFERDESC_EXPECT_MATCHING_DATA_SIZE, "sg_buffer_desc.size and .data.size must be equal") \ + _SG_LOGITEM_XMACRO(VALIDATE_BUFFERDESC_EXPECT_ZERO_DATA_SIZE, "sg_buffer_desc.data.size expected to be zero") \ + _SG_LOGITEM_XMACRO(VALIDATE_BUFFERDESC_EXPECT_NO_DATA, "sg_buffer_desc.data.ptr must be null for dynamic/stream buffers") \ _SG_LOGITEM_XMACRO(VALIDATE_BUFFERDESC_STORAGEBUFFER_SUPPORTED, "storage buffers not supported by the backend 3D API (requires OpenGL >= 4.3)") \ _SG_LOGITEM_XMACRO(VALIDATE_BUFFERDESC_STORAGEBUFFER_SIZE_MULTIPLE_4, "size of storage buffers must be a multiple of 4") \ _SG_LOGITEM_XMACRO(VALIDATE_IMAGEDATA_NODATA, "sg_image_data: no data (.ptr and/or .size is zero)") \ @@ -3820,10 +4042,15 @@ typedef struct sg_frame_stats { _SG_LOGITEM_XMACRO(VALIDATE_SAMPLERDESC_CANARY, "sg_sampler_desc not initialized") \ _SG_LOGITEM_XMACRO(VALIDATE_SAMPLERDESC_ANISTROPIC_REQUIRES_LINEAR_FILTERING, "sg_sampler_desc.max_anisotropy > 1 requires min/mag/mipmap_filter to be SG_FILTER_LINEAR") \ _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_CANARY, "sg_shader_desc not initialized") \ - _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_SOURCE, "shader source code required") \ - _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_BYTECODE, "shader byte code required") \ - _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_SOURCE_OR_BYTECODE, "shader source or byte code required") \ + _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_VERTEX_SOURCE, "vertex shader source code expected") \ + _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_FRAGMENT_SOURCE, "fragment shader source code expected") \ + _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_COMPUTE_SOURCE, "compute shader source code expected") \ + _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_VERTEX_SOURCE_OR_BYTECODE, "vertex shader source or byte code expected") \ + _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_FRAGMENT_SOURCE_OR_BYTECODE, "fragment shader source or byte code expected") \ + _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_COMPUTE_SOURCE_OR_BYTECODE, "compute shader source or byte code expected") \ + _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_INVALID_SHADER_COMBO, "cannot combine compute shaders with vertex or fragment shaders") \ _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_NO_BYTECODE_SIZE, "shader byte code length (in bytes) required") \ + _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_METAL_THREADS_PER_THREADGROUP, "sg_shader_desc.mtl_threads_per_threadgroup must be initialized for compute shaders (metal)") \ _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_UNIFORMBLOCK_NO_CONT_MEMBERS, "uniform block members must occupy continuous slots") \ _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_UNIFORMBLOCK_SIZE_IS_ZERO, "bound uniform block size cannot be zero") \ _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_UNIFORMBLOCK_METAL_BUFFER_SLOT_OUT_OF_RANGE, "uniform block
'msl_buffer_n' is out of range (must be 0..7)") \ @@ -3840,12 +4067,13 @@ typedef struct sg_frame_stats { _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_STORAGEBUFFER_METAL_BUFFER_SLOT_OUT_OF_RANGE, "storage buffer 'msl_buffer_n' is out of range (must be 8..15)") \ _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_STORAGEBUFFER_METAL_BUFFER_SLOT_COLLISION, "storage buffer 'msl_buffer_n' must be unique across uniform blocks and storage buffer in same shader stage") \ _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_STORAGEBUFFER_HLSL_REGISTER_T_OUT_OF_RANGE, "storage buffer 'hlsl_register_t_n' is out of range (must be 0..23)") \ - _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_STORAGEBUFFER_HLSL_REGISTER_T_COLLISION, "storage_buffer 'hlsl_register_t_n' must be unique across storage buffers and images in same shader stage") \ + _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_STORAGEBUFFER_HLSL_REGISTER_T_COLLISION, "storage buffer 'hlsl_register_t_n' must be unique across read-only storage buffers and images in same shader stage") \ + _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_STORAGEBUFFER_HLSL_REGISTER_U_OUT_OF_RANGE, "storage buffer 'hlsl_register_u_n' is out of range (must be 0..7)") \ + _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_STORAGEBUFFER_HLSL_REGISTER_U_COLLISION, "storage buffer 'hlsl_register_u_n' must be unique across read/write storage buffers in same shader stage") \ _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_STORAGEBUFFER_GLSL_BINDING_OUT_OF_RANGE, "storage buffer 'glsl_binding_n' is out of range (must be 0..7)") \ _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_STORAGEBUFFER_GLSL_BINDING_COLLISION, "storage buffer 'glsl_binding_n' must be unique across shader stages") \ _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_STORAGEBUFFER_WGSL_GROUP1_BINDING_OUT_OF_RANGE, "storage buffer 'wgsl_group1_binding_n' is out of range (must be 0..127)") \ _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_STORAGEBUFFER_WGSL_GROUP1_BINDING_COLLISION, "storage buffer 'wgsl_group1_binding_n' must be unique across all images, samplers and storage buffers") \ - _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_STORAGEBUFFER_READONLY, "shader stage storage buffers must be readonly (sg_shader_desc.storage_buffers[].readonly)") \ _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_IMAGE_METAL_TEXTURE_SLOT_OUT_OF_RANGE, "image 'msl_texture_n' is out of range (must be 0..15)") \ _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_IMAGE_METAL_TEXTURE_SLOT_COLLISION, "image 'msl_texture_n' must be unique in same shader stage") \ _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_IMAGE_HLSL_REGISTER_T_OUT_OF_RANGE, "image 'hlsl_register_t_n' is out of range (must be 0..23)") \ @@ -3870,9 +4098,12 @@ typedef struct sg_frame_stats { _SG_LOGITEM_XMACRO(VALIDATE_SHADERDESC_ATTR_STRING_TOO_LONG, "vertex attribute name/semantic string too long (max len 16)") \ _SG_LOGITEM_XMACRO(VALIDATE_PIPELINEDESC_CANARY, "sg_pipeline_desc not initialized") \ _SG_LOGITEM_XMACRO(VALIDATE_PIPELINEDESC_SHADER, "sg_pipeline_desc.shader missing or invalid") \ + _SG_LOGITEM_XMACRO(VALIDATE_PIPELINEDESC_COMPUTE_SHADER_EXPECTED, "sg_pipeline_desc.shader must be a compute shader") \ + _SG_LOGITEM_XMACRO(VALIDATE_PIPELINEDESC_NO_COMPUTE_SHADER_EXPECTED, "sg_pipeline_desc.compute is false, but shader is a compute shader") \ _SG_LOGITEM_XMACRO(VALIDATE_PIPELINEDESC_NO_CONT_ATTRS, "sg_pipeline_desc.layout.attrs is not continuous") \ _SG_LOGITEM_XMACRO(VALIDATE_PIPELINEDESC_LAYOUT_STRIDE4, "sg_pipeline_desc.layout.buffers[].stride must be multiple of 4") \ _SG_LOGITEM_XMACRO(VALIDATE_PIPELINEDESC_ATTR_SEMANTICS, "D3D11 missing vertex attribute semantics in shader") \ +
_SG_LOGITEM_XMACRO(VALIDATE_PIPELINEDESC_SHADER_READONLY_STORAGEBUFFERS, "sg_pipeline_desc.shader: only readonly storage buffer bindings allowed in render pipelines") \ _SG_LOGITEM_XMACRO(VALIDATE_PIPELINEDESC_BLENDOP_MINMAX_REQUIRES_BLENDFACTOR_ONE, "SG_BLENDOP_MIN/MAX requires all blend factors to be SG_BLENDFACTOR_ONE") \ _SG_LOGITEM_XMACRO(VALIDATE_ATTACHMENTSDESC_CANARY, "sg_attachments_desc not initialized") \ _SG_LOGITEM_XMACRO(VALIDATE_ATTACHMENTSDESC_NO_ATTACHMENTS, "sg_attachments_desc no color or depth-stencil attachments") \ @@ -3906,6 +4137,7 @@ typedef struct sg_frame_stats { _SG_LOGITEM_XMACRO(VALIDATE_ATTACHMENTSDESC_DEPTH_IMAGE_SIZES, "pass depth attachment image size must match color attachment image size") \ _SG_LOGITEM_XMACRO(VALIDATE_ATTACHMENTSDESC_DEPTH_IMAGE_SAMPLE_COUNT, "pass depth attachment sample count must match color attachment sample count") \ _SG_LOGITEM_XMACRO(VALIDATE_BEGINPASS_CANARY, "sg_begin_pass: pass struct not initialized") \ + _SG_LOGITEM_XMACRO(VALIDATE_BEGINPASS_EXPECT_NO_ATTACHMENTS, "sg_begin_pass: compute passes cannot have attachments") \ _SG_LOGITEM_XMACRO(VALIDATE_BEGINPASS_ATTACHMENTS_EXISTS, "sg_begin_pass: attachments object no longer alive") \ _SG_LOGITEM_XMACRO(VALIDATE_BEGINPASS_ATTACHMENTS_VALID, "sg_begin_pass: attachments object not in resource state VALID") \ _SG_LOGITEM_XMACRO(VALIDATE_BEGINPASS_COLOR_ATTACHMENT_IMAGE, "sg_begin_pass: one or more color attachment images are not valid") \ @@ -3939,20 +4171,28 @@ typedef struct sg_frame_stats { _SG_LOGITEM_XMACRO(VALIDATE_BEGINPASS_SWAPCHAIN_WGPU_EXPECT_DEPTHSTENCILVIEW, "sg_begin_pass: expected pass.swapchain.wgpu.depth_stencil_view != 0") \ _SG_LOGITEM_XMACRO(VALIDATE_BEGINPASS_SWAPCHAIN_WGPU_EXPECT_DEPTHSTENCILVIEW_NOTSET, "sg_begin_pass: expected pass.swapchain.wgpu.depth_stencil_view == 0") \ _SG_LOGITEM_XMACRO(VALIDATE_BEGINPASS_SWAPCHAIN_GL_EXPECT_FRAMEBUFFER_NOTSET, "sg_begin_pass: expected pass.swapchain.gl.framebuffer == 0") \ + _SG_LOGITEM_XMACRO(VALIDATE_AVP_RENDERPASS_EXPECTED, "sg_apply_viewport: must be called in a render pass") \ + _SG_LOGITEM_XMACRO(VALIDATE_ASR_RENDERPASS_EXPECTED, "sg_apply_scissor_rect: must be called in a render pass") \ _SG_LOGITEM_XMACRO(VALIDATE_APIP_PIPELINE_VALID_ID, "sg_apply_pipeline: invalid pipeline id provided") \ _SG_LOGITEM_XMACRO(VALIDATE_APIP_PIPELINE_EXISTS, "sg_apply_pipeline: pipeline object no longer alive") \ _SG_LOGITEM_XMACRO(VALIDATE_APIP_PIPELINE_VALID, "sg_apply_pipeline: pipeline object not in valid state") \ + _SG_LOGITEM_XMACRO(VALIDATE_APIP_PASS_EXPECTED, "sg_apply_pipeline: must be called in a pass") \ _SG_LOGITEM_XMACRO(VALIDATE_APIP_SHADER_EXISTS, "sg_apply_pipeline: shader object no longer alive") \ _SG_LOGITEM_XMACRO(VALIDATE_APIP_SHADER_VALID, "sg_apply_pipeline: shader object not in valid state") \ + _SG_LOGITEM_XMACRO(VALIDATE_APIP_COMPUTEPASS_EXPECTED, "sg_apply_pipeline: trying to apply compute pipeline in render pass") \ + _SG_LOGITEM_XMACRO(VALIDATE_APIP_RENDERPASS_EXPECTED, "sg_apply_pipeline: trying to apply render pipeline in compute pass") \ _SG_LOGITEM_XMACRO(VALIDATE_APIP_CURPASS_ATTACHMENTS_EXISTS, "sg_apply_pipeline: current pass attachments no longer alive") \ _SG_LOGITEM_XMACRO(VALIDATE_APIP_CURPASS_ATTACHMENTS_VALID, "sg_apply_pipeline: current pass attachments not in valid state") \ _SG_LOGITEM_XMACRO(VALIDATE_APIP_ATT_COUNT, "sg_apply_pipeline: number of pipeline color attachments doesn't match number of pass color attachments") \ _SG_LOGITEM_XMACRO(VALIDATE_APIP_COLOR_FORMAT, 
"sg_apply_pipeline: pipeline color attachment pixel format doesn't match pass color attachment pixel format") \ _SG_LOGITEM_XMACRO(VALIDATE_APIP_DEPTH_FORMAT, "sg_apply_pipeline: pipeline depth pixel_format doesn't match pass depth attachment pixel format") \ _SG_LOGITEM_XMACRO(VALIDATE_APIP_SAMPLE_COUNT, "sg_apply_pipeline: pipeline MSAA sample count doesn't match render pass attachment sample count") \ + _SG_LOGITEM_XMACRO(VALIDATE_ABND_PASS_EXPECTED, "sg_apply_bindings: must be called in a pass") \ _SG_LOGITEM_XMACRO(VALIDATE_ABND_PIPELINE, "sg_apply_bindings: must be called after sg_apply_pipeline") \ _SG_LOGITEM_XMACRO(VALIDATE_ABND_PIPELINE_EXISTS, "sg_apply_bindings: currently applied pipeline object no longer alive") \ _SG_LOGITEM_XMACRO(VALIDATE_ABND_PIPELINE_VALID, "sg_apply_bindings: currently applied pipeline object not in valid state") \ + _SG_LOGITEM_XMACRO(VALIDATE_ABND_COMPUTE_EXPECTED_NO_VBS, "sg_apply_bindings: vertex buffer bindings not allowed in a compute pass") \ + _SG_LOGITEM_XMACRO(VALIDATE_ABND_COMPUTE_EXPECTED_NO_IB, "sg_apply_bindings: index buffer binding not allowed in compute pass") \ _SG_LOGITEM_XMACRO(VALIDATE_ABND_EXPECTED_VB, "sg_apply_bindings: vertex buffer binding is missing or buffer handle is invalid") \ _SG_LOGITEM_XMACRO(VALIDATE_ABND_VB_EXISTS, "sg_apply_bindings: vertex buffer no longer alive") \ _SG_LOGITEM_XMACRO(VALIDATE_ABND_VB_TYPE, "sg_apply_bindings: buffer in vertex buffer slot is not a SG_BUFFERTYPE_VERTEXBUFFER") \ @@ -3976,10 +4216,22 @@ typedef struct sg_frame_stats { _SG_LOGITEM_XMACRO(VALIDATE_ABND_SMP_EXISTS, "sg_apply_bindings: bound sampler no longer alive") \ _SG_LOGITEM_XMACRO(VALIDATE_ABND_EXPECTED_STORAGEBUFFER_BINDING, "sg_apply_bindings: storage buffer binding is missing or the buffer handle is invalid") \ _SG_LOGITEM_XMACRO(VALIDATE_ABND_STORAGEBUFFER_EXISTS, "sg_apply_bindings: bound storage buffer no longer alive") \ - _SG_LOGITEM_XMACRO(VALIDATE_ABND_STORAGEBUFFER_BINDING_BUFFERTYPE, "sg_apply_bindings: buffer bound storage buffer slot is not of type storage buffer") \ - _SG_LOGITEM_XMACRO(VALIDATE_AUB_NO_PIPELINE, "sg_apply_uniforms: must be called after sg_apply_pipeline()") \ - _SG_LOGITEM_XMACRO(VALIDATE_AUB_NO_UNIFORMBLOCK_AT_SLOT, "sg_apply_uniforms: no uniform block declaration at this shader stage UB slot") \ - _SG_LOGITEM_XMACRO(VALIDATE_AUB_SIZE, "sg_apply_uniforms: data size doesn't match declared uniform block size") \ + _SG_LOGITEM_XMACRO(VALIDATE_ABND_STORAGEBUFFER_BINDING_BUFFERTYPE, "sg_apply_bindings: buffer bound to storage buffer slot is not of type storage buffer") \ + _SG_LOGITEM_XMACRO(VALIDATE_ABND_STORAGEBUFFER_READWRITE_IMMUTABLE, "sg_apply_bindings: storage buffers bound as read/write must have usage immutable") \ + _SG_LOGITEM_XMACRO(VALIDATE_AU_PASS_EXPECTED, "sg_apply_uniforms: must be called in a pass") \ + _SG_LOGITEM_XMACRO(VALIDATE_AU_NO_PIPELINE, "sg_apply_uniforms: must be called after sg_apply_pipeline()") \ + _SG_LOGITEM_XMACRO(VALIDATE_AU_NO_UNIFORMBLOCK_AT_SLOT, "sg_apply_uniforms: no uniform block declaration at this shader stage UB slot") \ + _SG_LOGITEM_XMACRO(VALIDATE_AU_SIZE, "sg_apply_uniforms: data size doesn't match declared uniform block size") \ + _SG_LOGITEM_XMACRO(VALIDATE_DRAW_RENDERPASS_EXPECTED, "sg_draw: must be called in a render pass") \ + _SG_LOGITEM_XMACRO(VALIDATE_DRAW_BASEELEMENT, "sg_draw: base_element cannot be < 0") \ + _SG_LOGITEM_XMACRO(VALIDATE_DRAW_NUMELEMENTS, "sg_draw: num_elements cannot be < 0") \ + _SG_LOGITEM_XMACRO(VALIDATE_DRAW_NUMINSTANCES, 
"sg_draw: num_instances cannot be < 0") \ + _SG_LOGITEM_XMACRO(VALIDATE_DRAW_REQUIRED_BINDINGS_OR_UNIFORMS_MISSING, "sg_draw: call to sg_apply_bindings() and/or sg_apply_uniforms() missing after sg_apply_pipeline()") \ + _SG_LOGITEM_XMACRO(VALIDATE_DISPATCH_COMPUTEPASS_EXPECTED, "sg_dispatch: must be called in a compute pass") \ + _SG_LOGITEM_XMACRO(VALIDATE_DISPATCH_NUMGROUPSX, "sg_dispatch: num_groups_x must be >=0 and <65536") \ + _SG_LOGITEM_XMACRO(VALIDATE_DISPATCH_NUMGROUPSY, "sg_dispatch: num_groups_y must be >=0 and <65536") \ + _SG_LOGITEM_XMACRO(VALIDATE_DISPATCH_NUMGROUPSZ, "sg_dispatch: num_groups_z must be >=0 and <65536") \ + _SG_LOGITEM_XMACRO(VALIDATE_DISPATCH_REQUIRED_BINDINGS_OR_UNIFORMS_MISSING, "sg_dispatch: call to sg_apply_bindings() and/or sg_apply_uniforms() missing after sg_apply_pipeline()") \ _SG_LOGITEM_XMACRO(VALIDATE_UPDATEBUF_USAGE, "sg_update_buffer: cannot update immutable buffer") \ _SG_LOGITEM_XMACRO(VALIDATE_UPDATEBUF_SIZE, "sg_update_buffer: update size is bigger than buffer size") \ _SG_LOGITEM_XMACRO(VALIDATE_UPDATEBUF_ONCE, "sg_update_buffer: only one update allowed per buffer and frame") \ @@ -4005,15 +4257,16 @@ typedef enum sg_log_item { The default configuration is: - .buffer_pool_size 128 - .image_pool_size 128 - .sampler_pool_size 64 - .shader_pool_size 32 - .pipeline_pool_size 64 - .pass_pool_size 16 - .uniform_buffer_size 4 MB (4*1024*1024) - .max_commit_listeners 1024 - .disable_validation false + .buffer_pool_size 128 + .image_pool_size 128 + .sampler_pool_size 64 + .shader_pool_size 32 + .pipeline_pool_size 64 + .attachments_pool_size 16 + .uniform_buffer_size 4 MB (4*1024*1024) + .max_dispatch_calls_per_pass 1024 + .max_commit_listeners 1024 + .disable_validation false .mtl_force_managed_storage_mode false .wgpu_disable_bindgroups_cache false .wgpu_bindgroups_cache_size 1024 @@ -4172,6 +4425,7 @@ typedef struct sg_desc { int pipeline_pool_size; int attachments_pool_size; int uniform_buffer_size; + int max_dispatch_calls_per_pass; // max expected number of dispatch calls per pass (default: 1024) int max_commit_listeners; bool disable_validation; // disable validation layer even in debug mode, useful for tests bool d3d11_shader_debugging; // if true, HLSL shaders are compiled with D3DCOMPILE_DEBUG | D3DCOMPILE_SKIP_OPTIMIZATION @@ -4225,6 +4479,7 @@ SOKOL_GFX_API_DECL void sg_apply_pipeline(sg_pipeline pip); SOKOL_GFX_API_DECL void sg_apply_bindings(const sg_bindings* bindings); SOKOL_GFX_API_DECL void sg_apply_uniforms(int ub_slot, const sg_range* data); SOKOL_GFX_API_DECL void sg_draw(int base_element, int num_elements, int num_instances); +SOKOL_GFX_API_DECL void sg_dispatch(int num_groups_x, int num_groups_y, int num_groups_z); SOKOL_GFX_API_DECL void sg_end_pass(void); SOKOL_GFX_API_DECL void sg_commit(void); @@ -4401,7 +4656,8 @@ typedef struct sg_wgpu_shader_info { } sg_wgpu_shader_info; typedef struct sg_wgpu_pipeline_info { - const void* pip; // WGPURenderPipeline + const void* render_pipeline; // WGPURenderPipeline + const void* compute_pipeline; // WGPUComputePipeline } sg_wgpu_pipeline_info; typedef struct sg_wgpu_attachments_info { @@ -4454,8 +4710,10 @@ SOKOL_GFX_API_DECL sg_d3d11_attachments_info sg_d3d11_query_attachments_info(sg_ // Metal: return __bridge-casted MTLDevice SOKOL_GFX_API_DECL const void* sg_mtl_device(void); -// Metal: return __bridge-casted MTLRenderCommandEncoder in current pass (or zero if outside pass) +// Metal: return __bridge-casted MTLRenderCommandEncoder when inside render pass (otherwise zero) 
SOKOL_GFX_API_DECL const void* sg_mtl_render_command_encoder(void); +// Metal: return __bridge-casted MTLComputeCommandEncoder when inside compute pass (otherwise zero) +SOKOL_GFX_API_DECL const void* sg_mtl_compute_command_encoder(void); // Metal: get internal __bridge-casted buffer resource objects SOKOL_GFX_API_DECL sg_mtl_buffer_info sg_mtl_query_buffer_info(sg_buffer buf); // Metal: get internal __bridge-casted image resource objects @@ -4473,8 +4731,10 @@ SOKOL_GFX_API_DECL const void* sg_wgpu_device(void); SOKOL_GFX_API_DECL const void* sg_wgpu_queue(void); // WebGPU: return this frame's WGPUCommandEncoder SOKOL_GFX_API_DECL const void* sg_wgpu_command_encoder(void); -// WebGPU: return WGPURenderPassEncoder of current pass +// WebGPU: return WGPURenderPassEncoder of current pass (returns 0 when outside pass or in a compute pass) SOKOL_GFX_API_DECL const void* sg_wgpu_render_pass_encoder(void); +// WebGPU: return WGPUComputePassEncoder of current pass (returns 0 when outside pass or in a render pass) +SOKOL_GFX_API_DECL const void* sg_wgpu_compute_pass_encoder(void); // WebGPU: get internal buffer resource objects SOKOL_GFX_API_DECL sg_wgpu_buffer_info sg_wgpu_query_buffer_info(sg_buffer buf); // WebGPU: get internal image resource objects @@ -4680,6 +4940,7 @@ inline int sg_append_buffer(sg_buffer buf_id, const sg_range& data) { return sg_ #include #define _SOKOL_USE_WIN32_GL_LOADER (1) #pragma comment (lib, "kernel32") // GetProcAddress() + #define _SOKOL_GL_HAS_COMPUTE (1) #endif #elif defined(__APPLE__) #include @@ -4700,6 +4961,7 @@ inline int sg_append_buffer(sg_buffer buf_id, const sg_range& data) { return sg_ #if defined(SOKOL_GLCORE) #define GL_GLEXT_PROTOTYPES #include + #define _SOKOL_GL_HAS_COMPUTE (1) #else #include #include @@ -4805,6 +5067,7 @@ inline int sg_append_buffer(sg_buffer buf_id, const sg_range& data) { return sg_ #define GL_TEXTURE_CUBE_MAP_POSITIVE_X 0x8515 #define GL_DECR 0x1E03 #define GL_FRAGMENT_SHADER 0x8B30 + #define GL_COMPUTE_SHADER 0x91B9 #define GL_FLOAT 0x1406 #define GL_TEXTURE_MAX_LOD 0x813B #define GL_DEPTH_COMPONENT 0x1902 @@ -4958,6 +5221,7 @@ inline int sg_append_buffer(sg_buffer buf_id, const sg_range& data) { return sg_ #define GL_MINOR_VERSION 0x821C #define GL_TEXTURE_2D_MULTISAMPLE 0x9100 #define GL_TEXTURE_2D_MULTISAMPLE_ARRAY 0x9102 + #define GL_SHADER_STORAGE_BARRIER_BIT 0x2000 #define GL_MIN 0x8007 #define GL_MAX 0x8008 #endif @@ -5055,6 +5319,9 @@ inline int sg_append_buffer(sg_buffer buf_id, const sg_range& data) { return sg_ #ifndef GL_LUMINANCE #define GL_LUMINANCE 0x1909 #endif + #ifndef GL_COMPUTE_SHADER + #define GL_COMPUTE_SHADER 0x91B9 + #endif #ifndef _SG_GL_CHECK_ERROR #define _SG_GL_CHECK_ERROR() { SOKOL_ASSERT(glGetError() == GL_NO_ERROR); } #endif @@ -5096,14 +5363,31 @@ typedef struct { int* free_queue; } _sg_pool_t; -_SOKOL_PRIVATE void _sg_init_pool(_sg_pool_t* pool, int num); -_SOKOL_PRIVATE void _sg_discard_pool(_sg_pool_t* pool); +_SOKOL_PRIVATE void _sg_pool_init(_sg_pool_t* pool, int num); +_SOKOL_PRIVATE void _sg_pool_discard(_sg_pool_t* pool); _SOKOL_PRIVATE int _sg_pool_alloc_index(_sg_pool_t* pool); _SOKOL_PRIVATE void _sg_pool_free_index(_sg_pool_t* pool, int slot_index); -_SOKOL_PRIVATE void _sg_reset_slot(_sg_slot_t* slot); +_SOKOL_PRIVATE void _sg_slot_reset(_sg_slot_t* slot); _SOKOL_PRIVATE uint32_t _sg_slot_alloc(_sg_pool_t* pool, _sg_slot_t* slot, int slot_index); _SOKOL_PRIVATE int _sg_slot_index(uint32_t id); +// resource func forward decls +struct _sg_pools_s; +struct _sg_buffer_s; 
+_SOKOL_PRIVATE struct _sg_buffer_s* _sg_lookup_buffer(const struct _sg_pools_s* p, uint32_t buf_id); + +// resource tracking (for keeping track of gpu-written storage resources) +typedef struct { + uint32_t size; + uint32_t cur; + uint32_t* items; +} _sg_tracker_t; + +_SOKOL_PRIVATE void _sg_tracker_init(_sg_tracker_t* tracker, uint32_t num); +_SOKOL_PRIVATE void _sg_tracker_discard(_sg_tracker_t* tracker); +_SOKOL_PRIVATE void _sg_tracker_reset(_sg_tracker_t* tracker); +_SOKOL_PRIVATE bool _sg_tracker_add(_sg_tracker_t* tracker, uint32_t res_id); + // constants enum { _SG_STRING_SIZE = 32, @@ -5117,6 +5401,7 @@ enum { _SG_DEFAULT_PIPELINE_POOL_SIZE = 64, _SG_DEFAULT_ATTACHMENTS_POOL_SIZE = 16, _SG_DEFAULT_UB_SIZE = 4 * 1024 * 1024, + _SG_DEFAULT_MAX_DISPATCH_CALLS_PER_PASS = 1024, _SG_DEFAULT_MAX_COMMIT_LISTENERS = 1024, _SG_DEFAULT_WGPU_BINDGROUP_CACHE_SIZE = 1024, }; @@ -5253,6 +5538,7 @@ typedef struct { typedef struct { uint32_t required_bindings_and_uniforms; + bool is_compute; _sg_shader_uniform_block_t uniform_blocks[SG_MAX_UNIFORMBLOCK_BINDSLOTS]; _sg_shader_storage_buffer_t storage_buffers[SG_MAX_STORAGEBUFFER_BINDSLOTS]; _sg_shader_image_t images[SG_MAX_IMAGE_BINDSLOTS]; @@ -5261,6 +5547,7 @@ typedef struct { } _sg_shader_common_t; _SOKOL_PRIVATE void _sg_shader_common_init(_sg_shader_common_t* cmn, const sg_shader_desc* desc) { + cmn->is_compute = desc->compute_func.source || desc->compute_func.bytecode.ptr; for (size_t i = 0; i < SG_MAX_UNIFORMBLOCK_BINDSLOTS; i++) { const sg_shader_uniform_block* src = &desc->uniform_blocks[i]; _sg_shader_uniform_block_t* dst = &cmn->uniform_blocks[i]; @@ -5318,6 +5605,7 @@ _SOKOL_PRIVATE void _sg_shader_common_init(_sg_shader_common_t* cmn, const sg_sh typedef struct { bool vertex_buffer_layout_active[SG_MAX_VERTEXBUFFER_BINDSLOTS]; bool use_instanced_draw; + bool is_compute; uint32_t required_bindings_and_uniforms; sg_shader shader_id; sg_vertex_layout_state layout; @@ -5336,6 +5624,9 @@ typedef struct { _SOKOL_PRIVATE void _sg_pipeline_common_init(_sg_pipeline_common_t* cmn, const sg_pipeline_desc* desc) { SOKOL_ASSERT((desc->color_count >= 0) && (desc->color_count <= SG_MAX_COLOR_ATTACHMENTS)); + + // FIXME: most of this isn't needed for compute pipelines + const uint32_t required_bindings_flag = (1 << SG_MAX_UNIFORMBLOCK_BINDSLOTS); for (int i = 0; i < SG_MAX_VERTEXBUFFER_BINDSLOTS; i++) { const sg_vertex_attr_state* a_state = &desc->layout.attrs[i]; @@ -5345,6 +5636,7 @@ _SOKOL_PRIVATE void _sg_pipeline_common_init(_sg_pipeline_common_t* cmn, const s cmn->required_bindings_and_uniforms |= required_bindings_flag; } } + cmn->is_compute = desc->compute; cmn->use_instanced_draw = false; cmn->shader_id = desc->shader; cmn->layout = desc->layout; @@ -5404,31 +5696,31 @@ _SOKOL_PRIVATE void _sg_attachments_common_init(_sg_attachments_common_t* cmn, c } #if defined(SOKOL_DUMMY_BACKEND) -typedef struct { +typedef struct _sg_buffer_s { _sg_slot_t slot; _sg_buffer_common_t cmn; } _sg_dummy_buffer_t; typedef _sg_dummy_buffer_t _sg_buffer_t; -typedef struct { +typedef struct _sg_image_s { _sg_slot_t slot; _sg_image_common_t cmn; } _sg_dummy_image_t; typedef _sg_dummy_image_t _sg_image_t; -typedef struct { +typedef struct _sg_sampler_s { _sg_slot_t slot; _sg_sampler_common_t cmn; } _sg_dummy_sampler_t; typedef _sg_dummy_sampler_t _sg_sampler_t; -typedef struct { +typedef struct _sg_shader_s { _sg_slot_t slot; _sg_shader_common_t cmn; } _sg_dummy_shader_t; typedef _sg_dummy_shader_t _sg_shader_t; -typedef struct { +typedef struct _sg_pipeline_s {
_sg_slot_t slot; _sg_shader_t* shader; _sg_pipeline_common_t cmn; @@ -5439,7 +5731,7 @@ typedef struct { _sg_image_t* image; } _sg_dummy_attachment_t; -typedef struct { +typedef struct _sg_attachments_s { _sg_slot_t slot; _sg_attachments_common_t cmn; struct { @@ -5452,17 +5744,18 @@ typedef _sg_dummy_attachments_t _sg_attachments_t; #elif defined(_SOKOL_ANY_GL) -typedef struct { +typedef struct _sg_buffer_s { _sg_slot_t slot; _sg_buffer_common_t cmn; struct { GLuint buf[SG_NUM_INFLIGHT_FRAMES]; bool injected; // if true, external buffers were injected with sg_buffer_desc.gl_buffers + bool gpu_dirty; // true if modified by GPU shader but memory barrier hasn't been issued yet } gl; } _sg_gl_buffer_t; typedef _sg_gl_buffer_t _sg_buffer_t; -typedef struct { +typedef struct _sg_image_s { _sg_slot_t slot; _sg_image_common_t cmn; struct { @@ -5474,7 +5767,7 @@ typedef struct { } _sg_gl_image_t; typedef _sg_gl_image_t _sg_image_t; -typedef struct { +typedef struct _sg_sampler_s { _sg_slot_t slot; _sg_sampler_common_t cmn; struct { @@ -5500,7 +5793,7 @@ typedef struct { _sg_str_t name; } _sg_gl_shader_attr_t; -typedef struct { +typedef struct _sg_shader_s { _sg_slot_t slot; _sg_shader_common_t cmn; struct { @@ -5523,7 +5816,7 @@ typedef struct { GLenum type; } _sg_gl_attr_t; -typedef struct { +typedef struct _sg_pipeline_s { _sg_slot_t slot; _sg_pipeline_common_t cmn; _sg_shader_t* shader; @@ -5546,7 +5839,7 @@ typedef struct { _sg_image_t* image; } _sg_gl_attachment_t; -typedef struct { +typedef struct _sg_attachments_s { _sg_slot_t slot; _sg_attachments_common_t cmn; struct { @@ -5618,17 +5911,18 @@ typedef struct { #elif defined(SOKOL_D3D11) -typedef struct { +typedef struct _sg_buffer_s { _sg_slot_t slot; _sg_buffer_common_t cmn; struct { ID3D11Buffer* buf; ID3D11ShaderResourceView* srv; + ID3D11UnorderedAccessView* uav; } d3d11; } _sg_d3d11_buffer_t; typedef _sg_d3d11_buffer_t _sg_buffer_t; -typedef struct { +typedef struct _sg_image_s { _sg_slot_t slot; _sg_image_common_t cmn; struct { @@ -5641,7 +5935,7 @@ typedef struct { } _sg_d3d11_image_t; typedef _sg_d3d11_image_t _sg_image_t; -typedef struct { +typedef struct _sg_sampler_s { _sg_slot_t slot; _sg_sampler_common_t cmn; struct { @@ -5656,30 +5950,34 @@ typedef struct { } _sg_d3d11_shader_attr_t; #define _SG_D3D11_MAX_STAGE_UB_BINDINGS (SG_MAX_UNIFORMBLOCK_BINDSLOTS) -#define _SG_D3D11_MAX_STAGE_TEX_SBUF_BINDINGS (SG_MAX_IMAGE_BINDSLOTS + SG_MAX_STORAGEBUFFER_BINDSLOTS) +#define _SG_D3D11_MAX_STAGE_SRV_BINDINGS (SG_MAX_IMAGE_BINDSLOTS + SG_MAX_STORAGEBUFFER_BINDSLOTS) +#define _SG_D3D11_MAX_STAGE_UAV_BINDINGS (SG_MAX_STORAGEBUFFER_BINDSLOTS) #define _SG_D3D11_MAX_STAGE_SMP_BINDINGS (SG_MAX_SAMPLER_BINDSLOTS) -typedef struct { +typedef struct _sg_shader_s { _sg_slot_t slot; _sg_shader_common_t cmn; struct { _sg_d3d11_shader_attr_t attrs[SG_MAX_VERTEX_ATTRIBUTES]; ID3D11VertexShader* vs; ID3D11PixelShader* fs; + ID3D11ComputeShader* cs; void* vs_blob; size_t vs_blob_length; uint8_t ub_register_b_n[SG_MAX_UNIFORMBLOCK_BINDSLOTS]; uint8_t img_register_t_n[SG_MAX_IMAGE_BINDSLOTS]; uint8_t smp_register_s_n[SG_MAX_SAMPLER_BINDSLOTS]; uint8_t sbuf_register_t_n[SG_MAX_STORAGEBUFFER_BINDSLOTS]; + uint8_t sbuf_register_u_n[SG_MAX_STORAGEBUFFER_BINDSLOTS]; ID3D11Buffer* all_cbufs[SG_MAX_UNIFORMBLOCK_BINDSLOTS]; ID3D11Buffer* vs_cbufs[_SG_D3D11_MAX_STAGE_UB_BINDINGS]; ID3D11Buffer* fs_cbufs[_SG_D3D11_MAX_STAGE_UB_BINDINGS]; + ID3D11Buffer* cs_cbufs[_SG_D3D11_MAX_STAGE_UB_BINDINGS]; } d3d11; } _sg_d3d11_shader_t; typedef _sg_d3d11_shader_t 
_sg_shader_t; -typedef struct { +typedef struct _sg_pipeline_s { _sg_slot_t slot; _sg_pipeline_common_t cmn; _sg_shader_t* shader; @@ -5704,7 +6002,7 @@ typedef struct { } view; } _sg_d3d11_attachment_t; -typedef struct { +typedef struct _sg_attachments_s { _sg_slot_t slot; _sg_attachments_common_t cmn; struct { @@ -5759,7 +6057,7 @@ typedef struct { _sg_mtl_release_item_t* release_queue; } _sg_mtl_idpool_t; -typedef struct { +typedef struct _sg_buffer_s { _sg_slot_t slot; _sg_buffer_common_t cmn; struct { @@ -5768,7 +6066,7 @@ typedef struct { } _sg_mtl_buffer_t; typedef _sg_mtl_buffer_t _sg_buffer_t; -typedef struct { +typedef struct _sg_image_s { _sg_slot_t slot; _sg_image_common_t cmn; struct { @@ -5777,7 +6075,7 @@ typedef struct { } _sg_mtl_image_t; typedef _sg_mtl_image_t _sg_image_t; -typedef struct { +typedef struct _sg_sampler_s { _sg_slot_t slot; _sg_sampler_common_t cmn; struct { @@ -5791,12 +6089,14 @@ typedef struct { int mtl_func; } _sg_mtl_shader_func_t; -typedef struct { +typedef struct _sg_shader_s { _sg_slot_t slot; _sg_shader_common_t cmn; struct { _sg_mtl_shader_func_t vertex_func; _sg_mtl_shader_func_t fragment_func; + _sg_mtl_shader_func_t compute_func; + MTLSize threads_per_threadgroup; uint8_t ub_buffer_n[SG_MAX_UNIFORMBLOCK_BINDSLOTS]; uint8_t img_texture_n[SG_MAX_IMAGE_BINDSLOTS]; uint8_t smp_sampler_n[SG_MAX_SAMPLER_BINDSLOTS]; @@ -5805,7 +6105,7 @@ typedef struct { } _sg_mtl_shader_t; typedef _sg_mtl_shader_t _sg_shader_t; -typedef struct { +typedef struct _sg_pipeline_s { _sg_slot_t slot; _sg_pipeline_common_t cmn; _sg_shader_t* shader; @@ -5816,8 +6116,10 @@ typedef struct { MTLCullMode cull_mode; MTLWinding winding; uint32_t stencil_ref; - int rps; - int dss; + MTLSize threads_per_threadgroup; + int cps; // MTLComputePipelineState + int rps; // MTLRenderPipelineState + int dss; // MTLDepthStencilState } mtl; } _sg_mtl_pipeline_t; typedef _sg_mtl_pipeline_t _sg_pipeline_t; @@ -5826,7 +6128,7 @@ typedef struct { _sg_image_t* image; } _sg_mtl_attachment_t; -typedef struct { +typedef struct _sg_attachments_s { _sg_slot_t slot; _sg_attachments_common_t cmn; struct { @@ -5852,10 +6154,13 @@ typedef struct { int cur_vs_buffer_offsets[_SG_MTL_MAX_STAGE_BUFFER_BINDINGS]; sg_buffer cur_vs_buffer_ids[_SG_MTL_MAX_STAGE_BUFFER_BINDINGS]; sg_buffer cur_fs_buffer_ids[_SG_MTL_MAX_STAGE_BUFFER_BINDINGS]; + sg_buffer cur_cs_buffer_ids[_SG_MTL_MAX_STAGE_BUFFER_BINDINGS]; sg_image cur_vs_image_ids[_SG_MTL_MAX_STAGE_IMAGE_BINDINGS]; sg_image cur_fs_image_ids[_SG_MTL_MAX_STAGE_IMAGE_BINDINGS]; + sg_image cur_cs_image_ids[_SG_MTL_MAX_STAGE_IMAGE_BINDINGS]; sg_sampler cur_vs_sampler_ids[_SG_MTL_MAX_STAGE_SAMPLER_BINDINGS]; sg_sampler cur_fs_sampler_ids[_SG_MTL_MAX_STAGE_SAMPLER_BINDINGS]; + sg_sampler cur_cs_sampler_ids[_SG_MTL_MAX_STAGE_SAMPLER_BINDINGS]; } _sg_mtl_state_cache_t; typedef struct { @@ -5871,7 +6176,8 @@ typedef struct { id device; id cmd_queue; id cmd_buffer; - id cmd_encoder; + id render_cmd_encoder; + id compute_cmd_encoder; id cur_drawable; id uniform_buffers[SG_NUM_INFLIGHT_FRAMES]; } _sg_mtl_backend_t; @@ -5888,7 +6194,7 @@ typedef struct { #define _SG_WGPU_MAX_IMG_SMP_SBUF_BINDGROUP_ENTRIES (SG_MAX_IMAGE_BINDSLOTS + SG_MAX_SAMPLER_BINDSLOTS + SG_MAX_STORAGEBUFFER_BINDSLOTS) #define _SG_WGPU_MAX_IMG_SMP_SBUF_BIND_SLOTS (128) -typedef struct { +typedef struct _sg_buffer_s { _sg_slot_t slot; _sg_buffer_common_t cmn; struct { @@ -5897,7 +6203,7 @@ typedef struct { } _sg_wgpu_buffer_t; typedef _sg_wgpu_buffer_t _sg_buffer_t; -typedef struct { +typedef struct 
_sg_image_s { _sg_slot_t slot; _sg_image_common_t cmn; struct { @@ -5907,7 +6213,7 @@ typedef struct { } _sg_wgpu_image_t; typedef _sg_wgpu_image_t _sg_image_t; -typedef struct { +typedef struct _sg_sampler_s { _sg_slot_t slot; _sg_sampler_common_t cmn; struct { @@ -5921,12 +6227,13 @@ typedef struct { _sg_str_t entry; } _sg_wgpu_shader_func_t; -typedef struct { +typedef struct _sg_shader_s { _sg_slot_t slot; _sg_shader_common_t cmn; struct { _sg_wgpu_shader_func_t vertex_func; _sg_wgpu_shader_func_t fragment_func; + _sg_wgpu_shader_func_t compute_func; WGPUBindGroupLayout bgl_ub; WGPUBindGroup bg_ub; WGPUBindGroupLayout bgl_img_smp_sbuf; @@ -5942,12 +6249,13 @@ typedef struct { } _sg_wgpu_shader_t; typedef _sg_wgpu_shader_t _sg_shader_t; -typedef struct { +typedef struct _sg_pipeline_s { _sg_slot_t slot; _sg_pipeline_common_t cmn; _sg_shader_t* shader; struct { - WGPURenderPipeline pip; + WGPURenderPipeline rpip; + WGPUComputePipeline cpip; WGPUColor blend_color; } wgpu; } _sg_wgpu_pipeline_t; @@ -5958,7 +6266,7 @@ typedef struct { WGPUTextureView view; } _sg_wgpu_attachment_t; -typedef struct { +typedef struct _sg_attachments_s { _sg_slot_t slot; _sg_attachments_common_t cmn; struct { @@ -6040,7 +6348,8 @@ typedef struct { WGPUSupportedLimits limits; WGPUQueue queue; WGPUCommandEncoder cmd_enc; - WGPURenderPassEncoder pass_enc; + WGPURenderPassEncoder rpass_enc; + WGPUComputePassEncoder cpass_enc; WGPUBindGroup empty_bind_group; const _sg_pipeline_t* cur_pipeline; sg_pipeline cur_pipeline_id; @@ -6056,7 +6365,7 @@ typedef struct { // this *MUST* remain 0 #define _SG_INVALID_SLOT_INDEX (0) -typedef struct { +typedef struct _sg_pools_s { _sg_pool_t buffer_pool; _sg_pool_t image_pool; _sg_pool_t sampler_pool; @@ -6105,6 +6414,7 @@ typedef struct { struct { bool valid; bool in_pass; + bool is_compute; sg_attachments atts_id; // SG_INVALID_ID in a swapchain pass _sg_attachments_t* atts; // 0 in a swapchain pass int width; @@ -6122,6 +6432,9 @@ typedef struct { #if defined(SOKOL_DEBUG) sg_log_item validate_error; #endif + struct { + _sg_tracker_t readwrite_sbufs; // tracks read/write storage buffers used in compute pass + } compute; _sg_pools_t pools; sg_backend backend; sg_features features; @@ -6925,6 +7238,12 @@ _SOKOL_PRIVATE void _sg_dummy_draw(int base_element, int num_elements, int num_i _SOKOL_UNUSED(num_instances); } +_SOKOL_PRIVATE void _sg_dummy_dispatch(int num_groups_x, int num_groups_y, int num_groups_z) { + _SOKOL_UNUSED(num_groups_x); + _SOKOL_UNUSED(num_groups_y); + _SOKOL_UNUSED(num_groups_z); +} + _SOKOL_PRIVATE void _sg_dummy_update_buffer(_sg_buffer_t* buf, const sg_range* data) { SOKOL_ASSERT(buf && data && data->ptr && (data->size > 0)); _SOKOL_UNUSED(data); @@ -7076,7 +7395,9 @@ _SOKOL_PRIVATE void _sg_dummy_update_image(_sg_image_t* img, const sg_image_data _SG_XMACRO(glDeleteSamplers, void, (GLsizei n, const GLuint* samplers)) \ _SG_XMACRO(glBindBufferBase, void, (GLenum target, GLuint index, GLuint buffer)) \ _SG_XMACRO(glTexImage2DMultisample, void, (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLboolean fixedsamplelocations)) \ - _SG_XMACRO(glTexImage3DMultisample, void, (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedsamplelocations)) + _SG_XMACRO(glTexImage3DMultisample, void, (GLenum target, GLsizei samples, GLenum internalformat, GLsizei width, GLsizei height, GLsizei depth, GLboolean fixedsamplelocations)) \ + _SG_XMACRO(glDispatchCompute, void, 
(GLuint num_groups_x, GLuint num_groups_y, GLuint num_groups_z)) \ + _SG_XMACRO(glMemoryBarrier, void, (GLbitfield barriers)) // generate GL function pointer typedefs #define _SG_XMACRO(name, ret, args) typedef ret (GL_APIENTRY* PFN_ ## name) args; @@ -7171,6 +7492,7 @@ _SOKOL_PRIVATE GLenum _sg_gl_shader_stage(sg_shader_stage stage) { switch (stage) { case SG_SHADERSTAGE_VERTEX: return GL_VERTEX_SHADER; case SG_SHADERSTAGE_FRAGMENT: return GL_FRAGMENT_SHADER; + case SG_SHADERSTAGE_COMPUTE: return GL_COMPUTE_SHADER; default: SOKOL_UNREACHABLE; return 0; } } @@ -7803,7 +8125,7 @@ _SOKOL_PRIVATE void _sg_gl_init_caps_glcore(void) { _sg.features.image_clamp_to_border = true; _sg.features.mrt_independent_blend_state = false; _sg.features.mrt_independent_write_mask = true; - _sg.features.storage_buffer = version >= 430; + _sg.features.compute = version >= 430; #if defined(__APPLE__) _sg.features.msaa_image_bindings = false; #else @@ -7875,7 +8197,7 @@ _SOKOL_PRIVATE void _sg_gl_init_caps_gles3(void) { _sg.features.image_clamp_to_border = false; _sg.features.mrt_independent_blend_state = false; _sg.features.mrt_independent_write_mask = false; - _sg.features.storage_buffer = false; + _sg.features.compute = false; _sg.features.msaa_image_bindings = false; bool has_s3tc = false; // BC1..BC3 @@ -7970,7 +8292,7 @@ _SOKOL_PRIVATE void _sg_gl_cache_clear_buffer_bindings(bool force) { _sg_stats_add(gl.num_bind_buffer, 1); } if (force || (_sg.gl.cache.storage_buffer != 0)) { - if (_sg.features.storage_buffer) { + if (_sg.features.compute) { glBindBuffer(GL_SHADER_STORAGE_BUFFER, 0); } _sg.gl.cache.storage_buffer = 0; @@ -7978,7 +8300,7 @@ _SOKOL_PRIVATE void _sg_gl_cache_clear_buffer_bindings(bool force) { } for (size_t i = 0; i < _SG_GL_MAX_SBUF_BINDINGS; i++) { if (force || (_sg.gl.cache.storage_buffers[i] != 0)) { - if (_sg.features.storage_buffer) { + if (_sg.features.compute) { glBindBufferBase(GL_SHADER_STORAGE_BUFFER, (GLuint)i, 0); } _sg.gl.cache.storage_buffers[i] = 0; @@ -8004,7 +8326,7 @@ _SOKOL_PRIVATE void _sg_gl_cache_bind_buffer(GLenum target, GLuint buffer) { } else if (target == GL_SHADER_STORAGE_BUFFER) { if (_sg.gl.cache.storage_buffer != buffer) { _sg.gl.cache.storage_buffer = buffer; - if (_sg.features.storage_buffer) { + if (_sg.features.compute) { glBindBuffer(target, buffer); } _sg_stats_add(gl.num_bind_buffer, 1); @@ -8019,7 +8341,7 @@ _SOKOL_PRIVATE void _sg_gl_cache_bind_storage_buffer(uint8_t glsl_binding_n, GLu if (_sg.gl.cache.storage_buffers[glsl_binding_n] != buffer) { _sg.gl.cache.storage_buffers[glsl_binding_n] = buffer; _sg.gl.cache.storage_buffer = buffer; // not a bug - if (_sg.features.storage_buffer) { + if (_sg.features.compute) { glBindBufferBase(GL_SHADER_STORAGE_BUFFER, glsl_binding_n, buffer); } _sg_stats_add(gl.num_bind_buffer, 1); @@ -8374,8 +8696,16 @@ _SOKOL_PRIVATE sg_resource_state _sg_gl_create_buffer(_sg_buffer_t* buf, const s _sg_gl_cache_bind_buffer(gl_target, gl_buf); glBufferData(gl_target, buf->cmn.size, 0, gl_usage); if (buf->cmn.usage == SG_USAGE_IMMUTABLE) { - SOKOL_ASSERT(desc->data.ptr); - glBufferSubData(gl_target, 0, buf->cmn.size, desc->data.ptr); + if (desc->data.ptr) { + glBufferSubData(gl_target, 0, buf->cmn.size, desc->data.ptr); + } else { + // setup a zero-initialized buffer (don't explicitly need to do this on WebGL) + #if !defined(__EMSCRIPTEN__) + void* ptr = _sg_malloc_clear((size_t)buf->cmn.size); + glBufferSubData(gl_target, 0, buf->cmn.size, ptr); + _sg_free(ptr); + #endif + } } _sg_gl_cache_restore_buffer_binding(gl_target); } 
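The zero-init fallback above means an immutable storage buffer can now be created without initial data and be filled entirely on the GPU. For orientation, here is a minimal end-to-end usage sketch of the compute API that this patch implies. The names `sg_dispatch()`, the `compute` flags on `sg_pass` and `sg_pipeline_desc`, the `glsl_binding_n` bindslot field and the `storage_buffers` binding array are inferred from the backend code in this diff (see `pass->compute`, `pip->cmn.is_compute` and the per-backend dispatch functions), not quoted from the final public header:

    // hypothetical usage sketch, assumes the sokol_gfx.h declarations from this patch
    static void compute_example(void) {
        // GLSL 430 compute source, compiled by the new compute branch
        // in _sg_gl_create_shader() in the next hunk
        const char* cs_src =
            "#version 430\n"
            "layout(local_size_x=64) in;\n"
            "layout(std430, binding=0) buffer ssbo { float values[]; };\n"
            "void main() { values[gl_GlobalInvocationID.x] += 1.0; }\n";
        // no .data member: the backend zero-initializes the buffer (see above)
        sg_buffer sbuf = sg_make_buffer(&(sg_buffer_desc){
            .type = SG_BUFFERTYPE_STORAGEBUFFER,
            .size = 64 * sizeof(float),
        });
        sg_shader shd = sg_make_shader(&(sg_shader_desc){
            .compute_func.source = cs_src,
            .storage_buffers[0] = {
                .stage = SG_SHADERSTAGE_COMPUTE,
                .readonly = false,      // GPU-written: enables gpu_dirty tracking on GL
                .glsl_binding_n = 0,    // assumed field name, must match 'binding=0' above
            },
        });
        sg_pipeline pip = sg_make_pipeline(&(sg_pipeline_desc){
            .compute = true,            // assumed flag behind pip->cmn.is_compute
            .shader = shd,
        });
        // a compute pass needs no swapchain or attachments
        sg_begin_pass(&(sg_pass){ .compute = true });
        sg_apply_pipeline(pip);
        sg_apply_bindings(&(sg_bindings){ .storage_buffers[0] = sbuf });
        sg_dispatch(1, 1, 1);           // one 64-thread workgroup
        sg_end_pass();
    }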
@@ -8668,19 +8998,39 @@ _SOKOL_PRIVATE sg_resource_state _sg_gl_create_shader(_sg_shader_t* shd, const s _sg_strcpy(&shd->gl.attrs[i].name, desc->attrs[i].glsl_name); } - GLuint gl_vs = _sg_gl_compile_shader(SG_SHADERSTAGE_VERTEX, desc->vertex_func.source); - GLuint gl_fs = _sg_gl_compile_shader(SG_SHADERSTAGE_FRAGMENT, desc->fragment_func.source); - if (!(gl_vs && gl_fs)) { - return SG_RESOURCESTATE_FAILED; - } + const bool has_vs = desc->vertex_func.source; + const bool has_fs = desc->fragment_func.source; + const bool has_cs = desc->compute_func.source; + SOKOL_ASSERT((has_vs && has_fs) || has_cs); GLuint gl_prog = glCreateProgram(); - glAttachShader(gl_prog, gl_vs); - glAttachShader(gl_prog, gl_fs); - glLinkProgram(gl_prog); - glDeleteShader(gl_vs); - glDeleteShader(gl_fs); - _SG_GL_CHECK_ERROR(); - + if (has_vs && has_fs) { + GLuint gl_vs = _sg_gl_compile_shader(SG_SHADERSTAGE_VERTEX, desc->vertex_func.source); + GLuint gl_fs = _sg_gl_compile_shader(SG_SHADERSTAGE_FRAGMENT, desc->fragment_func.source); + if (!(gl_vs && gl_fs)) { + glDeleteProgram(gl_prog); + if (gl_vs) { glDeleteShader(gl_vs); } + if (gl_fs) { glDeleteShader(gl_fs); } + return SG_RESOURCESTATE_FAILED; + } + glAttachShader(gl_prog, gl_vs); + glAttachShader(gl_prog, gl_fs); + glLinkProgram(gl_prog); + glDeleteShader(gl_vs); + glDeleteShader(gl_fs); + _SG_GL_CHECK_ERROR(); + } else if (has_cs) { + GLuint gl_cs = _sg_gl_compile_shader(SG_SHADERSTAGE_COMPUTE, desc->compute_func.source); + if (!gl_cs) { + glDeleteProgram(gl_prog); + return SG_RESOURCESTATE_FAILED; + } + glAttachShader(gl_prog, gl_cs); + glLinkProgram(gl_prog); + glDeleteShader(gl_cs); + _SG_GL_CHECK_ERROR(); + } else { + SOKOL_UNREACHABLE; + } GLint link_status; glGetProgramiv(gl_prog, GL_LINK_STATUS, &link_status); if (!link_status) { @@ -8793,6 +9143,10 @@ _SOKOL_PRIVATE sg_resource_state _sg_gl_create_pipeline(_sg_pipeline_t* pip, _sg SOKOL_ASSERT(shd->gl.prog); SOKOL_ASSERT(_sg.limits.max_vertex_attrs <= SG_MAX_VERTEX_ATTRIBUTES); pip->shader = shd; + if (pip->cmn.is_compute) { + // shortcut for compute pipelines + return SG_RESOURCESTATE_VALID; + } pip->gl.primitive_type = desc->primitive_type; pip->gl.depth = desc->depth; pip->gl.stencil = desc->stencil; @@ -9073,6 +9427,12 @@ _SOKOL_PRIVATE void _sg_gl_begin_pass(const sg_pass* pass) { // unbind all currently bound textures in begin pass? 
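    /* Annotation (not part of the patch): from here on each backend follows the
       same pattern for compute passes: _sg_*_begin_pass() skips all render-target
       and swapchain setup when pass->compute is set, and apply_pipeline() /
       apply_bindings() early-out after binding the compute state. On GL,
       read/write hazards between dispatches are handled via the per-buffer
       gpu_dirty flag: re-binding a buffer that an earlier dispatch has written
       triggers glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT) in
       _sg_gl_handle_memory_barriers() below. A chained-dispatch sketch (assumed
       public API, see the usage example above):

           sg_apply_bindings(&(sg_bindings){ .storage_buffers[0] = sbuf });
           sg_dispatch(1, 1, 1);   // writes sbuf, so sbuf is marked gpu_dirty
           sg_apply_bindings(&(sg_bindings){ .storage_buffers[0] = sbuf });
           sg_dispatch(1, 1, 1);   // re-bind of the dirty sbuf issues the barrier
    */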
SOKOL_ASSERT(pass); _SG_GL_CHECK_ERROR(); + + // early out if this is a compute pass + if (pass->compute) { + return; + } + const _sg_attachments_t* atts = _sg.cur_pass.atts; const sg_swapchain* swapchain = &pass->swapchain; const sg_pass_action* action = &pass->action; @@ -9255,6 +9615,21 @@ _SOKOL_PRIVATE void _sg_gl_apply_pipeline(_sg_pipeline_t* pip) { if ((_sg.gl.cache.cur_pipeline != pip) || (_sg.gl.cache.cur_pipeline_id.id != pip->slot.id)) { _sg.gl.cache.cur_pipeline = pip; _sg.gl.cache.cur_pipeline_id.id = pip->slot.id; + + // bind shader program + if (pip->shader->gl.prog != _sg.gl.cache.prog) { + _sg.gl.cache.prog = pip->shader->gl.prog; + glUseProgram(pip->shader->gl.prog); + _sg_stats_add(gl.num_use_program, 1); + } + + // if this is a compute pipeline, can early-out here + if (pip->cmn.is_compute) { + _SG_GL_CHECK_ERROR(); + return; + } + + // update render pipeline state _sg.gl.cache.cur_primitive_type = _sg_gl_primitive_type(pip->gl.primitive_type); _sg.gl.cache.cur_index_type = _sg_gl_index_type(pip->cmn.index_type); @@ -9463,16 +9838,37 @@ _SOKOL_PRIVATE void _sg_gl_apply_pipeline(_sg_pipeline_t* pip) { } #endif - // bind shader program - if (pip->shader->gl.prog != _sg.gl.cache.prog) { - _sg.gl.cache.prog = pip->shader->gl.prog; - glUseProgram(pip->shader->gl.prog); - _sg_stats_add(gl.num_use_program, 1); - } } _SG_GL_CHECK_ERROR(); } +#if defined(_SOKOL_GL_HAS_COMPUTE) +_SOKOL_PRIVATE void _sg_gl_handle_memory_barriers(const _sg_shader_t* shd, const _sg_bindings_t* bnd) { + // NOTE: currently only storage buffers can be GPU-written, and storage + // buffers cannot be bound as vertex- or index-buffers. + bool needs_barrier = false; + for (size_t i = 0; i < SG_MAX_STORAGEBUFFER_BINDSLOTS; i++) { + if (shd->cmn.storage_buffers[i].stage == SG_SHADERSTAGE_NONE) { + continue; + } + _sg_buffer_t* buf = bnd->sbufs[i]; + // if this buffer has pending GPU changes, issue a memory barrier + if (buf->gl.gpu_dirty) { + buf->gl.gpu_dirty = false; + needs_barrier = true; + } + // if this binding is going to be written by the GPU, set the buffer to 'gpu_dirty' + if (!shd->cmn.storage_buffers[i].readonly) { + buf->gl.gpu_dirty = true; + } + } + if (needs_barrier) { + glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT); + _sg_stats_add(gl.num_memory_barriers, 1); + } +} +#endif + _SOKOL_PRIVATE bool _sg_gl_apply_bindings(_sg_bindings_t* bnd) { SOKOL_ASSERT(bnd); SOKOL_ASSERT(bnd->pip && bnd->pip->shader); @@ -9480,6 +9876,11 @@ _SOKOL_PRIVATE bool _sg_gl_apply_bindings(_sg_bindings_t* bnd) { _SG_GL_CHECK_ERROR(); const _sg_shader_t* shd = bnd->pip->shader; + // take care of storage buffer memory barriers + #if defined(_SOKOL_GL_HAS_COMPUTE) + _sg_gl_handle_memory_barriers(shd, bnd); + #endif + // bind combined image-samplers _SG_GL_CHECK_ERROR(); for (size_t img_smp_index = 0; img_smp_index < SG_MAX_IMAGE_SAMPLER_PAIRS; img_smp_index++) { @@ -9513,6 +9914,12 @@ _SOKOL_PRIVATE bool _sg_gl_apply_bindings(_sg_bindings_t* bnd) { GLuint gl_sbuf = sbuf->gl.buf[sbuf->cmn.active_slot]; _sg_gl_cache_bind_storage_buffer(binding, gl_sbuf); } + _SG_GL_CHECK_ERROR(); + + // if this is a compute pipeline, early out here + if (bnd->pip->cmn.is_compute) { + return true; + } // index buffer (can be 0) const GLuint gl_ib = bnd->ib ? 
bnd->ib->gl.buf[bnd->ib->cmn.active_slot] : 0; @@ -9653,6 +10060,14 @@ _SOKOL_PRIVATE void _sg_gl_draw(int base_element, int num_elements, int num_inst } } +_SOKOL_PRIVATE void _sg_gl_dispatch(int num_groups_x, int num_groups_y, int num_groups_z) { + #if defined(_SOKOL_GL_HAS_COMPUTE) + glDispatchCompute((GLuint)num_groups_x, (GLuint)num_groups_y, (GLuint)num_groups_z); + #else + (void)num_groups_x; (void)num_groups_y; (void)num_groups_z; + #endif +} + _SOKOL_PRIVATE void _sg_gl_commit(void) { // "soft" clear bindings (only those that are actually bound) _sg_gl_cache_clear_buffer_bindings(false); @@ -9867,6 +10282,14 @@ static inline void _sg_d3d11_PSSetShader(ID3D11DeviceContext* self, ID3D11PixelS #endif } +static inline void _sg_d3d11_CSSetShader(ID3D11DeviceContext* self, ID3D11ComputeShader* pComputeShader, ID3D11ClassInstance* const* ppClassInstances, UINT NumClassInstances) { + #if defined(__cplusplus) + self->CSSetShader(pComputeShader, ppClassInstances, NumClassInstances); + #else + self->lpVtbl->CSSetShader(self, pComputeShader, ppClassInstances, NumClassInstances); + #endif +} + static inline void _sg_d3d11_VSSetConstantBuffers(ID3D11DeviceContext* self, UINT StartSlot, UINT NumBuffers, ID3D11Buffer* const* ppConstantBuffers) { #if defined(__cplusplus) self->VSSetConstantBuffers(StartSlot, NumBuffers, ppConstantBuffers); @@ -9883,6 +10306,14 @@ static inline void _sg_d3d11_PSSetConstantBuffers(ID3D11DeviceContext* self, UIN #endif } +static inline void _sg_d3d11_CSSetConstantBuffers(ID3D11DeviceContext* self, UINT StartSlot, UINT NumBuffers, ID3D11Buffer* const* ppConstantBuffers) { + #if defined(__cplusplus) + self->CSSetConstantBuffers(StartSlot, NumBuffers, ppConstantBuffers); + #else + self->lpVtbl->CSSetConstantBuffers(self, StartSlot, NumBuffers, ppConstantBuffers); + #endif +} + static inline void _sg_d3d11_VSSetShaderResources(ID3D11DeviceContext* self, UINT StartSlot, UINT NumViews, ID3D11ShaderResourceView* const* ppShaderResourceViews) { #if defined(__cplusplus) self->VSSetShaderResources(StartSlot, NumViews, ppShaderResourceViews); @@ -9899,6 +10330,14 @@ static inline void _sg_d3d11_PSSetShaderResources(ID3D11DeviceContext* self, UIN #endif } +static inline void _sg_d3d11_CSSetShaderResources(ID3D11DeviceContext* self, UINT StartSlot, UINT NumViews, ID3D11ShaderResourceView* const* ppShaderResourceViews) { + #if defined(__cplusplus) + self->CSSetShaderResources(StartSlot, NumViews, ppShaderResourceViews); + #else + self->lpVtbl->CSSetShaderResources(self, StartSlot, NumViews, ppShaderResourceViews); + #endif +} + static inline void _sg_d3d11_VSSetSamplers(ID3D11DeviceContext* self, UINT StartSlot, UINT NumSamplers, ID3D11SamplerState* const* ppSamplers) { #if defined(__cplusplus) self->VSSetSamplers(StartSlot, NumSamplers, ppSamplers); @@ -9915,6 +10354,22 @@ static inline void _sg_d3d11_PSSetSamplers(ID3D11DeviceContext* self, UINT Start #endif } +static inline void _sg_d3d11_CSSetSamplers(ID3D11DeviceContext* self, UINT StartSlot, UINT NumSamplers, ID3D11SamplerState* const* ppSamplers) { + #if defined(__cplusplus) + self->CSSetSamplers(StartSlot, NumSamplers, ppSamplers); + #else + self->lpVtbl->CSSetSamplers(self, StartSlot, NumSamplers, ppSamplers); + #endif +} + +static inline void _sg_d3d11_CSSetUnorderedAccessViews(ID3D11DeviceContext* self, UINT StartSlot, UINT NumUAVs, ID3D11UnorderedAccessView* const* ppUnorderedAccessViews, const UINT* pUAVInitialCounts) { + #if defined(__cplusplus) + self->CSSetUnorderedAccessViews(StartSlot, NumUAVs, 
ppUnorderedAccessViews, pUAVInitialCounts); + #else + self->lpVtbl->CSSetUnorderedAccessViews(self, StartSlot, NumUAVs, ppUnorderedAccessViews, pUAVInitialCounts); + #endif +} + static inline HRESULT _sg_d3d11_CreateBuffer(ID3D11Device* self, const D3D11_BUFFER_DESC* pDesc, const D3D11_SUBRESOURCE_DATA* pInitialData, ID3D11Buffer** ppBuffer) { #if defined(__cplusplus) return self->CreateBuffer(pDesc, pInitialData, ppBuffer); @@ -9939,6 +10394,14 @@ static inline HRESULT _sg_d3d11_CreateShaderResourceView(ID3D11Device* self, ID3 #endif } +static inline HRESULT _sg_d3d11_CreateUnorderedAccessView(ID3D11Device* self, ID3D11Resource* pResource, const D3D11_UNORDERED_ACCESS_VIEW_DESC* pDesc, ID3D11UnorderedAccessView** ppUAVView) { + #if defined(__cplusplus) + return self->CreateUnorderedAccessView(pResource, pDesc, ppUAVView); + #else + return self->lpVtbl->CreateUnorderedAccessView(self, pResource, pDesc, ppUAVView); + #endif +} + static inline void _sg_d3d11_GetResource(ID3D11View* self, ID3D11Resource** ppResource) { #if defined(__cplusplus) self->GetResource(ppResource); @@ -9995,6 +10458,14 @@ static inline HRESULT _sg_d3d11_CreatePixelShader(ID3D11Device* self, const void #endif } +static inline HRESULT _sg_d3d11_CreateComputeShader(ID3D11Device* self, const void* pShaderBytecode, SIZE_T BytecodeLength, ID3D11ClassLinkage* pClassLinkage, ID3D11ComputeShader** ppComputeShader) { + #if defined(__cplusplus) + return self->CreateComputeShader(pShaderBytecode, BytecodeLength, pClassLinkage, ppComputeShader); + #else + return self->lpVtbl->CreateComputeShader(self, pShaderBytecode, BytecodeLength, pClassLinkage, ppComputeShader); + #endif +} + static inline HRESULT _sg_d3d11_CreateInputLayout(ID3D11Device* self, const D3D11_INPUT_ELEMENT_DESC* pInputElementDescs, UINT NumElements, const void* pShaderBytecodeWithInputSignature, SIZE_T BytecodeLength, ID3D11InputLayout **ppInputLayout) { #if defined(__cplusplus) return self->CreateInputLayout(pInputElementDescs, NumElements, pShaderBytecodeWithInputSignature, BytecodeLength, ppInputLayout); @@ -10131,6 +10602,14 @@ static inline void _sg_d3d11_DrawInstanced(ID3D11DeviceContext* self, UINT Verte #endif } +static inline void _sg_d3d11_Dispatch(ID3D11DeviceContext* self, UINT ThreadGroupCountX, UINT ThreadGroupCountY, UINT ThreadGroupCountZ) { + #if defined(__cplusplus) + self->Dispatch(ThreadGroupCountX, ThreadGroupCountY, ThreadGroupCountZ); + #else + self->lpVtbl->Dispatch(self, ThreadGroupCountX, ThreadGroupCountY, ThreadGroupCountZ); + #endif +} + static inline HRESULT _sg_d3d11_Map(ID3D11DeviceContext* self, ID3D11Resource* pResource, UINT Subresource, D3D11_MAP MapType, UINT MapFlags, D3D11_MAPPED_SUBRESOURCE* pMappedResource) { #if defined(__cplusplus) return self->Map(pResource, Subresource, MapType, MapFlags, pMappedResource); @@ -10169,15 +10648,35 @@ _SOKOL_PRIVATE D3D11_USAGE _sg_d3d11_usage(sg_usage usg) { } } -_SOKOL_PRIVATE UINT _sg_d3d11_buffer_bind_flags(sg_buffer_type t) { +_SOKOL_PRIVATE D3D11_USAGE _sg_d3d11_buffer_usage(sg_usage usg, sg_buffer_type type) { + switch (usg) { + case SG_USAGE_IMMUTABLE: + if (type == SG_BUFFERTYPE_STORAGEBUFFER) { + return D3D11_USAGE_DEFAULT; + } else { + return D3D11_USAGE_IMMUTABLE; + } + case SG_USAGE_DYNAMIC: + case SG_USAGE_STREAM: + return D3D11_USAGE_DYNAMIC; + default: + SOKOL_UNREACHABLE; + return (D3D11_USAGE) 0; + } +} + +_SOKOL_PRIVATE UINT _sg_d3d11_buffer_bind_flags(sg_usage usg, sg_buffer_type t) { switch (t) { case SG_BUFFERTYPE_VERTEXBUFFER: return D3D11_BIND_VERTEX_BUFFER; case 
SG_BUFFERTYPE_INDEXBUFFER: return D3D11_BIND_INDEX_BUFFER; case SG_BUFFERTYPE_STORAGEBUFFER: - // FIXME: for compute shaders we'd want UNORDERED_ACCESS? - return D3D11_BIND_SHADER_RESOURCE; + if (usg == SG_USAGE_IMMUTABLE) { + return D3D11_BIND_SHADER_RESOURCE | D3D11_BIND_UNORDERED_ACCESS; + } else { + return D3D11_BIND_SHADER_RESOURCE; + } default: SOKOL_UNREACHABLE; return 0; @@ -10507,7 +11006,7 @@ _SOKOL_PRIVATE void _sg_d3d11_init_caps(void) { _sg.features.image_clamp_to_border = true; _sg.features.mrt_independent_blend_state = true; _sg.features.mrt_independent_write_mask = true; - _sg.features.storage_buffer = true; + _sg.features.compute = true; _sg.features.msaa_image_bindings = true; _sg.limits.max_image_size_2d = 16 * 1024; @@ -10576,28 +11075,36 @@ _SOKOL_PRIVATE sg_resource_state _sg_d3d11_create_buffer(_sg_buffer_t* buf, cons D3D11_BUFFER_DESC d3d11_buf_desc; _sg_clear(&d3d11_buf_desc, sizeof(d3d11_buf_desc)); d3d11_buf_desc.ByteWidth = (UINT)buf->cmn.size; - d3d11_buf_desc.Usage = _sg_d3d11_usage(buf->cmn.usage); - d3d11_buf_desc.BindFlags = _sg_d3d11_buffer_bind_flags(buf->cmn.type); + d3d11_buf_desc.Usage = _sg_d3d11_buffer_usage(buf->cmn.usage, buf->cmn.type); + d3d11_buf_desc.BindFlags = _sg_d3d11_buffer_bind_flags(buf->cmn.usage, buf->cmn.type); d3d11_buf_desc.CPUAccessFlags = _sg_d3d11_cpu_access_flags(buf->cmn.usage); d3d11_buf_desc.MiscFlags = _sg_d3d11_buffer_misc_flags(buf->cmn.type); D3D11_SUBRESOURCE_DATA* init_data_ptr = 0; D3D11_SUBRESOURCE_DATA init_data; _sg_clear(&init_data, sizeof(init_data)); if (buf->cmn.usage == SG_USAGE_IMMUTABLE) { - SOKOL_ASSERT(desc->data.ptr); - init_data.pSysMem = desc->data.ptr; + // D3D11 doesn't allow creating immutable buffers without data, so need + // to explicitly provide a zero-initialized memory buffer + if (desc->data.ptr) { + init_data.pSysMem = desc->data.ptr; + } else { + init_data.pSysMem = (const void*)_sg_malloc_clear(buf->cmn.size); + } init_data_ptr = &init_data; } HRESULT hr = _sg_d3d11_CreateBuffer(_sg.d3d11.dev, &d3d11_buf_desc, init_data_ptr, &buf->d3d11.buf); + if (init_data.pSysMem && (desc->data.ptr == 0)) { + _sg_free((void*)init_data.pSysMem); + } if (!(SUCCEEDED(hr) && buf->d3d11.buf)) { _SG_ERROR(D3D11_CREATE_BUFFER_FAILED); return SG_RESOURCESTATE_FAILED; } - // for storage buffers need to create a view object + // for storage buffers need to create a shader-resource-view + // for read-only access, and an unordered-access-view for + // read-write access if (buf->cmn.type == SG_BUFFERTYPE_STORAGEBUFFER) { - // FIXME: currently only shader-resource-view, in future also UAV - // storage buffer size must be multiple of 4 SOKOL_ASSERT(_sg_multiple_u64(buf->cmn.size, 4)); D3D11_SHADER_RESOURCE_VIEW_DESC d3d11_srv_desc; _sg_clear(&d3d11_srv_desc, sizeof(d3d11_srv_desc)); @@ -10611,6 +11118,20 @@ _SOKOL_PRIVATE sg_resource_state _sg_d3d11_create_buffer(_sg_buffer_t* buf, cons _SG_ERROR(D3D11_CREATE_BUFFER_SRV_FAILED); return SG_RESOURCESTATE_FAILED; } + if (buf->cmn.usage == SG_USAGE_IMMUTABLE) { + D3D11_UNORDERED_ACCESS_VIEW_DESC d3d11_uav_desc; + _sg_clear(&d3d11_uav_desc, sizeof(d3d11_uav_desc)); + d3d11_uav_desc.Format = DXGI_FORMAT_R32_TYPELESS; + d3d11_uav_desc.ViewDimension = D3D11_UAV_DIMENSION_BUFFER; + d3d11_uav_desc.Buffer.FirstElement = 0; + d3d11_uav_desc.Buffer.NumElements = buf->cmn.size / 4; + d3d11_uav_desc.Buffer.Flags = D3D11_BUFFER_UAV_FLAG_RAW; + hr = _sg_d3d11_CreateUnorderedAccessView(_sg.d3d11.dev, (ID3D11Resource*)buf->d3d11.buf, &d3d11_uav_desc, &buf->d3d11.uav); + if 
(!(SUCCEEDED(hr) && buf->d3d11.uav)) { + _SG_ERROR(D3D11_CREATE_BUFFER_UAV_FAILED); + return SG_RESOURCESTATE_FAILED; + } + } } _sg_d3d11_setlabel(buf->d3d11.buf, desc->label); } @@ -10625,6 +11146,9 @@ _SOKOL_PRIVATE void _sg_d3d11_discard_buffer(_sg_buffer_t* buf) { if (buf->d3d11.srv) { _sg_d3d11_Release(buf->d3d11.srv); } + if (buf->d3d11.uav) { + _sg_d3d11_Release(buf->d3d11.uav); + } } _SOKOL_PRIVATE void _sg_d3d11_fill_subres_data(const _sg_image_t* img, const sg_image_data* data) { @@ -10948,13 +11472,17 @@ _SOKOL_PRIVATE bool _sg_d3d11_ensure_hlsl_bindslot_ranges(const sg_shader_desc* } } for (size_t i = 0; i < SG_MAX_STORAGEBUFFER_BINDSLOTS; i++) { - if (desc->storage_buffers[i].hlsl_register_t_n >= _SG_D3D11_MAX_STAGE_TEX_SBUF_BINDINGS) { + if (desc->storage_buffers[i].hlsl_register_t_n >= _SG_D3D11_MAX_STAGE_SRV_BINDINGS) { _SG_ERROR(D3D11_STORAGEBUFFER_HLSL_REGISTER_T_OUT_OF_RANGE); return false; } + if (desc->storage_buffers[i].hlsl_register_u_n >= _SG_D3D11_MAX_STAGE_UAV_BINDINGS) { + _SG_ERROR(D3D11_STORAGEBUFFER_HLSL_REGISTER_U_OUT_OF_RANGE); + return false; + } } for (size_t i = 0; i < SG_MAX_IMAGE_BINDSLOTS; i++) { - if (desc->images[i].hlsl_register_t_n >= _SG_D3D11_MAX_STAGE_TEX_SBUF_BINDINGS) { + if (desc->images[i].hlsl_register_t_n >= _SG_D3D11_MAX_STAGE_SRV_BINDINGS) { _SG_ERROR(D3D11_IMAGE_HLSL_REGISTER_T_OUT_OF_RANGE); return false; } @@ -10970,7 +11498,7 @@ _SOKOL_PRIVATE bool _sg_d3d11_ensure_hlsl_bindslot_ranges(const sg_shader_desc* _SOKOL_PRIVATE sg_resource_state _sg_d3d11_create_shader(_sg_shader_t* shd, const sg_shader_desc* desc) { SOKOL_ASSERT(shd && desc); - SOKOL_ASSERT(!shd->d3d11.vs && !shd->d3d11.fs && !shd->d3d11.vs_blob); + SOKOL_ASSERT(!shd->d3d11.vs && !shd->d3d11.fs && !shd->d3d11.cs && !shd->d3d11.vs_blob); HRESULT hr; // perform a range-check on HLSL bindslots that's also active in release @@ -10991,6 +11519,7 @@ _SOKOL_PRIVATE sg_resource_state _sg_d3d11_create_shader(_sg_shader_t* shd, cons } for (size_t i = 0; i < SG_MAX_STORAGEBUFFER_BINDSLOTS; i++) { shd->d3d11.sbuf_register_t_n[i] = desc->storage_buffers[i].hlsl_register_t_n; + shd->d3d11.sbuf_register_u_n[i] = desc->storage_buffers[i].hlsl_register_u_n; } for (size_t i = 0; i < SG_MAX_IMAGE_BINDSLOTS; i++) { shd->d3d11.img_register_t_n[i] = desc->images[i].hlsl_register_t_n; @@ -11005,8 +11534,6 @@ _SOKOL_PRIVATE sg_resource_state _sg_d3d11_create_shader(_sg_shader_t* shd, cons if (stage == SG_SHADERSTAGE_NONE) { continue; } - SOKOL_ASSERT((stage == SG_SHADERSTAGE_VERTEX) || (stage == SG_SHADERSTAGE_FRAGMENT)); - const _sg_shader_uniform_block_t* ub = &shd->cmn.uniform_blocks[ub_index]; ID3D11Buffer* cbuf = 0; D3D11_BUFFER_DESC cb_desc; @@ -11027,59 +11554,107 @@ _SOKOL_PRIVATE sg_resource_state _sg_d3d11_create_shader(_sg_shader_t* shd, cons if (stage == SG_SHADERSTAGE_VERTEX) { SOKOL_ASSERT(0 == shd->d3d11.vs_cbufs[d3d11_slot]); shd->d3d11.vs_cbufs[d3d11_slot] = cbuf; - } else { + } else if (stage == SG_SHADERSTAGE_FRAGMENT) { SOKOL_ASSERT(0 == shd->d3d11.fs_cbufs[d3d11_slot]); shd->d3d11.fs_cbufs[d3d11_slot] = cbuf; + } else if (stage == SG_SHADERSTAGE_COMPUTE) { + SOKOL_ASSERT(0 == shd->d3d11.cs_cbufs[d3d11_slot]); + shd->d3d11.cs_cbufs[d3d11_slot] = cbuf; + } else { + SOKOL_UNREACHABLE; } } // create shader functions - const void* vs_ptr = 0, *fs_ptr = 0; - SIZE_T vs_length = 0, fs_length = 0; - ID3DBlob* vs_blob = 0, *fs_blob = 0; - if (desc->vertex_func.bytecode.ptr && desc->fragment_func.bytecode.ptr) { - // create from shader byte code - vs_ptr = 
desc->vertex_func.bytecode.ptr; - fs_ptr = desc->fragment_func.bytecode.ptr; - vs_length = desc->vertex_func.bytecode.size; - fs_length = desc->fragment_func.bytecode.size; - } else { - // compile from shader source code - vs_blob = _sg_d3d11_compile_shader(&desc->vertex_func); - fs_blob = _sg_d3d11_compile_shader(&desc->fragment_func); - if (vs_blob && fs_blob) { - vs_ptr = _sg_d3d11_GetBufferPointer(vs_blob); - vs_length = _sg_d3d11_GetBufferSize(vs_blob); - fs_ptr = _sg_d3d11_GetBufferPointer(fs_blob); - fs_length = _sg_d3d11_GetBufferSize(fs_blob); + const bool has_vs = desc->vertex_func.bytecode.ptr || desc->vertex_func.source; + const bool has_fs = desc->fragment_func.bytecode.ptr || desc->fragment_func.source; + const bool has_cs = desc->compute_func.bytecode.ptr || desc->compute_func.source; + bool vs_valid = false; bool fs_valid = false; bool cs_valid = false; + if (has_vs) { + const void* vs_ptr = 0; SIZE_T vs_length = 0; + ID3DBlob* vs_blob = 0; + if (desc->vertex_func.bytecode.ptr) { + SOKOL_ASSERT(desc->vertex_func.bytecode.size > 0); + vs_ptr = desc->vertex_func.bytecode.ptr; + vs_length = desc->vertex_func.bytecode.size; + } else { + SOKOL_ASSERT(desc->vertex_func.source); + vs_blob = _sg_d3d11_compile_shader(&desc->vertex_func); + if (vs_blob) { + vs_ptr = _sg_d3d11_GetBufferPointer(vs_blob); + vs_length = _sg_d3d11_GetBufferSize(vs_blob); + } } - } - sg_resource_state result = SG_RESOURCESTATE_FAILED; - if (vs_ptr && fs_ptr && (vs_length > 0) && (fs_length > 0)) { - // create the D3D vertex- and pixel-shader objects - hr = _sg_d3d11_CreateVertexShader(_sg.d3d11.dev, vs_ptr, vs_length, NULL, &shd->d3d11.vs); - bool vs_succeeded = SUCCEEDED(hr) && shd->d3d11.vs; - hr = _sg_d3d11_CreatePixelShader(_sg.d3d11.dev, fs_ptr, fs_length, NULL, &shd->d3d11.fs); - bool fs_succeeded = SUCCEEDED(hr) && shd->d3d11.fs; - - // need to store the vertex shader byte code, this is needed later in sg_create_pipeline - if (vs_succeeded && fs_succeeded) { + if (vs_ptr && (vs_length > 0)) { + hr = _sg_d3d11_CreateVertexShader(_sg.d3d11.dev, vs_ptr, vs_length, NULL, &shd->d3d11.vs); + vs_valid = SUCCEEDED(hr) && shd->d3d11.vs; + } + // set label, and need to store a copy of the vertex shader blob for the pipeline creation + if (vs_valid) { + _sg_d3d11_setlabel(shd->d3d11.vs, desc->label); shd->d3d11.vs_blob_length = vs_length; shd->d3d11.vs_blob = _sg_malloc((size_t)vs_length); SOKOL_ASSERT(shd->d3d11.vs_blob); memcpy(shd->d3d11.vs_blob, vs_ptr, vs_length); - result = SG_RESOURCESTATE_VALID; - _sg_d3d11_setlabel(shd->d3d11.vs, desc->label); - _sg_d3d11_setlabel(shd->d3d11.fs, desc->label); + } + if (vs_blob) { + _sg_d3d11_Release(vs_blob); } } - if (vs_blob) { - _sg_d3d11_Release(vs_blob); vs_blob = 0; + if (has_fs) { + const void* fs_ptr = 0; SIZE_T fs_length = 0; + ID3DBlob* fs_blob = 0; + if (desc->fragment_func.bytecode.ptr) { + SOKOL_ASSERT(desc->fragment_func.bytecode.size > 0); + fs_ptr = desc->fragment_func.bytecode.ptr; + fs_length = desc->fragment_func.bytecode.size; + } else { + SOKOL_ASSERT(desc->fragment_func.source); + fs_blob = _sg_d3d11_compile_shader(&desc->fragment_func); + if (fs_blob) { + fs_ptr = _sg_d3d11_GetBufferPointer(fs_blob); + fs_length = _sg_d3d11_GetBufferSize(fs_blob); + } + } + if (fs_ptr && (fs_length > 0)) { + hr = _sg_d3d11_CreatePixelShader(_sg.d3d11.dev, fs_ptr, fs_length, NULL, &shd->d3d11.fs); + fs_valid = SUCCEEDED(hr) && shd->d3d11.fs; + } + if (fs_valid) { + _sg_d3d11_setlabel(shd->d3d11.fs, desc->label); + } + if (fs_blob) { + _sg_d3d11_Release(fs_blob); 
+ } } - if (fs_blob) { - _sg_d3d11_Release(fs_blob); fs_blob = 0; + if (has_cs) { + const void* cs_ptr = 0; SIZE_T cs_length = 0; + ID3DBlob* cs_blob = 0; + if (desc->compute_func.bytecode.ptr) { + SOKOL_ASSERT(desc->compute_func.bytecode.size > 0); + cs_ptr = desc->compute_func.bytecode.ptr; + cs_length = desc->compute_func.bytecode.size; + } else { + SOKOL_ASSERT(desc->compute_func.source); + cs_blob = _sg_d3d11_compile_shader(&desc->compute_func); + if (cs_blob) { + cs_ptr = _sg_d3d11_GetBufferPointer(cs_blob); + cs_length = _sg_d3d11_GetBufferSize(cs_blob); + } + } + if (cs_ptr && (cs_length > 0)) { + hr = _sg_d3d11_CreateComputeShader(_sg.d3d11.dev, cs_ptr, cs_length, NULL, &shd->d3d11.cs); + cs_valid = SUCCEEDED(hr) && shd->d3d11.cs; + } + if (cs_blob) { + _sg_d3d11_Release(cs_blob); + } + } + if ((vs_valid && fs_valid) || cs_valid) { + return SG_RESOURCESTATE_VALID; + } else { + return SG_RESOURCESTATE_FAILED; } - return result; } _SOKOL_PRIVATE void _sg_d3d11_discard_shader(_sg_shader_t* shd) { @@ -11090,6 +11665,9 @@ _SOKOL_PRIVATE void _sg_d3d11_discard_shader(_sg_shader_t* shd) { if (shd->d3d11.fs) { _sg_d3d11_Release(shd->d3d11.fs); } + if (shd->d3d11.cs) { + _sg_d3d11_Release(shd->d3d11.cs); + } if (shd->d3d11.vs_blob) { _sg_free(shd->d3d11.vs_blob); } @@ -11104,10 +11682,18 @@ _SOKOL_PRIVATE sg_resource_state _sg_d3d11_create_pipeline(_sg_pipeline_t* pip, SOKOL_ASSERT(pip && shd && desc); SOKOL_ASSERT(desc->shader.id == shd->slot.id); SOKOL_ASSERT(shd->slot.state == SG_RESOURCESTATE_VALID); + + pip->shader = shd; + + // if this is a compute pipeline, we're done here + if (pip->cmn.is_compute) { + return SG_RESOURCESTATE_VALID; + } + + // a render pipeline... SOKOL_ASSERT(shd->d3d11.vs_blob && shd->d3d11.vs_blob_length > 0); SOKOL_ASSERT(!pip->d3d11.il && !pip->d3d11.rs && !pip->d3d11.dss && !pip->d3d11.bs); - pip->shader = shd; pip->d3d11.index_format = _sg_d3d11_index_format(pip->cmn.index_type); pip->d3d11.topology = _sg_d3d11_primitive_topology(desc->primitive_type); pip->d3d11.stencil_ref = desc->stencil.ref; @@ -11116,7 +11702,7 @@ _SOKOL_PRIVATE sg_resource_state _sg_d3d11_create_pipeline(_sg_pipeline_t* pip, HRESULT hr; D3D11_INPUT_ELEMENT_DESC d3d11_comps[SG_MAX_VERTEX_ATTRIBUTES]; _sg_clear(d3d11_comps, sizeof(d3d11_comps)); - int attr_index = 0; + size_t attr_index = 0; for (; attr_index < SG_MAX_VERTEX_ATTRIBUTES; attr_index++) { const sg_vertex_attr_state* a_state = &desc->layout.attrs[attr_index]; if (a_state->format == SG_VERTEXFORMAT_INVALID) { @@ -11139,7 +11725,7 @@ _SOKOL_PRIVATE sg_resource_state _sg_d3d11_create_pipeline(_sg_pipeline_t* pip, pip->cmn.use_instanced_draw = true; } } - for (int layout_index = 0; layout_index < SG_MAX_VERTEXBUFFER_BINDSLOTS; layout_index++) { + for (size_t layout_index = 0; layout_index < SG_MAX_VERTEXBUFFER_BINDSLOTS; layout_index++) { if (pip->cmn.vertex_buffer_layout_active[layout_index]) { const sg_vertex_buffer_layout_state* l_state = &desc->layout.buffers[layout_index]; SOKOL_ASSERT(l_state->stride > 0); @@ -11214,7 +11800,7 @@ _SOKOL_PRIVATE sg_resource_state _sg_d3d11_create_pipeline(_sg_pipeline_t* pip, bs_desc.AlphaToCoverageEnable = desc->alpha_to_coverage_enabled; bs_desc.IndependentBlendEnable = TRUE; { - int i = 0; + size_t i = 0; for (i = 0; i < desc->color_count; i++) { const sg_blend_state* src = &desc->colors[i].blend; D3D11_RENDER_TARGET_BLEND_DESC* dst = &bs_desc.RenderTarget[i]; @@ -11271,7 +11857,7 @@ _SOKOL_PRIVATE sg_resource_state _sg_d3d11_create_attachments(_sg_attachments_t* 
SOKOL_ASSERT(_sg.d3d11.dev); // copy image pointers - for (int i = 0; i < atts->cmn.num_colors; i++) { + for (size_t i = 0; i < atts->cmn.num_colors; i++) { const sg_attachment_desc* color_desc = &desc->colors[i]; _SOKOL_UNUSED(color_desc); SOKOL_ASSERT(color_desc->image.id != SG_INVALID_ID); @@ -11297,7 +11883,7 @@ _SOKOL_PRIVATE sg_resource_state _sg_d3d11_create_attachments(_sg_attachments_t* } // create render-target views - for (int i = 0; i < atts->cmn.num_colors; i++) { + for (size_t i = 0; i < atts->cmn.num_colors; i++) { const _sg_attachment_common_t* cmn_color_att = &atts->cmn.colors[i]; const _sg_image_t* color_img = color_images[i]; SOKOL_ASSERT(0 == atts->d3d11.colors[i].view.rtv); @@ -11379,7 +11965,7 @@ _SOKOL_PRIVATE sg_resource_state _sg_d3d11_create_attachments(_sg_attachments_t* _SOKOL_PRIVATE void _sg_d3d11_discard_attachments(_sg_attachments_t* atts) { SOKOL_ASSERT(atts); - for (int i = 0; i < SG_MAX_COLOR_ATTACHMENTS; i++) { + for (size_t i = 0; i < SG_MAX_COLOR_ATTACHMENTS; i++) { if (atts->d3d11.colors[i].view.rtv) { _sg_d3d11_Release(atts->d3d11.colors[i].view.rtv); } @@ -11409,7 +11995,10 @@ _SOKOL_PRIVATE _sg_image_t* _sg_d3d11_attachments_ds_image(const _sg_attachments _SOKOL_PRIVATE void _sg_d3d11_begin_pass(const sg_pass* pass) { SOKOL_ASSERT(pass); - + if (_sg.cur_pass.is_compute) { + // nothing to do in compute passes + return; + } const _sg_attachments_t* atts = _sg.cur_pass.atts; const sg_swapchain* swapchain = &pass->swapchain; const sg_pass_action* action = &pass->action; @@ -11421,7 +12010,7 @@ _SOKOL_PRIVATE void _sg_d3d11_begin_pass(const sg_pass* pass) { _sg.d3d11.cur_pass.resolve_view = 0; if (atts) { num_rtvs = atts->cmn.num_colors; - for (int i = 0; i < SG_MAX_COLOR_ATTACHMENTS; i++) { + for (size_t i = 0; i < SG_MAX_COLOR_ATTACHMENTS; i++) { rtvs[i] = atts->d3d11.colors[i].view.rtv; } dsv = atts->d3d11.depth_stencil.view.dsv; @@ -11453,7 +12042,7 @@ _SOKOL_PRIVATE void _sg_d3d11_begin_pass(const sg_pass* pass) { _sg_d3d11_RSSetScissorRects(_sg.d3d11.ctx, 1, &rect); // perform clear action - for (int i = 0; i < num_rtvs; i++) { + for (size_t i = 0; i < num_rtvs; i++) { if (action->colors[i].load_action == SG_LOADACTION_CLEAR) { _sg_d3d11_ClearRenderTargetView(_sg.d3d11.ctx, rtvs[i], (float*)&action->colors[i].clear_value); _sg_stats_add(d3d11.pass.num_clear_render_target_view, 1); @@ -11480,55 +12069,57 @@ _SOKOL_PRIVATE UINT _sg_d3d11_calcsubresource(UINT mip_slice, UINT array_slice, _SOKOL_PRIVATE void _sg_d3d11_end_pass(void) { SOKOL_ASSERT(_sg.d3d11.ctx); - // need to resolve MSAA render attachments into texture? - if (_sg.cur_pass.atts_id.id != SG_INVALID_ID) { - // ...for offscreen pass... 
- SOKOL_ASSERT(_sg.cur_pass.atts && _sg.cur_pass.atts->slot.id == _sg.cur_pass.atts_id.id); - for (int i = 0; i < _sg.cur_pass.atts->cmn.num_colors; i++) { - const _sg_image_t* resolve_img = _sg.cur_pass.atts->d3d11.resolves[i].image; - if (resolve_img) { - const _sg_image_t* color_img = _sg.cur_pass.atts->d3d11.colors[i].image; - const _sg_attachment_common_t* cmn_color_att = &_sg.cur_pass.atts->cmn.colors[i]; - const _sg_attachment_common_t* cmn_resolve_att = &_sg.cur_pass.atts->cmn.resolves[i]; - SOKOL_ASSERT(resolve_img->slot.id == cmn_resolve_att->image_id.id); - SOKOL_ASSERT(color_img && (color_img->slot.id == cmn_color_att->image_id.id)); - SOKOL_ASSERT(color_img->cmn.sample_count > 1); - SOKOL_ASSERT(resolve_img->cmn.sample_count == 1); - const UINT src_subres = _sg_d3d11_calcsubresource( - (UINT)cmn_color_att->mip_level, - (UINT)cmn_color_att->slice, - (UINT)color_img->cmn.num_mipmaps); - const UINT dst_subres = _sg_d3d11_calcsubresource( - (UINT)cmn_resolve_att->mip_level, - (UINT)cmn_resolve_att->slice, - (UINT)resolve_img->cmn.num_mipmaps); - _sg_d3d11_ResolveSubresource(_sg.d3d11.ctx, - resolve_img->d3d11.res, - dst_subres, - color_img->d3d11.res, - src_subres, - color_img->d3d11.format); + if (!_sg.cur_pass.is_compute) { + // need to resolve MSAA render attachments into texture? + if (_sg.cur_pass.atts_id.id != SG_INVALID_ID) { + // ...for offscreen pass... + SOKOL_ASSERT(_sg.cur_pass.atts && _sg.cur_pass.atts->slot.id == _sg.cur_pass.atts_id.id); + for (size_t i = 0; i < _sg.cur_pass.atts->cmn.num_colors; i++) { + const _sg_image_t* resolve_img = _sg.cur_pass.atts->d3d11.resolves[i].image; + if (resolve_img) { + const _sg_image_t* color_img = _sg.cur_pass.atts->d3d11.colors[i].image; + const _sg_attachment_common_t* cmn_color_att = &_sg.cur_pass.atts->cmn.colors[i]; + const _sg_attachment_common_t* cmn_resolve_att = &_sg.cur_pass.atts->cmn.resolves[i]; + SOKOL_ASSERT(resolve_img->slot.id == cmn_resolve_att->image_id.id); + SOKOL_ASSERT(color_img && (color_img->slot.id == cmn_color_att->image_id.id)); + SOKOL_ASSERT(color_img->cmn.sample_count > 1); + SOKOL_ASSERT(resolve_img->cmn.sample_count == 1); + const UINT src_subres = _sg_d3d11_calcsubresource( + (UINT)cmn_color_att->mip_level, + (UINT)cmn_color_att->slice, + (UINT)color_img->cmn.num_mipmaps); + const UINT dst_subres = _sg_d3d11_calcsubresource( + (UINT)cmn_resolve_att->mip_level, + (UINT)cmn_resolve_att->slice, + (UINT)resolve_img->cmn.num_mipmaps); + _sg_d3d11_ResolveSubresource(_sg.d3d11.ctx, + resolve_img->d3d11.res, + dst_subres, + color_img->d3d11.res, + src_subres, + color_img->d3d11.format); + _sg_stats_add(d3d11.pass.num_resolve_subresource, 1); + } + } + } else { + // ...for swapchain pass... 
+ if (_sg.d3d11.cur_pass.resolve_view) { + SOKOL_ASSERT(_sg.d3d11.cur_pass.render_view); + SOKOL_ASSERT(_sg.cur_pass.swapchain.sample_count > 1); + SOKOL_ASSERT(_sg.cur_pass.swapchain.color_fmt > SG_PIXELFORMAT_NONE); + ID3D11Resource* d3d11_render_res = 0; + ID3D11Resource* d3d11_resolve_res = 0; + _sg_d3d11_GetResource((ID3D11View*)_sg.d3d11.cur_pass.render_view, &d3d11_render_res); + _sg_d3d11_GetResource((ID3D11View*)_sg.d3d11.cur_pass.resolve_view, &d3d11_resolve_res); + SOKOL_ASSERT(d3d11_render_res); + SOKOL_ASSERT(d3d11_resolve_res); + const sg_pixel_format color_fmt = _sg.cur_pass.swapchain.color_fmt; + _sg_d3d11_ResolveSubresource(_sg.d3d11.ctx, d3d11_resolve_res, 0, d3d11_render_res, 0, _sg_d3d11_rtv_pixel_format(color_fmt)); + _sg_d3d11_Release(d3d11_render_res); + _sg_d3d11_Release(d3d11_resolve_res); _sg_stats_add(d3d11.pass.num_resolve_subresource, 1); } } - } else { - // ...for swapchain pass... - if (_sg.d3d11.cur_pass.resolve_view) { - SOKOL_ASSERT(_sg.d3d11.cur_pass.render_view); - SOKOL_ASSERT(_sg.cur_pass.swapchain.sample_count > 1); - SOKOL_ASSERT(_sg.cur_pass.swapchain.color_fmt > SG_PIXELFORMAT_NONE); - ID3D11Resource* d3d11_render_res = 0; - ID3D11Resource* d3d11_resolve_res = 0; - _sg_d3d11_GetResource((ID3D11View*)_sg.d3d11.cur_pass.render_view, &d3d11_render_res); - _sg_d3d11_GetResource((ID3D11View*)_sg.d3d11.cur_pass.resolve_view, &d3d11_resolve_res); - SOKOL_ASSERT(d3d11_render_res); - SOKOL_ASSERT(d3d11_resolve_res); - const sg_pixel_format color_fmt = _sg.cur_pass.swapchain.color_fmt; - _sg_d3d11_ResolveSubresource(_sg.d3d11.ctx, d3d11_resolve_res, 0, d3d11_render_res, 0, _sg_d3d11_rtv_pixel_format(color_fmt)); - _sg_d3d11_Release(d3d11_render_res); - _sg_d3d11_Release(d3d11_resolve_res); - _sg_stats_add(d3d11.pass.num_resolve_subresource, 1); - } } _sg.d3d11.cur_pass.render_view = 0; _sg.d3d11.cur_pass.resolve_view = 0; @@ -11563,31 +12154,45 @@ _SOKOL_PRIVATE void _sg_d3d11_apply_pipeline(_sg_pipeline_t* pip) { SOKOL_ASSERT(pip); SOKOL_ASSERT(pip->shader && (pip->cmn.shader_id.id == pip->shader->slot.id)); SOKOL_ASSERT(_sg.d3d11.ctx); - SOKOL_ASSERT(pip->d3d11.rs && pip->d3d11.bs && pip->d3d11.dss); _sg.d3d11.cur_pipeline = pip; _sg.d3d11.cur_pipeline_id.id = pip->slot.id; - _sg.d3d11.use_indexed_draw = (pip->d3d11.index_format != DXGI_FORMAT_UNKNOWN); - _sg.d3d11.use_instanced_draw = pip->cmn.use_instanced_draw; - _sg_d3d11_RSSetState(_sg.d3d11.ctx, pip->d3d11.rs); - _sg_d3d11_OMSetDepthStencilState(_sg.d3d11.ctx, pip->d3d11.dss, pip->d3d11.stencil_ref); - _sg_d3d11_OMSetBlendState(_sg.d3d11.ctx, pip->d3d11.bs, (float*)&pip->cmn.blend_color, 0xFFFFFFFF); - _sg_d3d11_IASetPrimitiveTopology(_sg.d3d11.ctx, pip->d3d11.topology); - _sg_d3d11_IASetInputLayout(_sg.d3d11.ctx, pip->d3d11.il); - _sg_d3d11_VSSetShader(_sg.d3d11.ctx, pip->shader->d3d11.vs, NULL, 0); - _sg_d3d11_VSSetConstantBuffers(_sg.d3d11.ctx, 0, _SG_D3D11_MAX_STAGE_UB_BINDINGS, pip->shader->d3d11.vs_cbufs); - _sg_d3d11_PSSetShader(_sg.d3d11.ctx, pip->shader->d3d11.fs, NULL, 0); - _sg_d3d11_PSSetConstantBuffers(_sg.d3d11.ctx, 0, _SG_D3D11_MAX_STAGE_UB_BINDINGS, pip->shader->d3d11.fs_cbufs); - _sg_stats_add(d3d11.pipeline.num_rs_set_state, 1); - _sg_stats_add(d3d11.pipeline.num_om_set_depth_stencil_state, 1); - _sg_stats_add(d3d11.pipeline.num_om_set_blend_state, 1); - _sg_stats_add(d3d11.pipeline.num_ia_set_primitive_topology, 1); - _sg_stats_add(d3d11.pipeline.num_ia_set_input_layout, 1); - _sg_stats_add(d3d11.pipeline.num_vs_set_shader, 1); - 
_sg_stats_add(d3d11.pipeline.num_vs_set_constant_buffers, 1); - _sg_stats_add(d3d11.pipeline.num_ps_set_shader, 1); - _sg_stats_add(d3d11.pipeline.num_ps_set_constant_buffers, 1); + if (pip->cmn.is_compute) { + // a compute pipeline + SOKOL_ASSERT(pip->shader->d3d11.cs); + _sg_d3d11_CSSetShader(_sg.d3d11.ctx, pip->shader->d3d11.cs, NULL, 0); + _sg_d3d11_CSSetConstantBuffers(_sg.d3d11.ctx, 0, _SG_D3D11_MAX_STAGE_UB_BINDINGS, pip->shader->d3d11.cs_cbufs); + _sg_stats_add(d3d11.pipeline.num_cs_set_shader, 1); + _sg_stats_add(d3d11.pipeline.num_cs_set_constant_buffers, 1); + } else { + // a render pipeline + SOKOL_ASSERT(pip->d3d11.rs && pip->d3d11.bs && pip->d3d11.dss); + SOKOL_ASSERT(pip->shader->d3d11.vs); + SOKOL_ASSERT(pip->shader->d3d11.fs); + + _sg.d3d11.use_indexed_draw = (pip->d3d11.index_format != DXGI_FORMAT_UNKNOWN); + _sg.d3d11.use_instanced_draw = pip->cmn.use_instanced_draw; + + _sg_d3d11_RSSetState(_sg.d3d11.ctx, pip->d3d11.rs); + _sg_d3d11_OMSetDepthStencilState(_sg.d3d11.ctx, pip->d3d11.dss, pip->d3d11.stencil_ref); + _sg_d3d11_OMSetBlendState(_sg.d3d11.ctx, pip->d3d11.bs, (float*)&pip->cmn.blend_color, 0xFFFFFFFF); + _sg_d3d11_IASetPrimitiveTopology(_sg.d3d11.ctx, pip->d3d11.topology); + _sg_d3d11_IASetInputLayout(_sg.d3d11.ctx, pip->d3d11.il); + _sg_d3d11_VSSetShader(_sg.d3d11.ctx, pip->shader->d3d11.vs, NULL, 0); + _sg_d3d11_VSSetConstantBuffers(_sg.d3d11.ctx, 0, _SG_D3D11_MAX_STAGE_UB_BINDINGS, pip->shader->d3d11.vs_cbufs); + _sg_d3d11_PSSetShader(_sg.d3d11.ctx, pip->shader->d3d11.fs, NULL, 0); + _sg_d3d11_PSSetConstantBuffers(_sg.d3d11.ctx, 0, _SG_D3D11_MAX_STAGE_UB_BINDINGS, pip->shader->d3d11.fs_cbufs); + _sg_stats_add(d3d11.pipeline.num_rs_set_state, 1); + _sg_stats_add(d3d11.pipeline.num_om_set_depth_stencil_state, 1); + _sg_stats_add(d3d11.pipeline.num_om_set_blend_state, 1); + _sg_stats_add(d3d11.pipeline.num_ia_set_primitive_topology, 1); + _sg_stats_add(d3d11.pipeline.num_ia_set_input_layout, 1); + _sg_stats_add(d3d11.pipeline.num_vs_set_shader, 1); + _sg_stats_add(d3d11.pipeline.num_vs_set_constant_buffers, 1); + _sg_stats_add(d3d11.pipeline.num_ps_set_shader, 1); + _sg_stats_add(d3d11.pipeline.num_ps_set_constant_buffers, 1); + } } _SOKOL_PRIVATE bool _sg_d3d11_apply_bindings(_sg_bindings_t* bnd) { @@ -11596,23 +12201,30 @@ _SOKOL_PRIVATE bool _sg_d3d11_apply_bindings(_sg_bindings_t* bnd) { SOKOL_ASSERT(bnd->pip->shader->slot.id == bnd->pip->cmn.shader_id.id); SOKOL_ASSERT(_sg.d3d11.ctx); const _sg_shader_t* shd = bnd->pip->shader; + const bool is_compute = bnd->pip->cmn.is_compute; // gather all the D3D11 resources into arrays ID3D11Buffer* d3d11_ib = bnd->ib ? 
bnd->ib->d3d11.buf : 0; ID3D11Buffer* d3d11_vbs[SG_MAX_VERTEXBUFFER_BINDSLOTS] = {0}; UINT d3d11_vb_offsets[SG_MAX_VERTEXBUFFER_BINDSLOTS] = {0}; - ID3D11ShaderResourceView* d3d11_vs_srvs[_SG_D3D11_MAX_STAGE_TEX_SBUF_BINDINGS] = {0}; - ID3D11ShaderResourceView* d3d11_fs_srvs[_SG_D3D11_MAX_STAGE_TEX_SBUF_BINDINGS] = {0}; + ID3D11ShaderResourceView* d3d11_vs_srvs[_SG_D3D11_MAX_STAGE_SRV_BINDINGS] = {0}; + ID3D11ShaderResourceView* d3d11_fs_srvs[_SG_D3D11_MAX_STAGE_SRV_BINDINGS] = {0}; + ID3D11ShaderResourceView* d3d11_cs_srvs[_SG_D3D11_MAX_STAGE_SRV_BINDINGS] = {0}; ID3D11SamplerState* d3d11_vs_smps[_SG_D3D11_MAX_STAGE_SMP_BINDINGS] = {0}; ID3D11SamplerState* d3d11_fs_smps[_SG_D3D11_MAX_STAGE_SMP_BINDINGS] = {0}; - for (size_t i = 0; i < SG_MAX_VERTEXBUFFER_BINDSLOTS; i++) { - const _sg_buffer_t* vb = bnd->vbs[i]; - if (vb == 0) { - continue; + ID3D11SamplerState* d3d11_cs_smps[_SG_D3D11_MAX_STAGE_SMP_BINDINGS] = {0}; + ID3D11UnorderedAccessView* d3d11_cs_uavs[_SG_D3D11_MAX_STAGE_UAV_BINDINGS] = {0}; + + if (!is_compute) { + for (size_t i = 0; i < SG_MAX_VERTEXBUFFER_BINDSLOTS; i++) { + const _sg_buffer_t* vb = bnd->vbs[i]; + if (vb == 0) { + continue; + } + SOKOL_ASSERT(vb->d3d11.buf); + d3d11_vbs[i] = vb->d3d11.buf; + d3d11_vb_offsets[i] = (UINT)bnd->vb_offsets[i]; } - SOKOL_ASSERT(vb->d3d11.buf); - d3d11_vbs[i] = vb->d3d11.buf; - d3d11_vb_offsets[i] = (UINT)bnd->vb_offsets[i]; } for (size_t i = 0; i < SG_MAX_IMAGE_BINDSLOTS; i++) { const _sg_image_t* img = bnd->imgs[i]; @@ -11620,14 +12232,16 @@ _SOKOL_PRIVATE bool _sg_d3d11_apply_bindings(_sg_bindings_t* bnd) { continue; } const sg_shader_stage stage = shd->cmn.images[i].stage; - SOKOL_ASSERT((stage == SG_SHADERSTAGE_VERTEX) || (stage == SG_SHADERSTAGE_FRAGMENT)); + SOKOL_ASSERT(stage != SG_SHADERSTAGE_NONE); const uint8_t d3d11_slot = shd->d3d11.img_register_t_n[i]; - SOKOL_ASSERT(d3d11_slot < _SG_D3D11_MAX_STAGE_TEX_SBUF_BINDINGS); + SOKOL_ASSERT(d3d11_slot < _SG_D3D11_MAX_STAGE_SRV_BINDINGS); SOKOL_ASSERT(img->d3d11.srv); - if (stage == SG_SHADERSTAGE_VERTEX) { - d3d11_vs_srvs[d3d11_slot] = img->d3d11.srv; - } else { - d3d11_fs_srvs[d3d11_slot] = img->d3d11.srv; + ID3D11ShaderResourceView* d3d11_srv = img->d3d11.srv; + switch (stage) { + case SG_SHADERSTAGE_VERTEX: d3d11_vs_srvs[d3d11_slot] = d3d11_srv; break; + case SG_SHADERSTAGE_FRAGMENT: d3d11_fs_srvs[d3d11_slot] = d3d11_srv; break; + case SG_SHADERSTAGE_COMPUTE: d3d11_cs_srvs[d3d11_slot] = d3d11_srv; break; + default: SOKOL_UNREACHABLE; } } for (size_t i = 0; i < SG_MAX_STORAGEBUFFER_BINDSLOTS; i++) { @@ -11636,14 +12250,24 @@ _SOKOL_PRIVATE bool _sg_d3d11_apply_bindings(_sg_bindings_t* bnd) { continue; } const sg_shader_stage stage = shd->cmn.storage_buffers[i].stage; - SOKOL_ASSERT((stage == SG_SHADERSTAGE_VERTEX) || (stage == SG_SHADERSTAGE_FRAGMENT)); - const uint8_t d3d11_slot = shd->d3d11.sbuf_register_t_n[i]; - SOKOL_ASSERT(d3d11_slot < _SG_D3D11_MAX_STAGE_TEX_SBUF_BINDINGS); - SOKOL_ASSERT(sbuf->d3d11.srv); - if (stage == SG_SHADERSTAGE_VERTEX) { - d3d11_vs_srvs[d3d11_slot] = sbuf->d3d11.srv; + SOKOL_ASSERT(stage != SG_SHADERSTAGE_NONE); + if (shd->cmn.storage_buffers[i].readonly) { + SOKOL_ASSERT(sbuf->d3d11.srv); + const uint8_t d3d11_slot = shd->d3d11.sbuf_register_t_n[i]; + SOKOL_ASSERT(d3d11_slot < _SG_D3D11_MAX_STAGE_SRV_BINDINGS); + ID3D11ShaderResourceView* d3d11_srv = sbuf->d3d11.srv; + switch (stage) { + case SG_SHADERSTAGE_VERTEX: d3d11_vs_srvs[d3d11_slot] = d3d11_srv; break; + case SG_SHADERSTAGE_FRAGMENT: d3d11_fs_srvs[d3d11_slot] = d3d11_srv; break; + case 
SG_SHADERSTAGE_COMPUTE: d3d11_cs_srvs[d3d11_slot] = d3d11_srv; break; + default: SOKOL_UNREACHABLE; + } } else { - d3d11_fs_srvs[d3d11_slot] = sbuf->d3d11.srv; + SOKOL_ASSERT(sbuf->d3d11.uav); + SOKOL_ASSERT(stage == SG_SHADERSTAGE_COMPUTE); + const uint8_t d3d11_slot = shd->d3d11.sbuf_register_u_n[i]; + SOKOL_ASSERT(d3d11_slot < _SG_D3D11_MAX_STAGE_UAV_BINDINGS); + d3d11_cs_uavs[d3d11_slot] = sbuf->d3d11.uav; } } for (size_t i = 0; i < SG_MAX_SAMPLER_BINDSLOTS; i++) { @@ -11652,28 +12276,39 @@ _SOKOL_PRIVATE bool _sg_d3d11_apply_bindings(_sg_bindings_t* bnd) { continue; } const sg_shader_stage stage = shd->cmn.samplers[i].stage; - SOKOL_ASSERT((stage == SG_SHADERSTAGE_VERTEX) || (stage == SG_SHADERSTAGE_FRAGMENT)); + SOKOL_ASSERT(stage != SG_SHADERSTAGE_NONE); const uint8_t d3d11_slot = shd->d3d11.smp_register_s_n[i]; SOKOL_ASSERT(d3d11_slot < _SG_D3D11_MAX_STAGE_SMP_BINDINGS); SOKOL_ASSERT(smp->d3d11.smp); - if (stage == SG_SHADERSTAGE_VERTEX) { - d3d11_vs_smps[d3d11_slot] = smp->d3d11.smp; - } else { - d3d11_fs_smps[d3d11_slot] = smp->d3d11.smp; + ID3D11SamplerState* d3d11_smp = smp->d3d11.smp; + switch (stage) { + case SG_SHADERSTAGE_VERTEX: d3d11_vs_smps[d3d11_slot] = d3d11_smp; break; + case SG_SHADERSTAGE_FRAGMENT: d3d11_fs_smps[d3d11_slot] = d3d11_smp; break; + case SG_SHADERSTAGE_COMPUTE: d3d11_cs_smps[d3d11_slot] = d3d11_smp; break; + default: SOKOL_UNREACHABLE; } } - _sg_d3d11_IASetVertexBuffers(_sg.d3d11.ctx, 0, SG_MAX_VERTEXBUFFER_BINDSLOTS, d3d11_vbs, bnd->pip->d3d11.vb_strides, d3d11_vb_offsets); - _sg_d3d11_IASetIndexBuffer(_sg.d3d11.ctx, d3d11_ib, bnd->pip->d3d11.index_format, (UINT)bnd->ib_offset); - _sg_d3d11_VSSetShaderResources(_sg.d3d11.ctx, 0, _SG_D3D11_MAX_STAGE_TEX_SBUF_BINDINGS, d3d11_vs_srvs); - _sg_d3d11_PSSetShaderResources(_sg.d3d11.ctx, 0, _SG_D3D11_MAX_STAGE_TEX_SBUF_BINDINGS, d3d11_fs_srvs); - _sg_d3d11_VSSetSamplers(_sg.d3d11.ctx, 0, _SG_D3D11_MAX_STAGE_SMP_BINDINGS, d3d11_vs_smps); - _sg_d3d11_PSSetSamplers(_sg.d3d11.ctx, 0, _SG_D3D11_MAX_STAGE_SMP_BINDINGS, d3d11_fs_smps); - _sg_stats_add(d3d11.bindings.num_ia_set_vertex_buffers, 1); - _sg_stats_add(d3d11.bindings.num_ia_set_index_buffer, 1); - _sg_stats_add(d3d11.bindings.num_vs_set_shader_resources, 1); - _sg_stats_add(d3d11.bindings.num_ps_set_shader_resources, 1); - _sg_stats_add(d3d11.bindings.num_vs_set_samplers, 1); - _sg_stats_add(d3d11.bindings.num_ps_set_samplers, 1); + if (is_compute) { + _sg_d3d11_CSSetShaderResources(_sg.d3d11.ctx, 0, _SG_D3D11_MAX_STAGE_SRV_BINDINGS, d3d11_cs_srvs); + _sg_d3d11_CSSetSamplers(_sg.d3d11.ctx, 0, _SG_D3D11_MAX_STAGE_SMP_BINDINGS, d3d11_cs_smps); + _sg_d3d11_CSSetUnorderedAccessViews(_sg.d3d11.ctx, 0, _SG_D3D11_MAX_STAGE_UAV_BINDINGS, d3d11_cs_uavs, NULL); + _sg_stats_add(d3d11.bindings.num_cs_set_shader_resources, 1); + _sg_stats_add(d3d11.bindings.num_cs_set_samplers, 1); + _sg_stats_add(d3d11.bindings.num_cs_set_unordered_access_views, 1); + } else { + _sg_d3d11_IASetVertexBuffers(_sg.d3d11.ctx, 0, SG_MAX_VERTEXBUFFER_BINDSLOTS, d3d11_vbs, bnd->pip->d3d11.vb_strides, d3d11_vb_offsets); + _sg_d3d11_IASetIndexBuffer(_sg.d3d11.ctx, d3d11_ib, bnd->pip->d3d11.index_format, (UINT)bnd->ib_offset); + _sg_d3d11_VSSetShaderResources(_sg.d3d11.ctx, 0, _SG_D3D11_MAX_STAGE_SRV_BINDINGS, d3d11_vs_srvs); + _sg_d3d11_PSSetShaderResources(_sg.d3d11.ctx, 0, _SG_D3D11_MAX_STAGE_SRV_BINDINGS, d3d11_fs_srvs); + _sg_d3d11_VSSetSamplers(_sg.d3d11.ctx, 0, _SG_D3D11_MAX_STAGE_SMP_BINDINGS, d3d11_vs_smps); + _sg_d3d11_PSSetSamplers(_sg.d3d11.ctx, 0, _SG_D3D11_MAX_STAGE_SMP_BINDINGS, 
d3d11_fs_smps); + _sg_stats_add(d3d11.bindings.num_ia_set_vertex_buffers, 1); + _sg_stats_add(d3d11.bindings.num_ia_set_index_buffer, 1); + _sg_stats_add(d3d11.bindings.num_vs_set_shader_resources, 1); + _sg_stats_add(d3d11.bindings.num_ps_set_shader_resources, 1); + _sg_stats_add(d3d11.bindings.num_vs_set_samplers, 1); + _sg_stats_add(d3d11.bindings.num_ps_set_samplers, 1); + } return true; } @@ -11712,6 +12347,10 @@ _SOKOL_PRIVATE void _sg_d3d11_draw(int base_element, int num_elements, int num_i } } +_SOKOL_PRIVATE void _sg_d3d11_dispatch(int num_groups_x, int num_groups_y, int num_groups_z) { + _sg_d3d11_Dispatch(_sg.d3d11.ctx, (UINT)num_groups_x, (UINT)num_groups_y, (UINT)num_groups_z); +} + _SOKOL_PRIVATE void _sg_d3d11_commit(void) { // empty } @@ -12173,6 +12812,10 @@ _SOKOL_PRIVATE MTLSamplerMipFilter _sg_mtl_mipmap_filter(sg_filter f) { } } +_SOKOL_PRIVATE size_t _sg_mtl_vertexbuffer_bindslot(size_t sokol_bindslot) { + return sokol_bindslot + _SG_MTL_MAX_STAGE_UB_SBUF_BINDINGS; +} + //-- a pool for all Metal resource objects, with deferred release queue --------- _SOKOL_PRIVATE void _sg_mtl_init_pool(const sg_desc* desc) { _sg.mtl.idpool.num_slots = 2 * @@ -12318,7 +12961,7 @@ _SOKOL_PRIVATE void _sg_mtl_init_caps(void) { _sg.features.origin_top_left = true; _sg.features.mrt_independent_blend_state = true; _sg.features.mrt_independent_write_mask = true; - _sg.features.storage_buffer = true; + _sg.features.compute = true; _sg.features.msaa_image_bindings = true; _sg.features.image_clamp_to_border = false; @@ -12524,9 +13167,10 @@ _SOKOL_PRIVATE void _sg_mtl_discard_backend(void) { for (int i = 0; i < SG_NUM_INFLIGHT_FRAMES; i++) { _SG_OBJC_RELEASE(_sg.mtl.uniform_buffers[i]); } - // NOTE: MTLCommandBuffer and MTLRenderCommandEncoder are auto-released + // NOTE: MTLCommandBuffer, MTLRenderCommandEncoder and MTLComputeCommandEncoder are auto-released _sg.mtl.cmd_buffer = nil; - _sg.mtl.cmd_encoder = nil; + _sg.mtl.render_cmd_encoder = nil; + _sg.mtl.compute_cmd_encoder = nil; } _SOKOL_PRIVATE void _sg_mtl_reset_state_cache(void) { @@ -12535,6 +13179,7 @@ _SOKOL_PRIVATE void _sg_mtl_reset_state_cache(void) { _SOKOL_PRIVATE sg_resource_state _sg_mtl_create_buffer(_sg_buffer_t* buf, const sg_buffer_desc* desc) { SOKOL_ASSERT(buf && desc); + SOKOL_ASSERT(buf->cmn.size > 0); const bool injected = (0 != desc->mtl_buffers[0]); MTLResourceOptions mtl_options = _sg_mtl_buffer_resource_options(buf->cmn.usage); for (int slot = 0; slot < buf->cmn.num_slots; slot++) { @@ -12543,10 +13188,11 @@ _SOKOL_PRIVATE sg_resource_state _sg_mtl_create_buffer(_sg_buffer_t* buf, const SOKOL_ASSERT(desc->mtl_buffers[slot]); mtl_buf = (__bridge id) desc->mtl_buffers[slot]; } else { - if (buf->cmn.usage == SG_USAGE_IMMUTABLE) { - SOKOL_ASSERT(desc->data.ptr); + if (desc->data.ptr) { + SOKOL_ASSERT(desc->data.size > 0); mtl_buf = [_sg.mtl.device newBufferWithBytes:desc->data.ptr length:(NSUInteger)buf->cmn.size options:mtl_options]; } else { + // this is guaranteed to zero-initialize the buffer mtl_buf = [_sg.mtl.device newBufferWithLength:(NSUInteger)buf->cmn.size options:mtl_options]; } if (nil == mtl_buf) { @@ -12878,6 +13524,11 @@ _SOKOL_PRIVATE sg_resource_state _sg_mtl_create_shader(_sg_shader_t* shd, const return SG_RESOURCESTATE_FAILED; } + shd->mtl.threads_per_threadgroup = MTLSizeMake( + (NSUInteger)desc->mtl_threads_per_threadgroup.x, + (NSUInteger)desc->mtl_threads_per_threadgroup.y, + (NSUInteger)desc->mtl_threads_per_threadgroup.z); + // copy resource bindslot mappings for (size_t i = 0; i < 
SG_MAX_UNIFORMBLOCK_BINDSLOTS; i++) { shd->mtl.ub_buffer_n[i] = desc->uniform_blocks[i].msl_buffer_n; @@ -12893,11 +13544,20 @@ _SOKOL_PRIVATE sg_resource_state _sg_mtl_create_shader(_sg_shader_t* shd, const } // create metal library and function objects - bool shd_valid = _sg_mtl_create_shader_func(&desc->vertex_func, desc->label, "vs", &shd->mtl.vertex_func); - shd_valid &= _sg_mtl_create_shader_func(&desc->fragment_func, desc->label, "fs", &shd->mtl.fragment_func); + bool shd_valid = true; + if (desc->vertex_func.source || desc->vertex_func.bytecode.ptr) { + shd_valid &= _sg_mtl_create_shader_func(&desc->vertex_func, desc->label, "vs", &shd->mtl.vertex_func); + } + if (desc->fragment_func.source || desc->fragment_func.bytecode.ptr) { + shd_valid &= _sg_mtl_create_shader_func(&desc->fragment_func, desc->label, "fs", &shd->mtl.fragment_func); + } + if (desc->compute_func.source || desc->compute_func.bytecode.ptr) { + shd_valid &= _sg_mtl_create_shader_func(&desc->compute_func, desc->label, "cs", &shd->mtl.compute_func); + } if (!shd_valid) { _sg_mtl_discard_shader_func(&shd->mtl.vertex_func); _sg_mtl_discard_shader_func(&shd->mtl.fragment_func); + _sg_mtl_discard_shader_func(&shd->mtl.compute_func); } return shd_valid ? SG_RESOURCESTATE_VALID : SG_RESOURCESTATE_FAILED; } @@ -12906,6 +13566,7 @@ _SOKOL_PRIVATE void _sg_mtl_discard_shader(_sg_shader_t* shd) { SOKOL_ASSERT(shd); _sg_mtl_discard_shader_func(&shd->mtl.vertex_func); _sg_mtl_discard_shader_func(&shd->mtl.fragment_func); + _sg_mtl_discard_shader_func(&shd->mtl.compute_func); } _SOKOL_PRIVATE sg_resource_state _sg_mtl_create_pipeline(_sg_pipeline_t* pip, _sg_shader_t* shd, const sg_pipeline_desc* desc) { @@ -12914,130 +13575,185 @@ _SOKOL_PRIVATE sg_resource_state _sg_mtl_create_pipeline(_sg_pipeline_t* pip, _s pip->shader = shd; - sg_primitive_type prim_type = desc->primitive_type; - pip->mtl.prim_type = _sg_mtl_primitive_type(prim_type); - pip->mtl.index_size = _sg_mtl_index_size(pip->cmn.index_type); - if (SG_INDEXTYPE_NONE != pip->cmn.index_type) { - pip->mtl.index_type = _sg_mtl_index_type(pip->cmn.index_type); - } - pip->mtl.cull_mode = _sg_mtl_cull_mode(desc->cull_mode); - pip->mtl.winding = _sg_mtl_winding(desc->face_winding); - pip->mtl.stencil_ref = desc->stencil.ref; - - // create vertex-descriptor - MTLVertexDescriptor* vtx_desc = [MTLVertexDescriptor vertexDescriptor]; - for (NSUInteger attr_index = 0; attr_index < SG_MAX_VERTEX_ATTRIBUTES; attr_index++) { - const sg_vertex_attr_state* a_state = &desc->layout.attrs[attr_index]; - if (a_state->format == SG_VERTEXFORMAT_INVALID) { - break; - } - SOKOL_ASSERT(a_state->buffer_index < SG_MAX_VERTEXBUFFER_BINDSLOTS); - SOKOL_ASSERT(pip->cmn.vertex_buffer_layout_active[a_state->buffer_index]); - vtx_desc.attributes[attr_index].format = _sg_mtl_vertex_format(a_state->format); - vtx_desc.attributes[attr_index].offset = (NSUInteger)a_state->offset; - vtx_desc.attributes[attr_index].bufferIndex = (NSUInteger)(a_state->buffer_index + SG_MAX_UNIFORMBLOCK_BINDSLOTS + SG_MAX_STORAGEBUFFER_BINDSLOTS); - } - for (NSUInteger layout_index = 0; layout_index < SG_MAX_VERTEXBUFFER_BINDSLOTS; layout_index++) { - if (pip->cmn.vertex_buffer_layout_active[layout_index]) { - const sg_vertex_buffer_layout_state* l_state = &desc->layout.buffers[layout_index]; - const NSUInteger mtl_vb_slot = layout_index + SG_MAX_UNIFORMBLOCK_BINDSLOTS + SG_MAX_STORAGEBUFFER_BINDSLOTS; - SOKOL_ASSERT(l_state->stride > 0); - vtx_desc.layouts[mtl_vb_slot].stride = (NSUInteger)l_state->stride; - 
vtx_desc.layouts[mtl_vb_slot].stepFunction = _sg_mtl_step_function(l_state->step_func);
- vtx_desc.layouts[mtl_vb_slot].stepRate = (NSUInteger)l_state->step_rate;
- if (SG_VERTEXSTEP_PER_INSTANCE == l_state->step_func) {
- // NOTE: not actually used in _sg_mtl_draw()
- pip->cmn.use_instanced_draw = true;
+ if (pip->cmn.is_compute) {
+ NSError* err = NULL;
+ MTLComputePipelineDescriptor* cp_desc = [[MTLComputePipelineDescriptor alloc] init];
+ cp_desc.computeFunction = _sg_mtl_id(shd->mtl.compute_func.mtl_func);
+ cp_desc.threadGroupSizeIsMultipleOfThreadExecutionWidth = true;
+ for (size_t i = 0; i < SG_MAX_STORAGEBUFFER_BINDSLOTS; i++) {
+ const sg_shader_stage stage = shd->cmn.storage_buffers[i].stage;
+ SOKOL_ASSERT((stage != SG_SHADERSTAGE_VERTEX) && (stage != SG_SHADERSTAGE_FRAGMENT));
+ if ((stage == SG_SHADERSTAGE_COMPUTE) && shd->cmn.storage_buffers[i].readonly) {
+ const NSUInteger mtl_slot = shd->mtl.sbuf_buffer_n[i];
+ cp_desc.buffers[mtl_slot].mutability = MTLMutabilityImmutable; } }
- }
-
- // render-pipeline descriptor
- MTLRenderPipelineDescriptor* rp_desc = [[MTLRenderPipelineDescriptor alloc] init];
- rp_desc.vertexDescriptor = vtx_desc;
- SOKOL_ASSERT(shd->mtl.vertex_func.mtl_func != _SG_MTL_INVALID_SLOT_INDEX);
- rp_desc.vertexFunction = _sg_mtl_id(shd->mtl.vertex_func.mtl_func);
- SOKOL_ASSERT(shd->mtl.fragment_func.mtl_func != _SG_MTL_INVALID_SLOT_INDEX);
- rp_desc.fragmentFunction = _sg_mtl_id(shd->mtl.fragment_func.mtl_func);
- rp_desc.rasterSampleCount = (NSUInteger)desc->sample_count;
- rp_desc.alphaToCoverageEnabled = desc->alpha_to_coverage_enabled;
- rp_desc.alphaToOneEnabled = NO;
- rp_desc.rasterizationEnabled = YES;
- rp_desc.depthAttachmentPixelFormat = _sg_mtl_pixel_format(desc->depth.pixel_format);
- if (desc->depth.pixel_format == SG_PIXELFORMAT_DEPTH_STENCIL) {
- rp_desc.stencilAttachmentPixelFormat = _sg_mtl_pixel_format(desc->depth.pixel_format);
- }
- for (NSUInteger i = 0; i < (NSUInteger)desc->color_count; i++) {
- SOKOL_ASSERT(i < SG_MAX_COLOR_ATTACHMENTS);
- const sg_color_target_state* cs = &desc->colors[i];
- rp_desc.colorAttachments[i].pixelFormat = _sg_mtl_pixel_format(cs->pixel_format);
- rp_desc.colorAttachments[i].writeMask = _sg_mtl_color_write_mask(cs->write_mask);
- rp_desc.colorAttachments[i].blendingEnabled = cs->blend.enabled;
- rp_desc.colorAttachments[i].alphaBlendOperation = _sg_mtl_blend_op(cs->blend.op_alpha);
- rp_desc.colorAttachments[i].rgbBlendOperation = _sg_mtl_blend_op(cs->blend.op_rgb);
- rp_desc.colorAttachments[i].destinationAlphaBlendFactor = _sg_mtl_blend_factor(cs->blend.dst_factor_alpha);
- rp_desc.colorAttachments[i].destinationRGBBlendFactor = _sg_mtl_blend_factor(cs->blend.dst_factor_rgb);
- rp_desc.colorAttachments[i].sourceAlphaBlendFactor = _sg_mtl_blend_factor(cs->blend.src_factor_alpha);
- rp_desc.colorAttachments[i].sourceRGBBlendFactor = _sg_mtl_blend_factor(cs->blend.src_factor_rgb);
- }
- #if defined(SOKOL_DEBUG)
- if (desc->label) {
- rp_desc.label = [NSString stringWithFormat:@"%s", desc->label];
+ #if defined(SOKOL_DEBUG)
+ if (desc->label) {
+ cp_desc.label = [NSString stringWithFormat:@"%s", desc->label];
+ }
+ #endif
+ id<MTLComputePipelineState> mtl_cps = [_sg.mtl.device
+ newComputePipelineStateWithDescriptor:cp_desc
+ options:MTLPipelineOptionNone
+ reflection:nil
+ error:&err];
+ _SG_OBJC_RELEASE(cp_desc);
+ if (nil == mtl_cps) {
+ SOKOL_ASSERT(err);
+ _SG_ERROR(METAL_CREATE_CPS_FAILED);
+ _SG_LOGMSG(METAL_CREATE_CPS_OUTPUT, [err.localizedDescription UTF8String]);
+ return SG_RESOURCESTATE_FAILED; }
- #endif
- NSError*
err = NULL; - id mtl_rps = [_sg.mtl.device newRenderPipelineStateWithDescriptor:rp_desc error:&err]; - _SG_OBJC_RELEASE(rp_desc); - if (nil == mtl_rps) { - SOKOL_ASSERT(err); - _SG_ERROR(METAL_CREATE_RPS_FAILED); - _SG_LOGMSG(METAL_CREATE_RPS_OUTPUT, [err.localizedDescription UTF8String]); - return SG_RESOURCESTATE_FAILED; - } - pip->mtl.rps = _sg_mtl_add_resource(mtl_rps); - _SG_OBJC_RELEASE(mtl_rps); - - // depth-stencil-state - MTLDepthStencilDescriptor* ds_desc = [[MTLDepthStencilDescriptor alloc] init]; - ds_desc.depthCompareFunction = _sg_mtl_compare_func(desc->depth.compare); - ds_desc.depthWriteEnabled = desc->depth.write_enabled; - if (desc->stencil.enabled) { - const sg_stencil_face_state* sb = &desc->stencil.back; - ds_desc.backFaceStencil = [[MTLStencilDescriptor alloc] init]; - ds_desc.backFaceStencil.stencilFailureOperation = _sg_mtl_stencil_op(sb->fail_op); - ds_desc.backFaceStencil.depthFailureOperation = _sg_mtl_stencil_op(sb->depth_fail_op); - ds_desc.backFaceStencil.depthStencilPassOperation = _sg_mtl_stencil_op(sb->pass_op); - ds_desc.backFaceStencil.stencilCompareFunction = _sg_mtl_compare_func(sb->compare); - ds_desc.backFaceStencil.readMask = desc->stencil.read_mask; - ds_desc.backFaceStencil.writeMask = desc->stencil.write_mask; - const sg_stencil_face_state* sf = &desc->stencil.front; - ds_desc.frontFaceStencil = [[MTLStencilDescriptor alloc] init]; - ds_desc.frontFaceStencil.stencilFailureOperation = _sg_mtl_stencil_op(sf->fail_op); - ds_desc.frontFaceStencil.depthFailureOperation = _sg_mtl_stencil_op(sf->depth_fail_op); - ds_desc.frontFaceStencil.depthStencilPassOperation = _sg_mtl_stencil_op(sf->pass_op); - ds_desc.frontFaceStencil.stencilCompareFunction = _sg_mtl_compare_func(sf->compare); - ds_desc.frontFaceStencil.readMask = desc->stencil.read_mask; - ds_desc.frontFaceStencil.writeMask = desc->stencil.write_mask; - } - #if defined(SOKOL_DEBUG) - if (desc->label) { - ds_desc.label = [NSString stringWithFormat:@"%s.dss", desc->label]; + pip->mtl.cps = _sg_mtl_add_resource(mtl_cps); + _SG_OBJC_RELEASE(mtl_cps); + pip->mtl.threads_per_threadgroup = shd->mtl.threads_per_threadgroup; + } else { + sg_primitive_type prim_type = desc->primitive_type; + pip->mtl.prim_type = _sg_mtl_primitive_type(prim_type); + pip->mtl.index_size = _sg_mtl_index_size(pip->cmn.index_type); + if (SG_INDEXTYPE_NONE != pip->cmn.index_type) { + pip->mtl.index_type = _sg_mtl_index_type(pip->cmn.index_type); } - #endif - id mtl_dss = [_sg.mtl.device newDepthStencilStateWithDescriptor:ds_desc]; - _SG_OBJC_RELEASE(ds_desc); - if (nil == mtl_dss) { - _SG_ERROR(METAL_CREATE_DSS_FAILED); - return SG_RESOURCESTATE_FAILED; + pip->mtl.cull_mode = _sg_mtl_cull_mode(desc->cull_mode); + pip->mtl.winding = _sg_mtl_winding(desc->face_winding); + pip->mtl.stencil_ref = desc->stencil.ref; + + // create vertex-descriptor + MTLVertexDescriptor* vtx_desc = [MTLVertexDescriptor vertexDescriptor]; + for (NSUInteger attr_index = 0; attr_index < SG_MAX_VERTEX_ATTRIBUTES; attr_index++) { + const sg_vertex_attr_state* a_state = &desc->layout.attrs[attr_index]; + if (a_state->format == SG_VERTEXFORMAT_INVALID) { + break; + } + SOKOL_ASSERT(a_state->buffer_index < SG_MAX_VERTEXBUFFER_BINDSLOTS); + SOKOL_ASSERT(pip->cmn.vertex_buffer_layout_active[a_state->buffer_index]); + vtx_desc.attributes[attr_index].format = _sg_mtl_vertex_format(a_state->format); + vtx_desc.attributes[attr_index].offset = (NSUInteger)a_state->offset; + vtx_desc.attributes[attr_index].bufferIndex = 
_sg_mtl_vertexbuffer_bindslot((size_t)a_state->buffer_index); + } + for (NSUInteger layout_index = 0; layout_index < SG_MAX_VERTEXBUFFER_BINDSLOTS; layout_index++) { + if (pip->cmn.vertex_buffer_layout_active[layout_index]) { + const sg_vertex_buffer_layout_state* l_state = &desc->layout.buffers[layout_index]; + const NSUInteger mtl_vb_slot = _sg_mtl_vertexbuffer_bindslot(layout_index); + SOKOL_ASSERT(l_state->stride > 0); + vtx_desc.layouts[mtl_vb_slot].stride = (NSUInteger)l_state->stride; + vtx_desc.layouts[mtl_vb_slot].stepFunction = _sg_mtl_step_function(l_state->step_func); + vtx_desc.layouts[mtl_vb_slot].stepRate = (NSUInteger)l_state->step_rate; + if (SG_VERTEXSTEP_PER_INSTANCE == l_state->step_func) { + // NOTE: not actually used in _sg_mtl_draw() + pip->cmn.use_instanced_draw = true; + } + } + } + + // render-pipeline descriptor + MTLRenderPipelineDescriptor* rp_desc = [[MTLRenderPipelineDescriptor alloc] init]; + rp_desc.vertexDescriptor = vtx_desc; + SOKOL_ASSERT(shd->mtl.vertex_func.mtl_func != _SG_MTL_INVALID_SLOT_INDEX); + rp_desc.vertexFunction = _sg_mtl_id(shd->mtl.vertex_func.mtl_func); + SOKOL_ASSERT(shd->mtl.fragment_func.mtl_func != _SG_MTL_INVALID_SLOT_INDEX); + rp_desc.fragmentFunction = _sg_mtl_id(shd->mtl.fragment_func.mtl_func); + rp_desc.rasterSampleCount = (NSUInteger)desc->sample_count; + rp_desc.alphaToCoverageEnabled = desc->alpha_to_coverage_enabled; + rp_desc.alphaToOneEnabled = NO; + rp_desc.rasterizationEnabled = YES; + rp_desc.depthAttachmentPixelFormat = _sg_mtl_pixel_format(desc->depth.pixel_format); + if (desc->depth.pixel_format == SG_PIXELFORMAT_DEPTH_STENCIL) { + rp_desc.stencilAttachmentPixelFormat = _sg_mtl_pixel_format(desc->depth.pixel_format); + } + for (NSUInteger i = 0; i < (NSUInteger)desc->color_count; i++) { + SOKOL_ASSERT(i < SG_MAX_COLOR_ATTACHMENTS); + const sg_color_target_state* cs = &desc->colors[i]; + rp_desc.colorAttachments[i].pixelFormat = _sg_mtl_pixel_format(cs->pixel_format); + rp_desc.colorAttachments[i].writeMask = _sg_mtl_color_write_mask(cs->write_mask); + rp_desc.colorAttachments[i].blendingEnabled = cs->blend.enabled; + rp_desc.colorAttachments[i].alphaBlendOperation = _sg_mtl_blend_op(cs->blend.op_alpha); + rp_desc.colorAttachments[i].rgbBlendOperation = _sg_mtl_blend_op(cs->blend.op_rgb); + rp_desc.colorAttachments[i].destinationAlphaBlendFactor = _sg_mtl_blend_factor(cs->blend.dst_factor_alpha); + rp_desc.colorAttachments[i].destinationRGBBlendFactor = _sg_mtl_blend_factor(cs->blend.dst_factor_rgb); + rp_desc.colorAttachments[i].sourceAlphaBlendFactor = _sg_mtl_blend_factor(cs->blend.src_factor_alpha); + rp_desc.colorAttachments[i].sourceRGBBlendFactor = _sg_mtl_blend_factor(cs->blend.src_factor_rgb); + } + // set buffer mutability for all read-only buffers (vertex buffers and read-only storage buffers) + for (size_t i = 0; i < SG_MAX_VERTEXBUFFER_BINDSLOTS; i++) { + if (pip->cmn.vertex_buffer_layout_active[i]) { + const NSUInteger mtl_slot = _sg_mtl_vertexbuffer_bindslot(i); + rp_desc.vertexBuffers[mtl_slot].mutability = MTLMutabilityImmutable; + } + } + for (size_t i = 0; i < SG_MAX_STORAGEBUFFER_BINDSLOTS; i++) { + const NSUInteger mtl_slot = shd->mtl.sbuf_buffer_n[i]; + const sg_shader_stage stage = shd->cmn.storage_buffers[i].stage; + SOKOL_ASSERT(stage != SG_SHADERSTAGE_COMPUTE); + if (stage == SG_SHADERSTAGE_VERTEX) { + SOKOL_ASSERT(shd->cmn.storage_buffers[i].readonly); + rp_desc.vertexBuffers[mtl_slot].mutability = MTLMutabilityImmutable; + } else if (stage == SG_SHADERSTAGE_FRAGMENT) { + 
SOKOL_ASSERT(shd->cmn.storage_buffers[i].readonly);
+ rp_desc.fragmentBuffers[mtl_slot].mutability = MTLMutabilityImmutable;
+ }
+ }
+ #if defined(SOKOL_DEBUG)
+ if (desc->label) {
+ rp_desc.label = [NSString stringWithFormat:@"%s", desc->label];
+ }
+ #endif
+ NSError* err = NULL;
+ id<MTLRenderPipelineState> mtl_rps = [_sg.mtl.device newRenderPipelineStateWithDescriptor:rp_desc error:&err];
+ _SG_OBJC_RELEASE(rp_desc);
+ if (nil == mtl_rps) {
+ SOKOL_ASSERT(err);
+ _SG_ERROR(METAL_CREATE_RPS_FAILED);
+ _SG_LOGMSG(METAL_CREATE_RPS_OUTPUT, [err.localizedDescription UTF8String]);
+ return SG_RESOURCESTATE_FAILED;
+ }
+ pip->mtl.rps = _sg_mtl_add_resource(mtl_rps);
+ _SG_OBJC_RELEASE(mtl_rps);
+
+ // depth-stencil-state
+ MTLDepthStencilDescriptor* ds_desc = [[MTLDepthStencilDescriptor alloc] init];
+ ds_desc.depthCompareFunction = _sg_mtl_compare_func(desc->depth.compare);
+ ds_desc.depthWriteEnabled = desc->depth.write_enabled;
+ if (desc->stencil.enabled) {
+ const sg_stencil_face_state* sb = &desc->stencil.back;
+ ds_desc.backFaceStencil = [[MTLStencilDescriptor alloc] init];
+ ds_desc.backFaceStencil.stencilFailureOperation = _sg_mtl_stencil_op(sb->fail_op);
+ ds_desc.backFaceStencil.depthFailureOperation = _sg_mtl_stencil_op(sb->depth_fail_op);
+ ds_desc.backFaceStencil.depthStencilPassOperation = _sg_mtl_stencil_op(sb->pass_op);
+ ds_desc.backFaceStencil.stencilCompareFunction = _sg_mtl_compare_func(sb->compare);
+ ds_desc.backFaceStencil.readMask = desc->stencil.read_mask;
+ ds_desc.backFaceStencil.writeMask = desc->stencil.write_mask;
+ const sg_stencil_face_state* sf = &desc->stencil.front;
+ ds_desc.frontFaceStencil = [[MTLStencilDescriptor alloc] init];
+ ds_desc.frontFaceStencil.stencilFailureOperation = _sg_mtl_stencil_op(sf->fail_op);
+ ds_desc.frontFaceStencil.depthFailureOperation = _sg_mtl_stencil_op(sf->depth_fail_op);
+ ds_desc.frontFaceStencil.depthStencilPassOperation = _sg_mtl_stencil_op(sf->pass_op);
+ ds_desc.frontFaceStencil.stencilCompareFunction = _sg_mtl_compare_func(sf->compare);
+ ds_desc.frontFaceStencil.readMask = desc->stencil.read_mask;
+ ds_desc.frontFaceStencil.writeMask = desc->stencil.write_mask;
+ }
+ #if defined(SOKOL_DEBUG)
+ if (desc->label) {
+ ds_desc.label = [NSString stringWithFormat:@"%s.dss", desc->label];
+ }
+ #endif
+ id<MTLDepthStencilState> mtl_dss = [_sg.mtl.device newDepthStencilStateWithDescriptor:ds_desc];
+ _SG_OBJC_RELEASE(ds_desc);
+ if (nil == mtl_dss) {
+ _SG_ERROR(METAL_CREATE_DSS_FAILED);
+ return SG_RESOURCESTATE_FAILED;
+ }
+ pip->mtl.dss = _sg_mtl_add_resource(mtl_dss);
+ _SG_OBJC_RELEASE(mtl_dss); }
- pip->mtl.dss = _sg_mtl_add_resource(mtl_dss);
- _SG_OBJC_RELEASE(mtl_dss);
return SG_RESOURCESTATE_VALID; }
_SOKOL_PRIVATE void _sg_mtl_discard_pipeline(_sg_pipeline_t* pip) { SOKOL_ASSERT(pip); // it's valid to call release resource with a 'null resource'
+ _sg_mtl_release_resource(_sg.frame_index, pip->mtl.cps);
_sg_mtl_release_resource(_sg.frame_index, pip->mtl.rps); _sg_mtl_release_resource(_sg.frame_index, pip->mtl.dss); }
@@ -13098,65 +13814,63 @@ _SOKOL_PRIVATE _sg_image_t* _sg_mtl_attachments_ds_image(const _sg_attachments_t } _SOKOL_PRIVATE void _sg_mtl_bind_uniform_buffers(void) {
- SOKOL_ASSERT(nil != _sg.mtl.cmd_encoder);
- // On Metal, uniform buffer bindings happen once in sg_begin_pass() and
+ // In the Metal backend, uniform buffer bindings happen once in sg_begin_pass() and
Only binding offsets will be updated // in sg_apply_uniforms() - for (size_t slot = 0; slot < SG_MAX_UNIFORMBLOCK_BINDSLOTS; slot++) { - [_sg.mtl.cmd_encoder - setVertexBuffer:_sg.mtl.uniform_buffers[_sg.mtl.cur_frame_rotate_index] - offset:0 - atIndex:slot]; - [_sg.mtl.cmd_encoder - setFragmentBuffer:_sg.mtl.uniform_buffers[_sg.mtl.cur_frame_rotate_index] - offset:0 - atIndex:slot]; + if (_sg.cur_pass.is_compute) { + SOKOL_ASSERT(nil != _sg.mtl.compute_cmd_encoder); + for (size_t slot = 0; slot < SG_MAX_UNIFORMBLOCK_BINDSLOTS; slot++) { + [_sg.mtl.compute_cmd_encoder + setBuffer:_sg.mtl.uniform_buffers[_sg.mtl.cur_frame_rotate_index] + offset:0 + atIndex:slot]; + } + } else { + SOKOL_ASSERT(nil != _sg.mtl.render_cmd_encoder); + for (size_t slot = 0; slot < SG_MAX_UNIFORMBLOCK_BINDSLOTS; slot++) { + [_sg.mtl.render_cmd_encoder + setVertexBuffer:_sg.mtl.uniform_buffers[_sg.mtl.cur_frame_rotate_index] + offset:0 + atIndex:slot]; + [_sg.mtl.render_cmd_encoder + setFragmentBuffer:_sg.mtl.uniform_buffers[_sg.mtl.cur_frame_rotate_index] + offset:0 + atIndex:slot]; + } } } -_SOKOL_PRIVATE void _sg_mtl_begin_pass(const sg_pass* pass) { +_SOKOL_PRIVATE void _sg_mtl_begin_compute_pass(const sg_pass* pass) { + SOKOL_ASSERT(pass); (void)pass; + SOKOL_ASSERT(nil != _sg.mtl.cmd_buffer); + SOKOL_ASSERT(nil == _sg.mtl.compute_cmd_encoder); + SOKOL_ASSERT(nil == _sg.mtl.render_cmd_encoder); + + // NOTE: we actually want computeCommandEncoderWithDispatchType:MTLDispatchTypeConcurrent, but + // that requires bumping the macOS base version to 10.14 + _sg.mtl.compute_cmd_encoder = [_sg.mtl.cmd_buffer computeCommandEncoder]; + if (nil == _sg.mtl.compute_cmd_encoder) { + _sg.cur_pass.valid = false; + return; + } + + #if defined(SOKOL_DEBUG) + if (pass->label) { + _sg.mtl.compute_cmd_encoder.label = [NSString stringWithUTF8String:pass->label]; + } + #endif +} + +_SOKOL_PRIVATE void _sg_mtl_begin_render_pass(const sg_pass* pass) { SOKOL_ASSERT(pass); - SOKOL_ASSERT(_sg.mtl.cmd_queue); - SOKOL_ASSERT(nil == _sg.mtl.cmd_encoder); - SOKOL_ASSERT(nil == _sg.mtl.cur_drawable); - _sg_mtl_clear_state_cache(); + SOKOL_ASSERT(nil != _sg.mtl.cmd_buffer); + SOKOL_ASSERT(nil == _sg.mtl.render_cmd_encoder); + SOKOL_ASSERT(nil == _sg.mtl.compute_cmd_encoder); const _sg_attachments_t* atts = _sg.cur_pass.atts; const sg_swapchain* swapchain = &pass->swapchain; const sg_pass_action* action = &pass->action; - /* - if this is the first pass in the frame, create command buffers - - NOTE: we're creating two command buffers here, one with unretained references - for storing the regular commands, and one with retained references for - storing the presentDrawable call (this needs to hold on the drawable until - presentation has happened - and the easiest way to do this is to let the - command buffer manage the lifetime of the drawable). - - Also see: https://github.com/floooh/sokol/issues/762 - */ - if (nil == _sg.mtl.cmd_buffer) { - // block until the oldest frame in flight has finished - dispatch_semaphore_wait(_sg.mtl.sem, DISPATCH_TIME_FOREVER); - if (_sg.desc.mtl_use_command_buffer_with_retained_references) { - _sg.mtl.cmd_buffer = [_sg.mtl.cmd_queue commandBuffer]; - } else { - _sg.mtl.cmd_buffer = [_sg.mtl.cmd_queue commandBufferWithUnretainedReferences]; - } - [_sg.mtl.cmd_buffer enqueue]; - [_sg.mtl.cmd_buffer addCompletedHandler:^(id cmd_buf) { - // NOTE: this code is called on a different thread! 
- _SOKOL_UNUSED(cmd_buf);
- dispatch_semaphore_signal(_sg.mtl.sem);
- }];
- }
-
- // if this is first pass in frame, get uniform buffer base pointer
- if (0 == _sg.mtl.cur_ub_base_ptr) {
- _sg.mtl.cur_ub_base_ptr = (uint8_t*)[_sg.mtl.uniform_buffers[_sg.mtl.cur_frame_rotate_index] contents];
- }
-
MTLRenderPassDescriptor* pass_desc = [MTLRenderPassDescriptor renderPassDescriptor]; SOKOL_ASSERT(pass_desc); if (atts) {
@@ -13297,27 +14011,87 @@ _SOKOL_PRIVATE void _sg_mtl_begin_pass(const sg_pass* pass) { // render command encoder is also returned in a minimized window // === // create a render command encoder, this might return nil if window is minimized
- _sg.mtl.cmd_encoder = [_sg.mtl.cmd_buffer renderCommandEncoderWithDescriptor:pass_desc];
- if (nil == _sg.mtl.cmd_encoder) {
+ _sg.mtl.render_cmd_encoder = [_sg.mtl.cmd_buffer renderCommandEncoderWithDescriptor:pass_desc];
+ if (nil == _sg.mtl.render_cmd_encoder) {
_sg.cur_pass.valid = false; return; } #if defined(SOKOL_DEBUG)
- if (pass->label) {
- _sg.mtl.cmd_encoder.label = [NSString stringWithUTF8String:pass->label];
- }
+ if (pass->label) {
+ _sg.mtl.render_cmd_encoder.label = [NSString stringWithUTF8String:pass->label];
+ }
#endif }
+_SOKOL_PRIVATE void _sg_mtl_begin_pass(const sg_pass* pass) {
+ SOKOL_ASSERT(pass);
+ SOKOL_ASSERT(_sg.mtl.cmd_queue);
+ SOKOL_ASSERT(nil == _sg.mtl.compute_cmd_encoder);
+ SOKOL_ASSERT(nil == _sg.mtl.render_cmd_encoder);
+ SOKOL_ASSERT(nil == _sg.mtl.cur_drawable);
+ _sg_mtl_clear_state_cache();
+
+ // if this is the first pass in the frame, create one command buffer and blit-cmd-encoder for the entire frame
+ if (nil == _sg.mtl.cmd_buffer) {
+ // block until the oldest frame in flight has finished
+ dispatch_semaphore_wait(_sg.mtl.sem, DISPATCH_TIME_FOREVER);
+ if (_sg.desc.mtl_use_command_buffer_with_retained_references) {
+ _sg.mtl.cmd_buffer = [_sg.mtl.cmd_queue commandBuffer];
+ } else {
+ _sg.mtl.cmd_buffer = [_sg.mtl.cmd_queue commandBufferWithUnretainedReferences];
+ }
+ [_sg.mtl.cmd_buffer enqueue];
+ [_sg.mtl.cmd_buffer addCompletedHandler:^(id<MTLCommandBuffer> cmd_buf) {
+ // NOTE: this code is called on a different thread!
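+ // (it must only signal the frame-flight semaphore and not touch any other sokol-gfx state)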
+ _SOKOL_UNUSED(cmd_buf);
+ dispatch_semaphore_signal(_sg.mtl.sem);
+ }];
+ }
+
+ // if this is the first pass in the frame, get the uniform buffer base pointer
+ if (0 == _sg.mtl.cur_ub_base_ptr) {
+ _sg.mtl.cur_ub_base_ptr = (uint8_t*)[_sg.mtl.uniform_buffers[_sg.mtl.cur_frame_rotate_index] contents];
+ }
+
+ if (pass->compute) {
+ _sg_mtl_begin_compute_pass(pass);
+ } else {
+ _sg_mtl_begin_render_pass(pass);
+ }
// bind uniform buffers, those bindings remain valid for the entire pass
- _sg_mtl_bind_uniform_buffers();
+ if (_sg.cur_pass.valid) {
+ _sg_mtl_bind_uniform_buffers();
+ }
}
_SOKOL_PRIVATE void _sg_mtl_end_pass(void) {
- if (nil != _sg.mtl.cmd_encoder) {
- [_sg.mtl.cmd_encoder endEncoding];
+ if (nil != _sg.mtl.render_cmd_encoder) {
+ [_sg.mtl.render_cmd_encoder endEncoding];
// NOTE: MTLRenderCommandEncoder is autoreleased
- _sg.mtl.cmd_encoder = nil;
+ _sg.mtl.render_cmd_encoder = nil;
+ }
+ if (nil != _sg.mtl.compute_cmd_encoder) {
+ [_sg.mtl.compute_cmd_encoder endEncoding];
+ // NOTE: MTLComputeCommandEncoder is autoreleased
+ _sg.mtl.compute_cmd_encoder = nil;
+
+ // synchronize any managed buffers written by the GPU
+ #if defined(_SG_TARGET_MACOS)
+ if (_sg_mtl_resource_options_storage_mode_managed_or_shared() == MTLResourceStorageModeManaged) {
+ if (_sg.compute.readwrite_sbufs.cur > 0) {
+ id<MTLBlitCommandEncoder> blit_cmd_encoder = [_sg.mtl.cmd_buffer blitCommandEncoder];
+ for (uint32_t i = 0; i < _sg.compute.readwrite_sbufs.cur; i++) {
+ _sg_buffer_t* sbuf = _sg_lookup_buffer(&_sg.pools, _sg.compute.readwrite_sbufs.items[i]);
+ if (sbuf) {
+ [blit_cmd_encoder synchronizeResource:_sg_mtl_id(sbuf->mtl.buf[sbuf->cmn.active_slot])];
+ }
+ }
+ [blit_cmd_encoder endEncoding];
+ }
+ }
+ #endif
}
// if this is a swapchain pass, present the drawable if (nil != _sg.mtl.cur_drawable) {
@@ -13327,7 +14101,8 @@ _SOKOL_PRIVATE void _sg_mtl_end_pass(void) { }
_SOKOL_PRIVATE void _sg_mtl_commit(void) {
- SOKOL_ASSERT(nil == _sg.mtl.cmd_encoder);
+ SOKOL_ASSERT(nil == _sg.mtl.render_cmd_encoder);
+ SOKOL_ASSERT(nil == _sg.mtl.compute_cmd_encoder);
SOKOL_ASSERT(nil != _sg.mtl.cmd_buffer); // commit the frame's command buffer
@@ -13347,7 +14122,7 @@ _SOKOL_PRIVATE void _sg_mtl_commit(void) { }
_SOKOL_PRIVATE void _sg_mtl_apply_viewport(int x, int y, int w, int h, bool origin_top_left) {
- SOKOL_ASSERT(nil != _sg.mtl.cmd_encoder);
+ SOKOL_ASSERT(nil != _sg.mtl.render_cmd_encoder);
SOKOL_ASSERT(_sg.cur_pass.height > 0); MTLViewport vp; vp.originX = (double) x;
@@ -13356,11 +14131,11 @@ _SOKOL_PRIVATE void _sg_mtl_apply_viewport(int x, int y, int w, int h, bool orig vp.height = (double) h; vp.znear = 0.0; vp.zfar = 1.0;
- [_sg.mtl.cmd_encoder setViewport:vp];
+ [_sg.mtl.render_cmd_encoder setViewport:vp];
}
_SOKOL_PRIVATE void _sg_mtl_apply_scissor_rect(int x, int y, int w, int h, bool origin_top_left) {
- SOKOL_ASSERT(nil != _sg.mtl.cmd_encoder);
+ SOKOL_ASSERT(nil != _sg.mtl.render_cmd_encoder);
SOKOL_ASSERT(_sg.cur_pass.width > 0); SOKOL_ASSERT(_sg.cur_pass.height > 0); // clip against framebuffer rect
@@ -13370,79 +14145,90 @@ _SOKOL_PRIVATE void _sg_mtl_apply_scissor_rect(int x, int y, int w, int h, bool r.y = (NSUInteger) (origin_top_left ?
clip.y : (_sg.cur_pass.height - (clip.y + clip.h))); r.width = (NSUInteger)clip.w; r.height = (NSUInteger)clip.h; - [_sg.mtl.cmd_encoder setScissorRect:r]; + [_sg.mtl.render_cmd_encoder setScissorRect:r]; } _SOKOL_PRIVATE void _sg_mtl_apply_pipeline(_sg_pipeline_t* pip) { SOKOL_ASSERT(pip); SOKOL_ASSERT(pip->shader && (pip->cmn.shader_id.id == pip->shader->slot.id)); - SOKOL_ASSERT(nil != _sg.mtl.cmd_encoder); - if (_sg.mtl.state_cache.cur_pipeline_id.id != pip->slot.id) { _sg.mtl.state_cache.cur_pipeline = pip; _sg.mtl.state_cache.cur_pipeline_id.id = pip->slot.id; - sg_color c = pip->cmn.blend_color; - [_sg.mtl.cmd_encoder setBlendColorRed:c.r green:c.g blue:c.b alpha:c.a]; - _sg_stats_add(metal.pipeline.num_set_blend_color, 1); - [_sg.mtl.cmd_encoder setCullMode:pip->mtl.cull_mode]; - _sg_stats_add(metal.pipeline.num_set_cull_mode, 1); - [_sg.mtl.cmd_encoder setFrontFacingWinding:pip->mtl.winding]; - _sg_stats_add(metal.pipeline.num_set_front_facing_winding, 1); - [_sg.mtl.cmd_encoder setStencilReferenceValue:pip->mtl.stencil_ref]; - _sg_stats_add(metal.pipeline.num_set_stencil_reference_value, 1); - [_sg.mtl.cmd_encoder setDepthBias:pip->cmn.depth.bias slopeScale:pip->cmn.depth.bias_slope_scale clamp:pip->cmn.depth.bias_clamp]; - _sg_stats_add(metal.pipeline.num_set_depth_bias, 1); - SOKOL_ASSERT(pip->mtl.rps != _SG_MTL_INVALID_SLOT_INDEX); - [_sg.mtl.cmd_encoder setRenderPipelineState:_sg_mtl_id(pip->mtl.rps)]; - _sg_stats_add(metal.pipeline.num_set_render_pipeline_state, 1); - SOKOL_ASSERT(pip->mtl.dss != _SG_MTL_INVALID_SLOT_INDEX); - [_sg.mtl.cmd_encoder setDepthStencilState:_sg_mtl_id(pip->mtl.dss)]; - _sg_stats_add(metal.pipeline.num_set_depth_stencil_state, 1); + if (pip->cmn.is_compute) { + SOKOL_ASSERT(_sg.cur_pass.is_compute); + SOKOL_ASSERT(nil != _sg.mtl.compute_cmd_encoder); + SOKOL_ASSERT(pip->mtl.cps != _SG_MTL_INVALID_SLOT_INDEX); + [_sg.mtl.compute_cmd_encoder setComputePipelineState:_sg_mtl_id(pip->mtl.cps)]; + } else { + SOKOL_ASSERT(!_sg.cur_pass.is_compute); + SOKOL_ASSERT(nil != _sg.mtl.render_cmd_encoder); + sg_color c = pip->cmn.blend_color; + [_sg.mtl.render_cmd_encoder setBlendColorRed:c.r green:c.g blue:c.b alpha:c.a]; + _sg_stats_add(metal.pipeline.num_set_blend_color, 1); + [_sg.mtl.render_cmd_encoder setCullMode:pip->mtl.cull_mode]; + _sg_stats_add(metal.pipeline.num_set_cull_mode, 1); + [_sg.mtl.render_cmd_encoder setFrontFacingWinding:pip->mtl.winding]; + _sg_stats_add(metal.pipeline.num_set_front_facing_winding, 1); + [_sg.mtl.render_cmd_encoder setStencilReferenceValue:pip->mtl.stencil_ref]; + _sg_stats_add(metal.pipeline.num_set_stencil_reference_value, 1); + [_sg.mtl.render_cmd_encoder setDepthBias:pip->cmn.depth.bias slopeScale:pip->cmn.depth.bias_slope_scale clamp:pip->cmn.depth.bias_clamp]; + _sg_stats_add(metal.pipeline.num_set_depth_bias, 1); + SOKOL_ASSERT(pip->mtl.rps != _SG_MTL_INVALID_SLOT_INDEX); + [_sg.mtl.render_cmd_encoder setRenderPipelineState:_sg_mtl_id(pip->mtl.rps)]; + _sg_stats_add(metal.pipeline.num_set_render_pipeline_state, 1); + SOKOL_ASSERT(pip->mtl.dss != _SG_MTL_INVALID_SLOT_INDEX); + [_sg.mtl.render_cmd_encoder setDepthStencilState:_sg_mtl_id(pip->mtl.dss)]; + _sg_stats_add(metal.pipeline.num_set_depth_stencil_state, 1); + } } } _SOKOL_PRIVATE bool _sg_mtl_apply_bindings(_sg_bindings_t* bnd) { SOKOL_ASSERT(bnd); SOKOL_ASSERT(bnd->pip); - SOKOL_ASSERT(nil != _sg.mtl.cmd_encoder); SOKOL_ASSERT(bnd->pip && bnd->pip->shader); SOKOL_ASSERT(bnd->pip->shader->slot.id == bnd->pip->cmn.shader_id.id); const _sg_shader_t* shd = 
bnd->pip->shader; - // store index buffer binding, this will be needed later in sg_draw() - _sg.mtl.state_cache.cur_indexbuffer = bnd->ib; - _sg.mtl.state_cache.cur_indexbuffer_offset = bnd->ib_offset; - if (bnd->ib) { - SOKOL_ASSERT(bnd->pip->cmn.index_type != SG_INDEXTYPE_NONE); - _sg.mtl.state_cache.cur_indexbuffer_id.id = bnd->ib->slot.id; - } else { - SOKOL_ASSERT(bnd->pip->cmn.index_type == SG_INDEXTYPE_NONE); - _sg.mtl.state_cache.cur_indexbuffer_id.id = SG_INVALID_ID; - } - - // apply vertex buffers - for (size_t i = 0; i < SG_MAX_VERTEXBUFFER_BINDSLOTS; i++) { - const _sg_buffer_t* vb = bnd->vbs[i]; - if (vb == 0) { - continue; + // don't set vertex- and index-buffers in compute passes + if (!_sg.cur_pass.is_compute) { + SOKOL_ASSERT(nil != _sg.mtl.render_cmd_encoder); + // store index buffer binding, this will be needed later in sg_draw() + _sg.mtl.state_cache.cur_indexbuffer = bnd->ib; + _sg.mtl.state_cache.cur_indexbuffer_offset = bnd->ib_offset; + if (bnd->ib) { + SOKOL_ASSERT(bnd->pip->cmn.index_type != SG_INDEXTYPE_NONE); + _sg.mtl.state_cache.cur_indexbuffer_id.id = bnd->ib->slot.id; + } else { + SOKOL_ASSERT(bnd->pip->cmn.index_type == SG_INDEXTYPE_NONE); + _sg.mtl.state_cache.cur_indexbuffer_id.id = SG_INVALID_ID; } - const NSUInteger mtl_slot = _SG_MTL_MAX_STAGE_UB_SBUF_BINDINGS + i; - SOKOL_ASSERT(mtl_slot < _SG_MTL_MAX_STAGE_BUFFER_BINDINGS); - const int vb_offset = bnd->vb_offsets[i]; - if ((_sg.mtl.state_cache.cur_vs_buffer_ids[mtl_slot].id != vb->slot.id) || - (_sg.mtl.state_cache.cur_vs_buffer_offsets[mtl_slot] != vb_offset)) - { - _sg.mtl.state_cache.cur_vs_buffer_offsets[mtl_slot] = vb_offset; - if (_sg.mtl.state_cache.cur_vs_buffer_ids[mtl_slot].id != vb->slot.id) { - _sg.mtl.state_cache.cur_vs_buffer_ids[mtl_slot].id = vb->slot.id; - SOKOL_ASSERT(vb->mtl.buf[vb->cmn.active_slot] != _SG_MTL_INVALID_SLOT_INDEX); - [_sg.mtl.cmd_encoder setVertexBuffer:_sg_mtl_id(vb->mtl.buf[vb->cmn.active_slot]) - offset:(NSUInteger)vb_offset - atIndex:mtl_slot]; - } else { - [_sg.mtl.cmd_encoder setVertexBufferOffset:(NSUInteger)vb_offset atIndex:mtl_slot]; + // apply vertex buffers + for (size_t i = 0; i < SG_MAX_VERTEXBUFFER_BINDSLOTS; i++) { + const _sg_buffer_t* vb = bnd->vbs[i]; + if (vb == 0) { + continue; + } + const NSUInteger mtl_slot = _sg_mtl_vertexbuffer_bindslot(i); + SOKOL_ASSERT(mtl_slot < _SG_MTL_MAX_STAGE_BUFFER_BINDINGS); + const int vb_offset = bnd->vb_offsets[i]; + if ((_sg.mtl.state_cache.cur_vs_buffer_ids[mtl_slot].id != vb->slot.id) || + (_sg.mtl.state_cache.cur_vs_buffer_offsets[mtl_slot] != vb_offset)) + { + _sg.mtl.state_cache.cur_vs_buffer_offsets[mtl_slot] = vb_offset; + if (_sg.mtl.state_cache.cur_vs_buffer_ids[mtl_slot].id != vb->slot.id) { + // vertex buffer has changed + _sg.mtl.state_cache.cur_vs_buffer_ids[mtl_slot].id = vb->slot.id; + SOKOL_ASSERT(vb->mtl.buf[vb->cmn.active_slot] != _SG_MTL_INVALID_SLOT_INDEX); + [_sg.mtl.render_cmd_encoder setVertexBuffer:_sg_mtl_id(vb->mtl.buf[vb->cmn.active_slot]) + offset:(NSUInteger)vb_offset + atIndex:mtl_slot]; + } else { + // only vertex buffer offset has changed + [_sg.mtl.render_cmd_encoder setVertexBufferOffset:(NSUInteger)vb_offset atIndex:mtl_slot]; + } + _sg_stats_add(metal.bindings.num_set_vertex_buffer, 1); } - _sg_stats_add(metal.bindings.num_set_vertex_buffer, 1); } } @@ -13454,21 +14240,30 @@ _SOKOL_PRIVATE bool _sg_mtl_apply_bindings(_sg_bindings_t* bnd) { } SOKOL_ASSERT(img->mtl.tex[img->cmn.active_slot] != _SG_MTL_INVALID_SLOT_INDEX); const sg_shader_stage stage = shd->cmn.images[i].stage; 
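+ // with compute support an image binding may now also target the compute stage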
- SOKOL_ASSERT((stage == SG_SHADERSTAGE_VERTEX) || (stage == SG_SHADERSTAGE_FRAGMENT)); + SOKOL_ASSERT((stage == SG_SHADERSTAGE_VERTEX) || (stage == SG_SHADERSTAGE_FRAGMENT) || (stage == SG_SHADERSTAGE_COMPUTE)); const NSUInteger mtl_slot = shd->mtl.img_texture_n[i]; SOKOL_ASSERT(mtl_slot < _SG_MTL_MAX_STAGE_IMAGE_BINDINGS); if (stage == SG_SHADERSTAGE_VERTEX) { + SOKOL_ASSERT(nil != _sg.mtl.render_cmd_encoder); if (_sg.mtl.state_cache.cur_vs_image_ids[mtl_slot].id != img->slot.id) { _sg.mtl.state_cache.cur_vs_image_ids[mtl_slot].id = img->slot.id; - [_sg.mtl.cmd_encoder setVertexTexture:_sg_mtl_id(img->mtl.tex[img->cmn.active_slot]) atIndex:mtl_slot]; + [_sg.mtl.render_cmd_encoder setVertexTexture:_sg_mtl_id(img->mtl.tex[img->cmn.active_slot]) atIndex:mtl_slot]; _sg_stats_add(metal.bindings.num_set_vertex_texture, 1); } } else if (stage == SG_SHADERSTAGE_FRAGMENT) { + SOKOL_ASSERT(nil != _sg.mtl.render_cmd_encoder); if (_sg.mtl.state_cache.cur_fs_image_ids[mtl_slot].id != img->slot.id) { _sg.mtl.state_cache.cur_fs_image_ids[mtl_slot].id = img->slot.id; - [_sg.mtl.cmd_encoder setFragmentTexture:_sg_mtl_id(img->mtl.tex[img->cmn.active_slot]) atIndex:mtl_slot]; + [_sg.mtl.render_cmd_encoder setFragmentTexture:_sg_mtl_id(img->mtl.tex[img->cmn.active_slot]) atIndex:mtl_slot]; _sg_stats_add(metal.bindings.num_set_fragment_texture, 1); } + } else if (stage == SG_SHADERSTAGE_COMPUTE) { + SOKOL_ASSERT(nil != _sg.mtl.compute_cmd_encoder); + if (_sg.mtl.state_cache.cur_cs_image_ids[mtl_slot].id != img->slot.id) { + _sg.mtl.state_cache.cur_cs_image_ids[mtl_slot].id = img->slot.id; + [_sg.mtl.compute_cmd_encoder setTexture:_sg_mtl_id(img->mtl.tex[img->cmn.active_slot]) atIndex:mtl_slot]; + _sg_stats_add(metal.bindings.num_set_compute_texture, 1); + } } } @@ -13478,24 +14273,32 @@ _SOKOL_PRIVATE bool _sg_mtl_apply_bindings(_sg_bindings_t* bnd) { if (smp == 0) { continue; } + SOKOL_ASSERT(smp->mtl.sampler_state != _SG_MTL_INVALID_SLOT_INDEX); const sg_shader_stage stage = shd->cmn.samplers[i].stage; - SOKOL_ASSERT((stage == SG_SHADERSTAGE_VERTEX) || (stage == SG_SHADERSTAGE_FRAGMENT)); + SOKOL_ASSERT((stage == SG_SHADERSTAGE_VERTEX) || (stage == SG_SHADERSTAGE_FRAGMENT) || (stage == SG_SHADERSTAGE_COMPUTE)); const NSUInteger mtl_slot = shd->mtl.smp_sampler_n[i]; SOKOL_ASSERT(mtl_slot < _SG_MTL_MAX_STAGE_SAMPLER_BINDINGS); if (stage == SG_SHADERSTAGE_VERTEX) { + SOKOL_ASSERT(nil != _sg.mtl.render_cmd_encoder); if (_sg.mtl.state_cache.cur_vs_sampler_ids[mtl_slot].id != smp->slot.id) { _sg.mtl.state_cache.cur_vs_sampler_ids[mtl_slot].id = smp->slot.id; - SOKOL_ASSERT(smp->mtl.sampler_state != _SG_MTL_INVALID_SLOT_INDEX); - [_sg.mtl.cmd_encoder setVertexSamplerState:_sg_mtl_id(smp->mtl.sampler_state) atIndex:mtl_slot]; + [_sg.mtl.render_cmd_encoder setVertexSamplerState:_sg_mtl_id(smp->mtl.sampler_state) atIndex:mtl_slot]; _sg_stats_add(metal.bindings.num_set_vertex_sampler_state, 1); } } else if (stage == SG_SHADERSTAGE_FRAGMENT) { + SOKOL_ASSERT(nil != _sg.mtl.render_cmd_encoder); if (_sg.mtl.state_cache.cur_fs_sampler_ids[mtl_slot].id != smp->slot.id) { _sg.mtl.state_cache.cur_fs_sampler_ids[mtl_slot].id = smp->slot.id; - SOKOL_ASSERT(smp->mtl.sampler_state != _SG_MTL_INVALID_SLOT_INDEX); - [_sg.mtl.cmd_encoder setFragmentSamplerState:_sg_mtl_id(smp->mtl.sampler_state) atIndex:mtl_slot]; + [_sg.mtl.render_cmd_encoder setFragmentSamplerState:_sg_mtl_id(smp->mtl.sampler_state) atIndex:mtl_slot]; _sg_stats_add(metal.bindings.num_set_fragment_sampler_state, 1); } + } else if (stage == SG_SHADERSTAGE_COMPUTE) 
{ + SOKOL_ASSERT(nil != _sg.mtl.compute_cmd_encoder); + if (_sg.mtl.state_cache.cur_cs_sampler_ids[mtl_slot].id != smp->slot.id) { + _sg.mtl.state_cache.cur_cs_sampler_ids[mtl_slot].id = smp->slot.id; + [_sg.mtl.compute_cmd_encoder setSamplerState:_sg_mtl_id(smp->mtl.sampler_state) atIndex:mtl_slot]; + _sg_stats_add(metal.bindings.num_set_compute_sampler_state, 1); + } } } @@ -13505,31 +14308,38 @@ _SOKOL_PRIVATE bool _sg_mtl_apply_bindings(_sg_bindings_t* bnd) { if (sbuf == 0) { continue; } + SOKOL_ASSERT(sbuf->mtl.buf[sbuf->cmn.active_slot] != _SG_MTL_INVALID_SLOT_INDEX); const sg_shader_stage stage = shd->cmn.storage_buffers[i].stage; - SOKOL_ASSERT((stage == SG_SHADERSTAGE_VERTEX) || (stage == SG_SHADERSTAGE_FRAGMENT)); + SOKOL_ASSERT((stage == SG_SHADERSTAGE_VERTEX) || (stage == SG_SHADERSTAGE_FRAGMENT) || (stage == SG_SHADERSTAGE_COMPUTE)); const NSUInteger mtl_slot = shd->mtl.sbuf_buffer_n[i]; SOKOL_ASSERT(mtl_slot < _SG_MTL_MAX_STAGE_UB_SBUF_BINDINGS); if (stage == SG_SHADERSTAGE_VERTEX) { + SOKOL_ASSERT(nil != _sg.mtl.render_cmd_encoder); if (_sg.mtl.state_cache.cur_vs_buffer_ids[mtl_slot].id != sbuf->slot.id) { _sg.mtl.state_cache.cur_vs_buffer_ids[mtl_slot].id = sbuf->slot.id; - SOKOL_ASSERT(sbuf->mtl.buf[sbuf->cmn.active_slot] != _SG_MTL_INVALID_SLOT_INDEX); - [_sg.mtl.cmd_encoder setVertexBuffer:_sg_mtl_id(sbuf->mtl.buf[sbuf->cmn.active_slot]) offset:0 atIndex:mtl_slot]; + [_sg.mtl.render_cmd_encoder setVertexBuffer:_sg_mtl_id(sbuf->mtl.buf[sbuf->cmn.active_slot]) offset:0 atIndex:mtl_slot]; _sg_stats_add(metal.bindings.num_set_vertex_buffer, 1); } } else if (stage == SG_SHADERSTAGE_FRAGMENT) { + SOKOL_ASSERT(nil != _sg.mtl.render_cmd_encoder); if (_sg.mtl.state_cache.cur_fs_buffer_ids[mtl_slot].id != sbuf->slot.id) { _sg.mtl.state_cache.cur_fs_buffer_ids[mtl_slot].id = sbuf->slot.id; - SOKOL_ASSERT(sbuf->mtl.buf[sbuf->cmn.active_slot] != _SG_MTL_INVALID_SLOT_INDEX); - [_sg.mtl.cmd_encoder setFragmentBuffer:_sg_mtl_id(sbuf->mtl.buf[sbuf->cmn.active_slot]) offset:0 atIndex:mtl_slot]; + [_sg.mtl.render_cmd_encoder setFragmentBuffer:_sg_mtl_id(sbuf->mtl.buf[sbuf->cmn.active_slot]) offset:0 atIndex:mtl_slot]; _sg_stats_add(metal.bindings.num_set_fragment_buffer, 1); } + } else if (stage == SG_SHADERSTAGE_COMPUTE) { + SOKOL_ASSERT(nil != _sg.mtl.compute_cmd_encoder); + if (_sg.mtl.state_cache.cur_cs_buffer_ids[mtl_slot].id != sbuf->slot.id) { + _sg.mtl.state_cache.cur_cs_buffer_ids[mtl_slot].id = sbuf->slot.id; + [_sg.mtl.compute_cmd_encoder setBuffer:_sg_mtl_id(sbuf->mtl.buf[sbuf->cmn.active_slot]) offset:0 atIndex:mtl_slot]; + _sg_stats_add(metal.bindings.num_set_compute_buffer, 1); + } } } return true; } _SOKOL_PRIVATE void _sg_mtl_apply_uniforms(int ub_slot, const sg_range* data) { - SOKOL_ASSERT(nil != _sg.mtl.cmd_encoder); SOKOL_ASSERT((ub_slot >= 0) && (ub_slot < SG_MAX_UNIFORMBLOCK_BINDSLOTS)); SOKOL_ASSERT(((size_t)_sg.mtl.cur_ub_offset + data->size) <= (size_t)_sg.mtl.ub_size); SOKOL_ASSERT((_sg.mtl.cur_ub_offset & (_SG_MTL_UB_ALIGN-1)) == 0); @@ -13547,11 +14357,17 @@ _SOKOL_PRIVATE void _sg_mtl_apply_uniforms(int ub_slot, const sg_range* data) { uint8_t* dst = &_sg.mtl.cur_ub_base_ptr[_sg.mtl.cur_ub_offset]; memcpy(dst, data->ptr, data->size); if (stage == SG_SHADERSTAGE_VERTEX) { - [_sg.mtl.cmd_encoder setVertexBufferOffset:(NSUInteger)_sg.mtl.cur_ub_offset atIndex:mtl_slot]; + SOKOL_ASSERT(nil != _sg.mtl.render_cmd_encoder); + [_sg.mtl.render_cmd_encoder setVertexBufferOffset:(NSUInteger)_sg.mtl.cur_ub_offset atIndex:mtl_slot]; 
_sg_stats_add(metal.uniforms.num_set_vertex_buffer_offset, 1); } else if (stage == SG_SHADERSTAGE_FRAGMENT) { - [_sg.mtl.cmd_encoder setFragmentBufferOffset:(NSUInteger)_sg.mtl.cur_ub_offset atIndex:mtl_slot]; + SOKOL_ASSERT(nil != _sg.mtl.render_cmd_encoder); + [_sg.mtl.render_cmd_encoder setFragmentBufferOffset:(NSUInteger)_sg.mtl.cur_ub_offset atIndex:mtl_slot]; _sg_stats_add(metal.uniforms.num_set_fragment_buffer_offset, 1); + } else if (stage == SG_SHADERSTAGE_COMPUTE) { + SOKOL_ASSERT(nil != _sg.mtl.compute_cmd_encoder); + [_sg.mtl.compute_cmd_encoder setBufferOffset:(NSUInteger)_sg.mtl.cur_ub_offset atIndex:mtl_slot]; + _sg_stats_add(metal.uniforms.num_set_compute_buffer_offset, 1); } else { SOKOL_UNREACHABLE; } @@ -13559,7 +14375,7 @@ _SOKOL_PRIVATE void _sg_mtl_apply_uniforms(int ub_slot, const sg_range* data) { } _SOKOL_PRIVATE void _sg_mtl_draw(int base_element, int num_elements, int num_instances) { - SOKOL_ASSERT(nil != _sg.mtl.cmd_encoder); + SOKOL_ASSERT(nil != _sg.mtl.render_cmd_encoder); SOKOL_ASSERT(_sg.mtl.state_cache.cur_pipeline && (_sg.mtl.state_cache.cur_pipeline->slot.id == _sg.mtl.state_cache.cur_pipeline_id.id)); if (SG_INDEXTYPE_NONE != _sg.mtl.state_cache.cur_pipeline->cmn.index_type) { // indexed rendering @@ -13567,7 +14383,7 @@ _SOKOL_PRIVATE void _sg_mtl_draw(int base_element, int num_elements, int num_ins const _sg_buffer_t* ib = _sg.mtl.state_cache.cur_indexbuffer; SOKOL_ASSERT(ib->mtl.buf[ib->cmn.active_slot] != _SG_MTL_INVALID_SLOT_INDEX); const NSUInteger index_buffer_offset = (NSUInteger) (_sg.mtl.state_cache.cur_indexbuffer_offset + base_element * _sg.mtl.state_cache.cur_pipeline->mtl.index_size); - [_sg.mtl.cmd_encoder drawIndexedPrimitives:_sg.mtl.state_cache.cur_pipeline->mtl.prim_type + [_sg.mtl.render_cmd_encoder drawIndexedPrimitives:_sg.mtl.state_cache.cur_pipeline->mtl.prim_type indexCount:(NSUInteger)num_elements indexType:_sg.mtl.state_cache.cur_pipeline->mtl.index_type indexBuffer:_sg_mtl_id(ib->mtl.buf[ib->cmn.active_slot]) @@ -13575,13 +14391,25 @@ _SOKOL_PRIVATE void _sg_mtl_draw(int base_element, int num_elements, int num_ins instanceCount:(NSUInteger)num_instances]; } else { // non-indexed rendering - [_sg.mtl.cmd_encoder drawPrimitives:_sg.mtl.state_cache.cur_pipeline->mtl.prim_type + [_sg.mtl.render_cmd_encoder drawPrimitives:_sg.mtl.state_cache.cur_pipeline->mtl.prim_type vertexStart:(NSUInteger)base_element vertexCount:(NSUInteger)num_elements instanceCount:(NSUInteger)num_instances]; } } +_SOKOL_PRIVATE void _sg_mtl_dispatch(int num_groups_x, int num_groups_y, int num_groups_z) { + SOKOL_ASSERT(nil != _sg.mtl.compute_cmd_encoder); + SOKOL_ASSERT(_sg.mtl.state_cache.cur_pipeline && (_sg.mtl.state_cache.cur_pipeline->slot.id == _sg.mtl.state_cache.cur_pipeline_id.id)); + const _sg_pipeline_t* cur_pip = _sg.mtl.state_cache.cur_pipeline; + const MTLSize thread_groups = MTLSizeMake( + (NSUInteger)num_groups_x, + (NSUInteger)num_groups_y, + (NSUInteger)num_groups_z); + const MTLSize threads_per_threadgroup = cur_pip->mtl.threads_per_threadgroup; + [_sg.mtl.compute_cmd_encoder dispatchThreadgroups:thread_groups threadsPerThreadgroup:threads_per_threadgroup]; +} + _SOKOL_PRIVATE void _sg_mtl_update_buffer(_sg_buffer_t* buf, const sg_range* data) { SOKOL_ASSERT(buf && data && data->ptr && (data->size > 0)); if (++buf->cmn.active_slot >= buf->cmn.num_slots) { @@ -13626,14 +14454,18 @@ _SOKOL_PRIVATE void _sg_mtl_update_image(_sg_image_t* img, const sg_image_data* _SOKOL_PRIVATE void _sg_mtl_push_debug_group(const char* name) { 
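+ // forward the debug group to whichever command encoder (render or compute) is currently active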
SOKOL_ASSERT(name); - if (_sg.mtl.cmd_encoder) { - [_sg.mtl.cmd_encoder pushDebugGroup:[NSString stringWithUTF8String:name]]; + if (_sg.mtl.render_cmd_encoder) { + [_sg.mtl.render_cmd_encoder pushDebugGroup:[NSString stringWithUTF8String:name]]; + } else if (_sg.mtl.compute_cmd_encoder) { + [_sg.mtl.compute_cmd_encoder pushDebugGroup:[NSString stringWithUTF8String:name]]; } } _SOKOL_PRIVATE void _sg_mtl_pop_debug_group(void) { - if (_sg.mtl.cmd_encoder) { - [_sg.mtl.cmd_encoder popDebugGroup]; + if (_sg.mtl.render_cmd_encoder) { + [_sg.mtl.render_cmd_encoder popDebugGroup]; + } else if (_sg.mtl.compute_cmd_encoder) { + [_sg.mtl.compute_cmd_encoder popDebugGroup]; } } @@ -14036,6 +14868,7 @@ _SOKOL_PRIVATE WGPUShaderStage _sg_wgpu_shader_stage(sg_shader_stage stage) { switch (stage) { case SG_SHADERSTAGE_VERTEX: return WGPUShaderStage_Vertex; case SG_SHADERSTAGE_FRAGMENT: return WGPUShaderStage_Fragment; + case SG_SHADERSTAGE_COMPUTE: return WGPUShaderStage_Compute; default: SOKOL_UNREACHABLE; return WGPUShaderStage_None; } } @@ -14046,7 +14879,7 @@ _SOKOL_PRIVATE void _sg_wgpu_init_caps(void) { _sg.features.image_clamp_to_border = false; _sg.features.mrt_independent_blend_state = true; _sg.features.mrt_independent_write_mask = true; - _sg.features.storage_buffer = true; + _sg.features.compute = true; _sg.features.msaa_image_bindings = true; wgpuDeviceGetLimits(_sg.wgpu.dev, &_sg.wgpu.limits); @@ -14189,7 +15022,7 @@ _SOKOL_PRIVATE void _sg_wgpu_bindgroups_pool_init(const sg_desc* desc) { _sg_wgpu_bindgroups_pool_t* p = &_sg.wgpu.bindgroups_pool; SOKOL_ASSERT(0 == p->bindgroups); const int pool_size = desc->wgpu_bindgroups_cache_size; - _sg_init_pool(&p->pool, pool_size); + _sg_pool_init(&p->pool, pool_size); size_t pool_byte_size = sizeof(_sg_wgpu_bindgroup_t) * (size_t)p->pool.size; p->bindgroups = (_sg_wgpu_bindgroup_t*) _sg_malloc_clear(pool_byte_size); } @@ -14198,7 +15031,7 @@ _SOKOL_PRIVATE void _sg_wgpu_bindgroups_pool_discard(void) { _sg_wgpu_bindgroups_pool_t* p = &_sg.wgpu.bindgroups_pool; SOKOL_ASSERT(p->bindgroups); _sg_free(p->bindgroups); p->bindgroups = 0; - _sg_discard_pool(&p->pool); + _sg_pool_discard(&p->pool); } _SOKOL_PRIVATE _sg_wgpu_bindgroup_t* _sg_wgpu_bindgroup_at(uint32_t bg_id) { @@ -14236,7 +15069,7 @@ _SOKOL_PRIVATE void _sg_wgpu_dealloc_bindgroup(_sg_wgpu_bindgroup_t* bg) { SOKOL_ASSERT(bg && (bg->slot.state == SG_RESOURCESTATE_ALLOC) && (bg->slot.id != SG_INVALID_ID)); _sg_wgpu_bindgroups_pool_t* p = &_sg.wgpu.bindgroups_pool; _sg_pool_free_index(&p->pool, _sg_slot_index(bg->slot.id)); - _sg_reset_slot(&bg->slot); + _sg_slot_reset(&bg->slot); } _SOKOL_PRIVATE void _sg_wgpu_reset_bindgroup_to_alloc_state(_sg_wgpu_bindgroup_t* bg) { @@ -14597,12 +15430,24 @@ _SOKOL_PRIVATE void _sg_wgpu_set_img_smp_sbuf_bindgroup(_sg_wgpu_bindgroup_t* bg if (_sg_wgpu_bindings_cache_bg_dirty(bg)) { _sg_wgpu_bindings_cache_bg_update(bg); _sg_stats_add(wgpu.bindings.num_set_bindgroup, 1); - if (bg) { - SOKOL_ASSERT(bg->slot.state == SG_RESOURCESTATE_VALID); - SOKOL_ASSERT(bg->bindgroup); - wgpuRenderPassEncoderSetBindGroup(_sg.wgpu.pass_enc, _SG_WGPU_IMG_SMP_SBUF_BINDGROUP_INDEX, bg->bindgroup, 0, 0); + if (_sg.cur_pass.is_compute) { + SOKOL_ASSERT(_sg.wgpu.cpass_enc); + if (bg) { + SOKOL_ASSERT(bg->slot.state == SG_RESOURCESTATE_VALID); + SOKOL_ASSERT(bg->bindgroup); + wgpuComputePassEncoderSetBindGroup(_sg.wgpu.cpass_enc, _SG_WGPU_IMG_SMP_SBUF_BINDGROUP_INDEX, bg->bindgroup, 0, 0); + } else { + wgpuComputePassEncoderSetBindGroup(_sg.wgpu.cpass_enc, 
_SG_WGPU_IMG_SMP_SBUF_BINDGROUP_INDEX, _sg.wgpu.empty_bind_group, 0, 0); + } } else { - wgpuRenderPassEncoderSetBindGroup(_sg.wgpu.pass_enc, _SG_WGPU_IMG_SMP_SBUF_BINDGROUP_INDEX, _sg.wgpu.empty_bind_group, 0, 0); + SOKOL_ASSERT(_sg.wgpu.rpass_enc); + if (bg) { + SOKOL_ASSERT(bg->slot.state == SG_RESOURCESTATE_VALID); + SOKOL_ASSERT(bg->bindgroup); + wgpuRenderPassEncoderSetBindGroup(_sg.wgpu.rpass_enc, _SG_WGPU_IMG_SMP_SBUF_BINDGROUP_INDEX, bg->bindgroup, 0, 0); + } else { + wgpuRenderPassEncoderSetBindGroup(_sg.wgpu.rpass_enc, _SG_WGPU_IMG_SMP_SBUF_BINDGROUP_INDEX, _sg.wgpu.empty_bind_group, 0, 0); + } } } else { _sg_stats_add(wgpu.bindings.num_skip_redundant_bindgroup, 1); @@ -14657,6 +15502,7 @@ _SOKOL_PRIVATE bool _sg_wgpu_apply_bindgroup(_sg_bindings_t* bnd) { } _SOKOL_PRIVATE bool _sg_wgpu_apply_index_buffer(_sg_bindings_t* bnd) { + SOKOL_ASSERT(_sg.wgpu.rpass_enc); const _sg_buffer_t* ib = bnd->ib; uint64_t offset = (uint64_t)bnd->ib_offset; if (_sg_wgpu_bindings_cache_ib_dirty(ib, offset)) { @@ -14666,10 +15512,10 @@ _SOKOL_PRIVATE bool _sg_wgpu_apply_index_buffer(_sg_bindings_t* bnd) { const uint64_t buf_size = (uint64_t)ib->cmn.size; SOKOL_ASSERT(buf_size > offset); const uint64_t max_bytes = buf_size - offset; - wgpuRenderPassEncoderSetIndexBuffer(_sg.wgpu.pass_enc, ib->wgpu.buf, format, offset, max_bytes); + wgpuRenderPassEncoderSetIndexBuffer(_sg.wgpu.rpass_enc, ib->wgpu.buf, format, offset, max_bytes); /* FIXME: the else-pass should actually set a null index buffer, but that doesn't seem to work yet } else { - wgpuRenderPassEncoderSetIndexBuffer(_sg.wgpu.pass_enc, 0, WGPUIndexFormat_Undefined, 0, 0); + wgpuRenderPassEncoderSetIndexBuffer(_sg.wgpu.rpass_enc, 0, WGPUIndexFormat_Undefined, 0, 0); */ } _sg_stats_add(wgpu.bindings.num_set_index_buffer, 1); @@ -14680,7 +15526,8 @@ _SOKOL_PRIVATE bool _sg_wgpu_apply_index_buffer(_sg_bindings_t* bnd) { } _SOKOL_PRIVATE bool _sg_wgpu_apply_vertex_buffers(_sg_bindings_t* bnd) { - for (size_t slot = 0; slot < SG_MAX_VERTEXBUFFER_BINDSLOTS; slot++) { + SOKOL_ASSERT(_sg.wgpu.rpass_enc); + for (uint32_t slot = 0; slot < SG_MAX_VERTEXBUFFER_BINDSLOTS; slot++) { const _sg_buffer_t* vb = bnd->vbs[slot]; const uint64_t offset = (uint64_t)bnd->vb_offsets[slot]; if (_sg_wgpu_bindings_cache_vb_dirty(slot, vb, offset)) { @@ -14689,10 +15536,10 @@ _SOKOL_PRIVATE bool _sg_wgpu_apply_vertex_buffers(_sg_bindings_t* bnd) { const uint64_t buf_size = (uint64_t)vb->cmn.size; SOKOL_ASSERT(buf_size > offset); const uint64_t max_bytes = buf_size - offset; - wgpuRenderPassEncoderSetVertexBuffer(_sg.wgpu.pass_enc, slot, vb->wgpu.buf, offset, max_bytes); + wgpuRenderPassEncoderSetVertexBuffer(_sg.wgpu.rpass_enc, slot, vb->wgpu.buf, offset, max_bytes); /* FIXME: the else-pass should actually set a null vertex buffer, but that doesn't seem to work yet } else { - wgpuRenderPassEncoderSetVertexBuffer(_sg.wgpu.pass_enc, slot, 0, 0, 0); + wgpuRenderPassEncoderSetVertexBuffer(_sg.wgpu.rpass_enc, slot, 0, 0, 0); */ } _sg_stats_add(wgpu.bindings.num_set_vertex_buffer, 1); @@ -14757,6 +15604,7 @@ _SOKOL_PRIVATE void _sg_wgpu_reset_state_cache(void) { _SOKOL_PRIVATE sg_resource_state _sg_wgpu_create_buffer(_sg_buffer_t* buf, const sg_buffer_desc* desc) { SOKOL_ASSERT(buf && desc); + SOKOL_ASSERT(buf->cmn.size > 0); const bool injected = (0 != desc->wgpu_buffer); if (injected) { buf->wgpu.buf = (WGPUBuffer) desc->wgpu_buffer; @@ -14765,7 +15613,7 @@ _SOKOL_PRIVATE sg_resource_state _sg_wgpu_create_buffer(_sg_buffer_t* buf, const // buffer mapping size must be multiple of 
4, so round up buffer size (only a problem // with index buffers containing odd number of indices) const uint64_t wgpu_buf_size = _sg_roundup_u64((uint64_t)buf->cmn.size, 4); - const bool map_at_creation = (SG_USAGE_IMMUTABLE == buf->cmn.usage); + const bool map_at_creation = (SG_USAGE_IMMUTABLE == buf->cmn.usage) && (desc->data.ptr); WGPUBufferDescriptor wgpu_buf_desc; _sg_clear(&wgpu_buf_desc, sizeof(wgpu_buf_desc)); @@ -14778,6 +15626,7 @@ _SOKOL_PRIVATE sg_resource_state _sg_wgpu_create_buffer(_sg_buffer_t* buf, const _SG_ERROR(WGPU_CREATE_BUFFER_FAILED); return SG_RESOURCESTATE_FAILED; } + // NOTE: assume that WebGPU creates zero-initialized buffers if (map_at_creation) { SOKOL_ASSERT(desc->data.ptr && (desc->data.size > 0)); SOKOL_ASSERT(desc->data.size <= (size_t)buf->cmn.size); @@ -15004,6 +15853,9 @@ _SOKOL_PRIVATE _sg_wgpu_shader_func_t _sg_wgpu_create_shader_func(const sg_shade wgpu_shdmod_desc.nextInChain = &wgpu_shdmod_wgsl_desc.chain; wgpu_shdmod_desc.label = _sg_wgpu_stringview(label); + // NOTE: if compilation fails we won't actually find out in this call since + // it always returns a valid module handle, and the GetCompilationInfo() call + // is asynchronous res.module = wgpuDeviceCreateShaderModule(_sg.wgpu.dev, &wgpu_shdmod_desc); if (0 == res.module) { _SG_ERROR(WGPU_CREATE_SHADER_MODULE_FAILED); @@ -15060,9 +15912,9 @@ _SOKOL_PRIVATE bool _sg_wgpu_ensure_wgsl_bindslot_ranges(const sg_shader_desc* d _SOKOL_PRIVATE sg_resource_state _sg_wgpu_create_shader(_sg_shader_t* shd, const sg_shader_desc* desc) { SOKOL_ASSERT(shd && desc); - SOKOL_ASSERT(desc->vertex_func.source && desc->fragment_func.source); SOKOL_ASSERT(shd->wgpu.vertex_func.module == 0); SOKOL_ASSERT(shd->wgpu.fragment_func.module == 0); + SOKOL_ASSERT(shd->wgpu.compute_func.module == 0); SOKOL_ASSERT(shd->wgpu.bgl_ub == 0); SOKOL_ASSERT(shd->wgpu.bg_ub == 0); SOKOL_ASSERT(shd->wgpu.bgl_img_smp_sbuf == 0); @@ -15075,9 +15927,23 @@ _SOKOL_PRIVATE sg_resource_state _sg_wgpu_create_shader(_sg_shader_t* shd, const } // build shader modules - shd->wgpu.vertex_func = _sg_wgpu_create_shader_func(&desc->vertex_func, desc->label); - shd->wgpu.fragment_func = _sg_wgpu_create_shader_func(&desc->fragment_func, desc->label); - if ((shd->wgpu.vertex_func.module == 0) || (shd->wgpu.fragment_func.module == 0)) { + bool shd_valid = true; + if (desc->vertex_func.source) { + shd->wgpu.vertex_func = _sg_wgpu_create_shader_func(&desc->vertex_func, desc->label); + shd_valid &= shd->wgpu.vertex_func.module != 0; + } + if (desc->fragment_func.source) { + shd->wgpu.fragment_func = _sg_wgpu_create_shader_func(&desc->fragment_func, desc->label); + shd_valid &= shd->wgpu.fragment_func.module != 0; + } + if (desc->compute_func.source) { + shd->wgpu.compute_func = _sg_wgpu_create_shader_func(&desc->compute_func, desc->label); + shd_valid &= shd->wgpu.compute_func.module != 0; + } + if (!shd_valid) { + _sg_wgpu_discard_shader_func(&shd->wgpu.vertex_func); + _sg_wgpu_discard_shader_func(&shd->wgpu.fragment_func); + _sg_wgpu_discard_shader_func(&shd->wgpu.compute_func); return SG_RESOURCESTATE_FAILED; } @@ -15192,6 +16058,7 @@ _SOKOL_PRIVATE void _sg_wgpu_discard_shader(_sg_shader_t* shd) { SOKOL_ASSERT(shd); _sg_wgpu_discard_shader_func(&shd->wgpu.vertex_func); _sg_wgpu_discard_shader_func(&shd->wgpu.fragment_func); + _sg_wgpu_discard_shader_func(&shd->wgpu.compute_func); if (shd->wgpu.bgl_ub) { wgpuBindGroupLayoutRelease(shd->wgpu.bgl_ub); shd->wgpu.bgl_ub = 0; @@ -15235,103 +16102,118 @@ _SOKOL_PRIVATE sg_resource_state 
_sg_wgpu_create_pipeline(_sg_pipeline_t* pip, _ } SOKOL_ASSERT(wgpu_pip_layout); - WGPUVertexBufferLayout wgpu_vb_layouts[SG_MAX_VERTEXBUFFER_BINDSLOTS]; - _sg_clear(wgpu_vb_layouts, sizeof(wgpu_vb_layouts)); - WGPUVertexAttribute wgpu_vtx_attrs[SG_MAX_VERTEXBUFFER_BINDSLOTS][SG_MAX_VERTEX_ATTRIBUTES]; - _sg_clear(wgpu_vtx_attrs, sizeof(wgpu_vtx_attrs)); - int wgpu_vb_num = 0; - for (int vb_idx = 0; vb_idx < SG_MAX_VERTEXBUFFER_BINDSLOTS; vb_idx++, wgpu_vb_num++) { - const sg_vertex_buffer_layout_state* vbl_state = &desc->layout.buffers[vb_idx]; - if (0 == vbl_state->stride) { - break; + if (pip->cmn.is_compute) { + WGPUComputePipelineDescriptor wgpu_pip_desc; + _sg_clear(&wgpu_pip_desc, sizeof(wgpu_pip_desc)); + wgpu_pip_desc.label = _sg_wgpu_stringview(desc->label); + wgpu_pip_desc.layout = wgpu_pip_layout; + wgpu_pip_desc.compute.module = shd->wgpu.compute_func.module; + wgpu_pip_desc.compute.entryPoint = shd->wgpu.compute_func.entry.buf; + pip->wgpu.cpip = wgpuDeviceCreateComputePipeline(_sg.wgpu.dev, &wgpu_pip_desc); + wgpuPipelineLayoutRelease(wgpu_pip_layout); + if (0 == pip->wgpu.cpip) { + _SG_ERROR(WGPU_CREATE_COMPUTE_PIPELINE_FAILED); + return SG_RESOURCESTATE_FAILED; } - wgpu_vb_layouts[vb_idx].arrayStride = (uint64_t)vbl_state->stride; - wgpu_vb_layouts[vb_idx].stepMode = _sg_wgpu_stepmode(vbl_state->step_func); - wgpu_vb_layouts[vb_idx].attributes = &wgpu_vtx_attrs[vb_idx][0]; - } - for (int va_idx = 0; va_idx < SG_MAX_VERTEX_ATTRIBUTES; va_idx++) { - const sg_vertex_attr_state* va_state = &desc->layout.attrs[va_idx]; - if (SG_VERTEXFORMAT_INVALID == va_state->format) { - break; - } - const int vb_idx = va_state->buffer_index; - SOKOL_ASSERT(vb_idx < SG_MAX_VERTEXBUFFER_BINDSLOTS); - SOKOL_ASSERT(pip->cmn.vertex_buffer_layout_active[vb_idx]); - const size_t wgpu_attr_idx = wgpu_vb_layouts[vb_idx].attributeCount; - wgpu_vb_layouts[vb_idx].attributeCount += 1; - wgpu_vtx_attrs[vb_idx][wgpu_attr_idx].format = _sg_wgpu_vertexformat(va_state->format); - wgpu_vtx_attrs[vb_idx][wgpu_attr_idx].offset = (uint64_t)va_state->offset; - wgpu_vtx_attrs[vb_idx][wgpu_attr_idx].shaderLocation = (uint32_t)va_idx; - } - - WGPURenderPipelineDescriptor wgpu_pip_desc; - _sg_clear(&wgpu_pip_desc, sizeof(wgpu_pip_desc)); - WGPUDepthStencilState wgpu_ds_state; - _sg_clear(&wgpu_ds_state, sizeof(wgpu_ds_state)); - WGPUFragmentState wgpu_frag_state; - _sg_clear(&wgpu_frag_state, sizeof(wgpu_frag_state)); - WGPUColorTargetState wgpu_ctgt_state[SG_MAX_COLOR_ATTACHMENTS]; - _sg_clear(&wgpu_ctgt_state, sizeof(wgpu_ctgt_state)); - WGPUBlendState wgpu_blend_state[SG_MAX_COLOR_ATTACHMENTS]; - _sg_clear(&wgpu_blend_state, sizeof(wgpu_blend_state)); - wgpu_pip_desc.label = _sg_wgpu_stringview(desc->label); - wgpu_pip_desc.layout = wgpu_pip_layout; - wgpu_pip_desc.vertex.module = shd->wgpu.vertex_func.module; - wgpu_pip_desc.vertex.entryPoint = shd->wgpu.vertex_func.entry.buf; - wgpu_pip_desc.vertex.bufferCount = (size_t)wgpu_vb_num; - wgpu_pip_desc.vertex.buffers = &wgpu_vb_layouts[0]; - wgpu_pip_desc.primitive.topology = _sg_wgpu_topology(desc->primitive_type); - wgpu_pip_desc.primitive.stripIndexFormat = _sg_wgpu_stripindexformat(desc->primitive_type, desc->index_type); - wgpu_pip_desc.primitive.frontFace = _sg_wgpu_frontface(desc->face_winding); - wgpu_pip_desc.primitive.cullMode = _sg_wgpu_cullmode(desc->cull_mode); - if (SG_PIXELFORMAT_NONE != desc->depth.pixel_format) { - wgpu_ds_state.format = _sg_wgpu_textureformat(desc->depth.pixel_format); - wgpu_ds_state.depthWriteEnabled = 
_sg_wgpu_optional_bool(desc->depth.write_enabled); - wgpu_ds_state.depthCompare = _sg_wgpu_comparefunc(desc->depth.compare); - wgpu_ds_state.stencilFront.compare = _sg_wgpu_comparefunc(desc->stencil.front.compare); - wgpu_ds_state.stencilFront.failOp = _sg_wgpu_stencilop(desc->stencil.front.fail_op); - wgpu_ds_state.stencilFront.depthFailOp = _sg_wgpu_stencilop(desc->stencil.front.depth_fail_op); - wgpu_ds_state.stencilFront.passOp = _sg_wgpu_stencilop(desc->stencil.front.pass_op); - wgpu_ds_state.stencilBack.compare = _sg_wgpu_comparefunc(desc->stencil.back.compare); - wgpu_ds_state.stencilBack.failOp = _sg_wgpu_stencilop(desc->stencil.back.fail_op); - wgpu_ds_state.stencilBack.depthFailOp = _sg_wgpu_stencilop(desc->stencil.back.depth_fail_op); - wgpu_ds_state.stencilBack.passOp = _sg_wgpu_stencilop(desc->stencil.back.pass_op); - wgpu_ds_state.stencilReadMask = desc->stencil.read_mask; - wgpu_ds_state.stencilWriteMask = desc->stencil.write_mask; - wgpu_ds_state.depthBias = (int32_t)desc->depth.bias; - wgpu_ds_state.depthBiasSlopeScale = desc->depth.bias_slope_scale; - wgpu_ds_state.depthBiasClamp = desc->depth.bias_clamp; - wgpu_pip_desc.depthStencil = &wgpu_ds_state; - } - wgpu_pip_desc.multisample.count = (uint32_t)desc->sample_count; - wgpu_pip_desc.multisample.mask = 0xFFFFFFFF; - wgpu_pip_desc.multisample.alphaToCoverageEnabled = desc->alpha_to_coverage_enabled; - if (desc->color_count > 0) { - wgpu_frag_state.module = shd->wgpu.fragment_func.module; - wgpu_frag_state.entryPoint = shd->wgpu.fragment_func.entry.buf; - wgpu_frag_state.targetCount = (size_t)desc->color_count; - wgpu_frag_state.targets = &wgpu_ctgt_state[0]; - for (int i = 0; i < desc->color_count; i++) { - SOKOL_ASSERT(i < SG_MAX_COLOR_ATTACHMENTS); - wgpu_ctgt_state[i].format = _sg_wgpu_textureformat(desc->colors[i].pixel_format); - wgpu_ctgt_state[i].writeMask = _sg_wgpu_colorwritemask(desc->colors[i].write_mask); - if (desc->colors[i].blend.enabled) { - wgpu_ctgt_state[i].blend = &wgpu_blend_state[i]; - wgpu_blend_state[i].color.operation = _sg_wgpu_blendop(desc->colors[i].blend.op_rgb); - wgpu_blend_state[i].color.srcFactor = _sg_wgpu_blendfactor(desc->colors[i].blend.src_factor_rgb); - wgpu_blend_state[i].color.dstFactor = _sg_wgpu_blendfactor(desc->colors[i].blend.dst_factor_rgb); - wgpu_blend_state[i].alpha.operation = _sg_wgpu_blendop(desc->colors[i].blend.op_alpha); - wgpu_blend_state[i].alpha.srcFactor = _sg_wgpu_blendfactor(desc->colors[i].blend.src_factor_alpha); - wgpu_blend_state[i].alpha.dstFactor = _sg_wgpu_blendfactor(desc->colors[i].blend.dst_factor_alpha); + } else { + WGPUVertexBufferLayout wgpu_vb_layouts[SG_MAX_VERTEXBUFFER_BINDSLOTS]; + _sg_clear(wgpu_vb_layouts, sizeof(wgpu_vb_layouts)); + WGPUVertexAttribute wgpu_vtx_attrs[SG_MAX_VERTEXBUFFER_BINDSLOTS][SG_MAX_VERTEX_ATTRIBUTES]; + _sg_clear(wgpu_vtx_attrs, sizeof(wgpu_vtx_attrs)); + int wgpu_vb_num = 0; + for (int vb_idx = 0; vb_idx < SG_MAX_VERTEXBUFFER_BINDSLOTS; vb_idx++, wgpu_vb_num++) { + const sg_vertex_buffer_layout_state* vbl_state = &desc->layout.buffers[vb_idx]; + if (0 == vbl_state->stride) { + break; } + wgpu_vb_layouts[vb_idx].arrayStride = (uint64_t)vbl_state->stride; + wgpu_vb_layouts[vb_idx].stepMode = _sg_wgpu_stepmode(vbl_state->step_func); + wgpu_vb_layouts[vb_idx].attributes = &wgpu_vtx_attrs[vb_idx][0]; + } + for (int va_idx = 0; va_idx < SG_MAX_VERTEX_ATTRIBUTES; va_idx++) { + const sg_vertex_attr_state* va_state = &desc->layout.attrs[va_idx]; + if (SG_VERTEXFORMAT_INVALID == va_state->format) { + break; + } + const int 
vb_idx = va_state->buffer_index; + SOKOL_ASSERT(vb_idx < SG_MAX_VERTEXBUFFER_BINDSLOTS); + SOKOL_ASSERT(pip->cmn.vertex_buffer_layout_active[vb_idx]); + const size_t wgpu_attr_idx = wgpu_vb_layouts[vb_idx].attributeCount; + wgpu_vb_layouts[vb_idx].attributeCount += 1; + wgpu_vtx_attrs[vb_idx][wgpu_attr_idx].format = _sg_wgpu_vertexformat(va_state->format); + wgpu_vtx_attrs[vb_idx][wgpu_attr_idx].offset = (uint64_t)va_state->offset; + wgpu_vtx_attrs[vb_idx][wgpu_attr_idx].shaderLocation = (uint32_t)va_idx; + } + + WGPURenderPipelineDescriptor wgpu_pip_desc; + _sg_clear(&wgpu_pip_desc, sizeof(wgpu_pip_desc)); + WGPUDepthStencilState wgpu_ds_state; + _sg_clear(&wgpu_ds_state, sizeof(wgpu_ds_state)); + WGPUFragmentState wgpu_frag_state; + _sg_clear(&wgpu_frag_state, sizeof(wgpu_frag_state)); + WGPUColorTargetState wgpu_ctgt_state[SG_MAX_COLOR_ATTACHMENTS]; + _sg_clear(&wgpu_ctgt_state, sizeof(wgpu_ctgt_state)); + WGPUBlendState wgpu_blend_state[SG_MAX_COLOR_ATTACHMENTS]; + _sg_clear(&wgpu_blend_state, sizeof(wgpu_blend_state)); + wgpu_pip_desc.label = _sg_wgpu_stringview(desc->label); + wgpu_pip_desc.layout = wgpu_pip_layout; + wgpu_pip_desc.vertex.module = shd->wgpu.vertex_func.module; + wgpu_pip_desc.vertex.entryPoint = shd->wgpu.vertex_func.entry.buf; + wgpu_pip_desc.vertex.bufferCount = (size_t)wgpu_vb_num; + wgpu_pip_desc.vertex.buffers = &wgpu_vb_layouts[0]; + wgpu_pip_desc.primitive.topology = _sg_wgpu_topology(desc->primitive_type); + wgpu_pip_desc.primitive.stripIndexFormat = _sg_wgpu_stripindexformat(desc->primitive_type, desc->index_type); + wgpu_pip_desc.primitive.frontFace = _sg_wgpu_frontface(desc->face_winding); + wgpu_pip_desc.primitive.cullMode = _sg_wgpu_cullmode(desc->cull_mode); + if (SG_PIXELFORMAT_NONE != desc->depth.pixel_format) { + wgpu_ds_state.format = _sg_wgpu_textureformat(desc->depth.pixel_format); + wgpu_ds_state.depthWriteEnabled = _sg_wgpu_optional_bool(desc->depth.write_enabled); + wgpu_ds_state.depthCompare = _sg_wgpu_comparefunc(desc->depth.compare); + wgpu_ds_state.stencilFront.compare = _sg_wgpu_comparefunc(desc->stencil.front.compare); + wgpu_ds_state.stencilFront.failOp = _sg_wgpu_stencilop(desc->stencil.front.fail_op); + wgpu_ds_state.stencilFront.depthFailOp = _sg_wgpu_stencilop(desc->stencil.front.depth_fail_op); + wgpu_ds_state.stencilFront.passOp = _sg_wgpu_stencilop(desc->stencil.front.pass_op); + wgpu_ds_state.stencilBack.compare = _sg_wgpu_comparefunc(desc->stencil.back.compare); + wgpu_ds_state.stencilBack.failOp = _sg_wgpu_stencilop(desc->stencil.back.fail_op); + wgpu_ds_state.stencilBack.depthFailOp = _sg_wgpu_stencilop(desc->stencil.back.depth_fail_op); + wgpu_ds_state.stencilBack.passOp = _sg_wgpu_stencilop(desc->stencil.back.pass_op); + wgpu_ds_state.stencilReadMask = desc->stencil.read_mask; + wgpu_ds_state.stencilWriteMask = desc->stencil.write_mask; + wgpu_ds_state.depthBias = (int32_t)desc->depth.bias; + wgpu_ds_state.depthBiasSlopeScale = desc->depth.bias_slope_scale; + wgpu_ds_state.depthBiasClamp = desc->depth.bias_clamp; + wgpu_pip_desc.depthStencil = &wgpu_ds_state; + } + wgpu_pip_desc.multisample.count = (uint32_t)desc->sample_count; + wgpu_pip_desc.multisample.mask = 0xFFFFFFFF; + wgpu_pip_desc.multisample.alphaToCoverageEnabled = desc->alpha_to_coverage_enabled; + if (desc->color_count > 0) { + wgpu_frag_state.module = shd->wgpu.fragment_func.module; + wgpu_frag_state.entryPoint = shd->wgpu.fragment_func.entry.buf; + wgpu_frag_state.targetCount = (size_t)desc->color_count; + wgpu_frag_state.targets = &wgpu_ctgt_state[0]; + for 
(int i = 0; i < desc->color_count; i++) { + SOKOL_ASSERT(i < SG_MAX_COLOR_ATTACHMENTS); + wgpu_ctgt_state[i].format = _sg_wgpu_textureformat(desc->colors[i].pixel_format); + wgpu_ctgt_state[i].writeMask = _sg_wgpu_colorwritemask(desc->colors[i].write_mask); + if (desc->colors[i].blend.enabled) { + wgpu_ctgt_state[i].blend = &wgpu_blend_state[i]; + wgpu_blend_state[i].color.operation = _sg_wgpu_blendop(desc->colors[i].blend.op_rgb); + wgpu_blend_state[i].color.srcFactor = _sg_wgpu_blendfactor(desc->colors[i].blend.src_factor_rgb); + wgpu_blend_state[i].color.dstFactor = _sg_wgpu_blendfactor(desc->colors[i].blend.dst_factor_rgb); + wgpu_blend_state[i].alpha.operation = _sg_wgpu_blendop(desc->colors[i].blend.op_alpha); + wgpu_blend_state[i].alpha.srcFactor = _sg_wgpu_blendfactor(desc->colors[i].blend.src_factor_alpha); + wgpu_blend_state[i].alpha.dstFactor = _sg_wgpu_blendfactor(desc->colors[i].blend.dst_factor_alpha); + } + } + wgpu_pip_desc.fragment = &wgpu_frag_state; + } + pip->wgpu.rpip = wgpuDeviceCreateRenderPipeline(_sg.wgpu.dev, &wgpu_pip_desc); + wgpuPipelineLayoutRelease(wgpu_pip_layout); + if (0 == pip->wgpu.rpip) { + _SG_ERROR(WGPU_CREATE_RENDER_PIPELINE_FAILED); + return SG_RESOURCESTATE_FAILED; } - wgpu_pip_desc.fragment = &wgpu_frag_state; - } - pip->wgpu.pip = wgpuDeviceCreateRenderPipeline(_sg.wgpu.dev, &wgpu_pip_desc); - wgpuPipelineLayoutRelease(wgpu_pip_layout); - if (0 == pip->wgpu.pip) { - _SG_ERROR(WGPU_CREATE_RENDER_PIPELINE_FAILED); - return SG_RESOURCESTATE_FAILED; } return SG_RESOURCESTATE_VALID; } @@ -15343,9 +16225,13 @@ _SOKOL_PRIVATE void _sg_wgpu_discard_pipeline(_sg_pipeline_t* pip) { _sg.wgpu.cur_pipeline = 0; _sg.wgpu.cur_pipeline_id.id = SG_INVALID_ID; } - if (pip->wgpu.pip) { - wgpuRenderPipelineRelease(pip->wgpu.pip); - pip->wgpu.pip = 0; + if (pip->wgpu.rpip) { + wgpuRenderPipelineRelease(pip->wgpu.rpip); + pip->wgpu.rpip = 0; + } + if (pip->wgpu.cpip) { + wgpuComputePipelineRelease(pip->wgpu.cpip); + pip->wgpu.cpip = 0; } } @@ -15485,18 +16371,23 @@ _SOKOL_PRIVATE void _sg_wgpu_init_ds_att(WGPURenderPassDepthStencilAttachment* w wgpu_att->stencilReadOnly = false; } -_SOKOL_PRIVATE void _sg_wgpu_begin_pass(const sg_pass* pass) { - SOKOL_ASSERT(pass); - SOKOL_ASSERT(_sg.wgpu.cmd_enc); - SOKOL_ASSERT(_sg.wgpu.dev); +_SOKOL_PRIVATE void _sg_wgpu_begin_compute_pass(const sg_pass* pass) { + WGPUComputePassDescriptor wgpu_pass_desc; + _sg_clear(&wgpu_pass_desc, sizeof(wgpu_pass_desc)); + wgpu_pass_desc.label = _sg_wgpu_stringview(pass->label); + _sg.wgpu.cpass_enc = wgpuCommandEncoderBeginComputePass(_sg.wgpu.cmd_enc, &wgpu_pass_desc); + SOKOL_ASSERT(_sg.wgpu.cpass_enc); + // clear initial bindings + wgpuComputePassEncoderSetBindGroup(_sg.wgpu.cpass_enc, _SG_WGPU_UB_BINDGROUP_INDEX, _sg.wgpu.empty_bind_group, 0, 0); + wgpuComputePassEncoderSetBindGroup(_sg.wgpu.cpass_enc, _SG_WGPU_IMG_SMP_SBUF_BINDGROUP_INDEX, _sg.wgpu.empty_bind_group, 0, 0); + _sg_stats_add(wgpu.bindings.num_set_bindgroup, 1); +} +_SOKOL_PRIVATE void _sg_wgpu_begin_render_pass(const sg_pass* pass) { const _sg_attachments_t* atts = _sg.cur_pass.atts; const sg_swapchain* swapchain = &pass->swapchain; const sg_pass_action* action = &pass->action; - _sg.wgpu.cur_pipeline = 0; - _sg.wgpu.cur_pipeline_id.id = SG_INVALID_ID; - WGPURenderPassDescriptor wgpu_pass_desc; WGPURenderPassColorAttachment wgpu_color_att[SG_MAX_COLOR_ATTACHMENTS]; WGPURenderPassDepthStencilAttachment wgpu_ds_att; @@ -15528,21 +16419,42 @@ _SOKOL_PRIVATE void _sg_wgpu_begin_pass(const sg_pass* pass) { 
wgpu_pass_desc.depthStencilAttachment = &wgpu_ds_att; } } - _sg.wgpu.pass_enc = wgpuCommandEncoderBeginRenderPass(_sg.wgpu.cmd_enc, &wgpu_pass_desc); - SOKOL_ASSERT(_sg.wgpu.pass_enc); + _sg.wgpu.rpass_enc = wgpuCommandEncoderBeginRenderPass(_sg.wgpu.cmd_enc, &wgpu_pass_desc); + SOKOL_ASSERT(_sg.wgpu.rpass_enc); - // clear bindings cache and bindings - _sg_wgpu_bindings_cache_clear(); - wgpuRenderPassEncoderSetBindGroup(_sg.wgpu.pass_enc, _SG_WGPU_UB_BINDGROUP_INDEX, _sg.wgpu.empty_bind_group, 0, 0); - wgpuRenderPassEncoderSetBindGroup(_sg.wgpu.pass_enc, _SG_WGPU_IMG_SMP_SBUF_BINDGROUP_INDEX, _sg.wgpu.empty_bind_group, 0, 0); + wgpuRenderPassEncoderSetBindGroup(_sg.wgpu.rpass_enc, _SG_WGPU_UB_BINDGROUP_INDEX, _sg.wgpu.empty_bind_group, 0, 0); + wgpuRenderPassEncoderSetBindGroup(_sg.wgpu.rpass_enc, _SG_WGPU_IMG_SMP_SBUF_BINDGROUP_INDEX, _sg.wgpu.empty_bind_group, 0, 0); _sg_stats_add(wgpu.bindings.num_set_bindgroup, 1); } +_SOKOL_PRIVATE void _sg_wgpu_begin_pass(const sg_pass* pass) { + SOKOL_ASSERT(pass); + SOKOL_ASSERT(_sg.wgpu.dev); + SOKOL_ASSERT(_sg.wgpu.cmd_enc); + SOKOL_ASSERT(0 == _sg.wgpu.rpass_enc); + SOKOL_ASSERT(0 == _sg.wgpu.cpass_enc); + + _sg.wgpu.cur_pipeline = 0; + _sg.wgpu.cur_pipeline_id.id = SG_INVALID_ID; + _sg_wgpu_bindings_cache_clear(); + + if (pass->compute) { + _sg_wgpu_begin_compute_pass(pass); + } else { + _sg_wgpu_begin_render_pass(pass); + } +} + _SOKOL_PRIVATE void _sg_wgpu_end_pass(void) { - if (_sg.wgpu.pass_enc) { - wgpuRenderPassEncoderEnd(_sg.wgpu.pass_enc); - wgpuRenderPassEncoderRelease(_sg.wgpu.pass_enc); - _sg.wgpu.pass_enc = 0; + if (_sg.wgpu.rpass_enc) { + wgpuRenderPassEncoderEnd(_sg.wgpu.rpass_enc); + wgpuRenderPassEncoderRelease(_sg.wgpu.rpass_enc); + _sg.wgpu.rpass_enc = 0; + } + if (_sg.wgpu.cpass_enc) { + wgpuComputePassEncoderEnd(_sg.wgpu.cpass_enc); + wgpuComputePassEncoderRelease(_sg.wgpu.cpass_enc); + _sg.wgpu.cpass_enc = 0; } } @@ -15568,7 +16480,7 @@ _SOKOL_PRIVATE void _sg_wgpu_commit(void) { } _SOKOL_PRIVATE void _sg_wgpu_apply_viewport(int x, int y, int w, int h, bool origin_top_left) { - SOKOL_ASSERT(_sg.wgpu.pass_enc); + SOKOL_ASSERT(_sg.wgpu.rpass_enc); // FIXME FIXME FIXME: CLIPPING THE VIEWPORT HERE IS WRONG!!! // (but currently required because WebGPU insists that the viewport rectangle must be // fully contained inside the framebuffer, but this doesn't make any sense, and also @@ -15578,17 +16490,17 @@ _SOKOL_PRIVATE void _sg_wgpu_apply_viewport(int x, int y, int w, int h, bool ori float yf = (float) (origin_top_left ? clip.y : (_sg.cur_pass.height - (clip.y + clip.h))); float wf = (float) clip.w; float hf = (float) clip.h; - wgpuRenderPassEncoderSetViewport(_sg.wgpu.pass_enc, xf, yf, wf, hf, 0.0f, 1.0f); + wgpuRenderPassEncoderSetViewport(_sg.wgpu.rpass_enc, xf, yf, wf, hf, 0.0f, 1.0f); } _SOKOL_PRIVATE void _sg_wgpu_apply_scissor_rect(int x, int y, int w, int h, bool origin_top_left) { - SOKOL_ASSERT(_sg.wgpu.pass_enc); + SOKOL_ASSERT(_sg.wgpu.rpass_enc); const _sg_recti_t clip = _sg_clipi(x, y, w, h, _sg.cur_pass.width, _sg.cur_pass.height); uint32_t sx = (uint32_t) clip.x; uint32_t sy = (uint32_t) (origin_top_left ? 
clip.y : (_sg.cur_pass.height - (clip.y + clip.h))); uint32_t sw = (uint32_t) clip.w; uint32_t sh = (uint32_t) clip.h; - wgpuRenderPassEncoderSetScissorRect(_sg.wgpu.pass_enc, sx, sy, sw, sh); + wgpuRenderPassEncoderSetScissorRect(_sg.wgpu.rpass_enc, sx, sy, sw, sh); } _SOKOL_PRIVATE void _sg_wgpu_set_ub_bindgroup(const _sg_shader_t* shd) { @@ -15604,24 +16516,42 @@ _SOKOL_PRIVATE void _sg_wgpu_set_ub_bindgroup(const _sg_shader_t* shd) { SOKOL_ASSERT(dynoffset_index < shd->wgpu.ub_num_dynoffsets); dyn_offsets[dynoffset_index] = _sg.wgpu.uniform.bind_offsets[i]; } - wgpuRenderPassEncoderSetBindGroup(_sg.wgpu.pass_enc, - _SG_WGPU_UB_BINDGROUP_INDEX, - shd->wgpu.bg_ub, - shd->wgpu.ub_num_dynoffsets, - dyn_offsets); + if (_sg.cur_pass.is_compute) { + SOKOL_ASSERT(_sg.wgpu.cpass_enc); + wgpuComputePassEncoderSetBindGroup(_sg.wgpu.cpass_enc, + _SG_WGPU_UB_BINDGROUP_INDEX, + shd->wgpu.bg_ub, + shd->wgpu.ub_num_dynoffsets, + dyn_offsets); + } else { + SOKOL_ASSERT(_sg.wgpu.rpass_enc); + wgpuRenderPassEncoderSetBindGroup(_sg.wgpu.rpass_enc, + _SG_WGPU_UB_BINDGROUP_INDEX, + shd->wgpu.bg_ub, + shd->wgpu.ub_num_dynoffsets, + dyn_offsets); + } } _SOKOL_PRIVATE void _sg_wgpu_apply_pipeline(_sg_pipeline_t* pip) { SOKOL_ASSERT(pip); - SOKOL_ASSERT(pip->wgpu.pip); - SOKOL_ASSERT(_sg.wgpu.pass_enc); SOKOL_ASSERT(pip->shader && (pip->shader->slot.id == pip->cmn.shader_id.id)); - _sg.wgpu.use_indexed_draw = (pip->cmn.index_type != SG_INDEXTYPE_NONE); _sg.wgpu.cur_pipeline = pip; _sg.wgpu.cur_pipeline_id.id = pip->slot.id; - wgpuRenderPassEncoderSetPipeline(_sg.wgpu.pass_enc, pip->wgpu.pip); - wgpuRenderPassEncoderSetBlendConstant(_sg.wgpu.pass_enc, &pip->wgpu.blend_color); - wgpuRenderPassEncoderSetStencilReference(_sg.wgpu.pass_enc, pip->cmn.stencil.ref); + if (pip->cmn.is_compute) { + SOKOL_ASSERT(_sg.cur_pass.is_compute); + SOKOL_ASSERT(pip->wgpu.cpip); + SOKOL_ASSERT(_sg.wgpu.cpass_enc); + wgpuComputePassEncoderSetPipeline(_sg.wgpu.cpass_enc, pip->wgpu.cpip); + } else { + SOKOL_ASSERT(!_sg.cur_pass.is_compute); + SOKOL_ASSERT(pip->wgpu.rpip); + SOKOL_ASSERT(_sg.wgpu.rpass_enc); + _sg.wgpu.use_indexed_draw = (pip->cmn.index_type != SG_INDEXTYPE_NONE); + wgpuRenderPassEncoderSetPipeline(_sg.wgpu.rpass_enc, pip->wgpu.rpip); + wgpuRenderPassEncoderSetBlendConstant(_sg.wgpu.rpass_enc, &pip->wgpu.blend_color); + wgpuRenderPassEncoderSetStencilReference(_sg.wgpu.rpass_enc, pip->cmn.stencil.ref); + } // bind groups must be set because pipelines without uniform blocks or resource bindings // will still create 'empty' BindGroupLayouts _sg_wgpu_set_ub_bindgroup(pip->shader); @@ -15629,19 +16559,19 @@ _SOKOL_PRIVATE void _sg_wgpu_apply_pipeline(_sg_pipeline_t* pip) { } _SOKOL_PRIVATE bool _sg_wgpu_apply_bindings(_sg_bindings_t* bnd) { - SOKOL_ASSERT(_sg.wgpu.pass_enc); SOKOL_ASSERT(bnd); SOKOL_ASSERT(bnd->pip->shader && (bnd->pip->cmn.shader_id.id == bnd->pip->shader->slot.id)); bool retval = true; - retval &= _sg_wgpu_apply_index_buffer(bnd); - retval &= _sg_wgpu_apply_vertex_buffers(bnd); + if (!_sg.cur_pass.is_compute) { + retval &= _sg_wgpu_apply_index_buffer(bnd); + retval &= _sg_wgpu_apply_vertex_buffers(bnd); + } retval &= _sg_wgpu_apply_bindgroup(bnd); return retval; } _SOKOL_PRIVATE void _sg_wgpu_apply_uniforms(int ub_slot, const sg_range* data) { const uint32_t alignment = _sg.wgpu.limits.limits.minUniformBufferOffsetAlignment; - SOKOL_ASSERT(_sg.wgpu.pass_enc); SOKOL_ASSERT(_sg.wgpu.uniform.staging); SOKOL_ASSERT((ub_slot >= 0) && (ub_slot < SG_MAX_UNIFORMBLOCK_BINDSLOTS)); 
SOKOL_ASSERT((_sg.wgpu.uniform.offset + data->size) <= _sg.wgpu.uniform.num_bytes); @@ -15663,15 +16593,23 @@ _SOKOL_PRIVATE void _sg_wgpu_apply_uniforms(int ub_slot, const sg_range* data) { } _SOKOL_PRIVATE void _sg_wgpu_draw(int base_element, int num_elements, int num_instances) { - SOKOL_ASSERT(_sg.wgpu.pass_enc); + SOKOL_ASSERT(_sg.wgpu.rpass_enc); SOKOL_ASSERT(_sg.wgpu.cur_pipeline && (_sg.wgpu.cur_pipeline->slot.id == _sg.wgpu.cur_pipeline_id.id)); if (SG_INDEXTYPE_NONE != _sg.wgpu.cur_pipeline->cmn.index_type) { - wgpuRenderPassEncoderDrawIndexed(_sg.wgpu.pass_enc, (uint32_t)num_elements, (uint32_t)num_instances, (uint32_t)base_element, 0, 0); + wgpuRenderPassEncoderDrawIndexed(_sg.wgpu.rpass_enc, (uint32_t)num_elements, (uint32_t)num_instances, (uint32_t)base_element, 0, 0); } else { - wgpuRenderPassEncoderDraw(_sg.wgpu.pass_enc, (uint32_t)num_elements, (uint32_t)num_instances, (uint32_t)base_element, 0); + wgpuRenderPassEncoderDraw(_sg.wgpu.rpass_enc, (uint32_t)num_elements, (uint32_t)num_instances, (uint32_t)base_element, 0); } } +_SOKOL_PRIVATE void _sg_wgpu_dispatch(int num_groups_x, int num_groups_y, int num_groups_z) { + SOKOL_ASSERT(_sg.wgpu.cpass_enc); + wgpuComputePassEncoderDispatchWorkgroups(_sg.wgpu.cpass_enc, + (uint32_t)num_groups_x, + (uint32_t)num_groups_y, + (uint32_t)num_groups_z); +} + _SOKOL_PRIVATE void _sg_wgpu_update_buffer(_sg_buffer_t* buf, const sg_range* data) { SOKOL_ASSERT(data && data->ptr && (data->size > 0)); SOKOL_ASSERT(buf); @@ -16113,6 +17051,22 @@ static inline void _sg_draw(int base_element, int num_elements, int num_instance #endif } +static inline void _sg_dispatch(int num_groups_x, int num_groups_y, int num_groups_z) { + #if defined(_SOKOL_ANY_GL) + _sg_gl_dispatch(num_groups_x, num_groups_y, num_groups_z); + #elif defined(SOKOL_METAL) + _sg_mtl_dispatch(num_groups_x, num_groups_y, num_groups_z); + #elif defined(SOKOL_D3D11) + _sg_d3d11_dispatch(num_groups_x, num_groups_y, num_groups_z); + #elif defined(SOKOL_WGPU) + _sg_wgpu_dispatch(num_groups_x, num_groups_y, num_groups_z); + #elif defined(SOKOL_DUMMY_BACKEND) + _sg_dummy_dispatch(num_groups_x, num_groups_y, num_groups_z); + #else + #error("INVALID BACKEND"); + #endif +} + static inline void _sg_commit(void) { #if defined(_SOKOL_ANY_GL) _sg_gl_commit(); @@ -16198,7 +17152,7 @@ static inline void _sg_pop_debug_group(void) { // ██ ██████ ██████ ███████ // // >>pool -_SOKOL_PRIVATE void _sg_init_pool(_sg_pool_t* pool, int num) { +_SOKOL_PRIVATE void _sg_pool_init(_sg_pool_t* pool, int num) { SOKOL_ASSERT(pool && (num >= 1)); // slot 0 is reserved for the 'invalid id', so bump the pool size by 1 pool->size = num + 1; @@ -16214,7 +17168,7 @@ _SOKOL_PRIVATE void _sg_init_pool(_sg_pool_t* pool, int num) { } } -_SOKOL_PRIVATE void _sg_discard_pool(_sg_pool_t* pool) { +_SOKOL_PRIVATE void _sg_pool_discard(_sg_pool_t* pool) { SOKOL_ASSERT(pool); SOKOL_ASSERT(pool->free_queue); _sg_free(pool->free_queue); @@ -16254,7 +17208,7 @@ _SOKOL_PRIVATE void _sg_pool_free_index(_sg_pool_t* pool, int slot_index) { SOKOL_ASSERT(pool->queue_top <= (pool->size-1)); } -_SOKOL_PRIVATE void _sg_reset_slot(_sg_slot_t* slot) { +_SOKOL_PRIVATE void _sg_slot_reset(_sg_slot_t* slot) { SOKOL_ASSERT(slot); _sg_clear(slot, sizeof(_sg_slot_t)); } @@ -16312,32 +17266,32 @@ _SOKOL_PRIVATE void _sg_setup_pools(_sg_pools_t* p, const sg_desc* desc) { SOKOL_ASSERT(desc); // note: the pools here will have an additional item, since slot 0 is reserved SOKOL_ASSERT((desc->buffer_pool_size > 0) && (desc->buffer_pool_size < 
_SG_MAX_POOL_SIZE)); - _sg_init_pool(&p->buffer_pool, desc->buffer_pool_size); + _sg_pool_init(&p->buffer_pool, desc->buffer_pool_size); size_t buffer_pool_byte_size = sizeof(_sg_buffer_t) * (size_t)p->buffer_pool.size; p->buffers = (_sg_buffer_t*) _sg_malloc_clear(buffer_pool_byte_size); SOKOL_ASSERT((desc->image_pool_size > 0) && (desc->image_pool_size < _SG_MAX_POOL_SIZE)); - _sg_init_pool(&p->image_pool, desc->image_pool_size); + _sg_pool_init(&p->image_pool, desc->image_pool_size); size_t image_pool_byte_size = sizeof(_sg_image_t) * (size_t)p->image_pool.size; p->images = (_sg_image_t*) _sg_malloc_clear(image_pool_byte_size); SOKOL_ASSERT((desc->sampler_pool_size > 0) && (desc->sampler_pool_size < _SG_MAX_POOL_SIZE)); - _sg_init_pool(&p->sampler_pool, desc->sampler_pool_size); + _sg_pool_init(&p->sampler_pool, desc->sampler_pool_size); size_t sampler_pool_byte_size = sizeof(_sg_sampler_t) * (size_t)p->sampler_pool.size; p->samplers = (_sg_sampler_t*) _sg_malloc_clear(sampler_pool_byte_size); SOKOL_ASSERT((desc->shader_pool_size > 0) && (desc->shader_pool_size < _SG_MAX_POOL_SIZE)); - _sg_init_pool(&p->shader_pool, desc->shader_pool_size); + _sg_pool_init(&p->shader_pool, desc->shader_pool_size); size_t shader_pool_byte_size = sizeof(_sg_shader_t) * (size_t)p->shader_pool.size; p->shaders = (_sg_shader_t*) _sg_malloc_clear(shader_pool_byte_size); SOKOL_ASSERT((desc->pipeline_pool_size > 0) && (desc->pipeline_pool_size < _SG_MAX_POOL_SIZE)); - _sg_init_pool(&p->pipeline_pool, desc->pipeline_pool_size); + _sg_pool_init(&p->pipeline_pool, desc->pipeline_pool_size); size_t pipeline_pool_byte_size = sizeof(_sg_pipeline_t) * (size_t)p->pipeline_pool.size; p->pipelines = (_sg_pipeline_t*) _sg_malloc_clear(pipeline_pool_byte_size); SOKOL_ASSERT((desc->attachments_pool_size > 0) && (desc->attachments_pool_size < _SG_MAX_POOL_SIZE)); - _sg_init_pool(&p->attachments_pool, desc->attachments_pool_size); + _sg_pool_init(&p->attachments_pool, desc->attachments_pool_size); size_t attachments_pool_byte_size = sizeof(_sg_attachments_t) * (size_t)p->attachments_pool.size; p->attachments = (_sg_attachments_t*) _sg_malloc_clear(attachments_pool_byte_size); } @@ -16350,12 +17304,12 @@ _SOKOL_PRIVATE void _sg_discard_pools(_sg_pools_t* p) { _sg_free(p->samplers); p->samplers = 0; _sg_free(p->images); p->images = 0; _sg_free(p->buffers); p->buffers = 0; - _sg_discard_pool(&p->attachments_pool); - _sg_discard_pool(&p->pipeline_pool); - _sg_discard_pool(&p->shader_pool); - _sg_discard_pool(&p->sampler_pool); - _sg_discard_pool(&p->image_pool); - _sg_discard_pool(&p->buffer_pool); + _sg_pool_discard(&p->attachments_pool); + _sg_pool_discard(&p->pipeline_pool); + _sg_pool_discard(&p->shader_pool); + _sg_pool_discard(&p->sampler_pool); + _sg_pool_discard(&p->image_pool); + _sg_pool_discard(&p->buffer_pool); } /* allocate the slot at slot_index: @@ -16540,6 +17494,48 @@ _SOKOL_PRIVATE void _sg_discard_all_resources(_sg_pools_t* p) { } } +// ████████ ██████ █████ ██████ ██ ██ ███████ ██████ +// ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ +// ██ ██████ ███████ ██ █████ █████ ██████ +// ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ +// ██ ██ ██ ██ ██ ██████ ██ ██ ███████ ██ ██ +// +// >>tracker +_SOKOL_PRIVATE void _sg_tracker_init(_sg_tracker_t* tracker, uint32_t num) { + SOKOL_ASSERT(tracker); + SOKOL_ASSERT(num > 0); + SOKOL_ASSERT(0 == tracker->size); + SOKOL_ASSERT(0 == tracker->cur); + SOKOL_ASSERT(0 == tracker->items); + tracker->size = (uint32_t)num; + tracker->items = (uint32_t*)_sg_malloc_clear(num * sizeof(uint32_t)); +} + 
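[Editor's note: a minimal sketch of the tracker lifecycle defined in this hunk, with a hypothetical capacity and resource id; in the patch itself the tracker is sized in `_sg_setup_compute` as `max_dispatch_calls_per_pass * SG_MAX_STORAGEBUFFER_BINDSLOTS` and rewound per pass in `_sg_compute_on_endpass`, both further below.]

```c
// hypothetical standalone use of the _sg_tracker_t helpers from this hunk
_sg_tracker_t tracker;
_sg_clear(&tracker, sizeof(tracker));   // _sg_tracker_init asserts a zeroed struct
_sg_tracker_init(&tracker, 64);         // 64 is a placeholder capacity
uint32_t sbuf_id = 0x00010001;          // placeholder resource id
if (!_sg_tracker_add(&tracker, sbuf_id)) {
    // returns false once 'cur' reaches 'size'; the caller in this patch
    // ignores the result, so overflowing ids are simply not tracked
}
_sg_tracker_reset(&tracker);            // cur = 0, the allocation is kept
_sg_tracker_discard(&tracker);          // frees the items array
```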
+_SOKOL_PRIVATE void _sg_tracker_discard(_sg_tracker_t* tracker) { + SOKOL_ASSERT(tracker); + if (tracker->items) { + _sg_free(tracker->items); + } + tracker->size = 0; + tracker->cur = 0; + tracker->items = 0; +} + +_SOKOL_PRIVATE void _sg_tracker_reset(_sg_tracker_t* tracker) { + SOKOL_ASSERT(tracker && tracker->items); + tracker->cur = 0; +} + +_SOKOL_PRIVATE bool _sg_tracker_add(_sg_tracker_t* tracker, uint32_t res_id) { + SOKOL_ASSERT(tracker && tracker->items); + if (tracker->cur < tracker->size) { + tracker->items[tracker->cur++] = res_id; + return true; + } else { + return false; + } +} + // ██ ██ █████ ██ ██ ██████ █████ ████████ ██ ██████ ███ ██ // ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ██ ████ ██ // ██ ██ ███████ ██ ██ ██ ██ ███████ ██ ██ ██ ██ ██ ██ ██ @@ -16578,19 +17574,23 @@ _SOKOL_PRIVATE bool _sg_validate_buffer_desc(const sg_buffer_desc* desc) { _sg_validate_begin(); _SG_VALIDATE(desc->_start_canary == 0, VALIDATE_BUFFERDESC_CANARY); _SG_VALIDATE(desc->_end_canary == 0, VALIDATE_BUFFERDESC_CANARY); - _SG_VALIDATE(desc->size > 0, VALIDATE_BUFFERDESC_SIZE); + _SG_VALIDATE(desc->size > 0, VALIDATE_BUFFERDESC_EXPECT_NONZERO_SIZE); bool injected = (0 != desc->gl_buffers[0]) || (0 != desc->mtl_buffers[0]) || (0 != desc->d3d11_buffer) || (0 != desc->wgpu_buffer); if (!injected && (desc->usage == SG_USAGE_IMMUTABLE)) { - _SG_VALIDATE((0 != desc->data.ptr) && (desc->data.size > 0), VALIDATE_BUFFERDESC_DATA); - _SG_VALIDATE(desc->size == desc->data.size, VALIDATE_BUFFERDESC_DATA_SIZE); + if (desc->data.ptr) { + _SG_VALIDATE(desc->size == desc->data.size, VALIDATE_BUFFERDESC_EXPECT_MATCHING_DATA_SIZE); + } else { + _SG_VALIDATE(desc->data.size == 0, VALIDATE_BUFFERDESC_EXPECT_ZERO_DATA_SIZE); + } } else { - _SG_VALIDATE(0 == desc->data.ptr, VALIDATE_BUFFERDESC_NO_DATA); + _SG_VALIDATE(0 == desc->data.ptr, VALIDATE_BUFFERDESC_EXPECT_NO_DATA); + _SG_VALIDATE(desc->data.size == 0, VALIDATE_BUFFERDESC_EXPECT_ZERO_DATA_SIZE); } if (desc->type == SG_BUFFERTYPE_STORAGEBUFFER) { - _SG_VALIDATE(_sg.features.storage_buffer, VALIDATE_BUFFERDESC_STORAGEBUFFER_SUPPORTED); + _SG_VALIDATE(_sg.features.compute, VALIDATE_BUFFERDESC_STORAGEBUFFER_SUPPORTED); _SG_VALIDATE(_sg_multiple_u64(desc->size, 4), VALIDATE_BUFFERDESC_STORAGEBUFFER_SIZE_MULTIPLE_4); } return _sg_validate_end(); @@ -16745,6 +17745,10 @@ _SOKOL_PRIVATE _sg_u128_t _sg_validate_set_slot_bit(_sg_u128_t bits, sg_shader_s SOKOL_ASSERT(slot < 64); bits.hi |= 1ULL << slot; break; + case SG_SHADERSTAGE_COMPUTE: + SOKOL_ASSERT(slot < 64); + bits.lo |= 1ULL << slot; + break; } return bits; } @@ -16768,6 +17772,10 @@ _SOKOL_PRIVATE bool _sg_validate_slot_bits(_sg_u128_t bits, sg_shader_stage stag SOKOL_ASSERT(slot < 64); mask.hi = 1ULL << slot; break; + case SG_SHADERSTAGE_COMPUTE: + SOKOL_ASSERT(slot < 64); + mask.lo = 1ULL << slot; + break; default: SOKOL_UNREACHABLE; break; @@ -16784,20 +17792,42 @@ _SOKOL_PRIVATE bool _sg_validate_shader_desc(const sg_shader_desc* desc) { return true; } SOKOL_ASSERT(desc); + bool is_compute_shader = (desc->compute_func.source != 0) || (desc->compute_func.bytecode.ptr != 0); _sg_validate_begin(); _SG_VALIDATE(desc->_start_canary == 0, VALIDATE_SHADERDESC_CANARY); _SG_VALIDATE(desc->_end_canary == 0, VALIDATE_SHADERDESC_CANARY); #if defined(SOKOL_GLCORE) || defined(SOKOL_GLES3) || defined(SOKOL_WGPU) // on GL or WebGPU, must provide shader source code - _SG_VALIDATE(0 != desc->vertex_func.source, VALIDATE_SHADERDESC_SOURCE); - _SG_VALIDATE(0 != desc->fragment_func.source, VALIDATE_SHADERDESC_SOURCE); + if 
(is_compute_shader) { + _SG_VALIDATE(0 != desc->compute_func.source, VALIDATE_SHADERDESC_COMPUTE_SOURCE); + } else { + _SG_VALIDATE(0 != desc->vertex_func.source, VALIDATE_SHADERDESC_VERTEX_SOURCE); + _SG_VALIDATE(0 != desc->fragment_func.source, VALIDATE_SHADERDESC_FRAGMENT_SOURCE); + } #elif defined(SOKOL_METAL) || defined(SOKOL_D3D11) // on Metal or D3D11, must provide shader source code or byte code - _SG_VALIDATE((0 != desc->vertex_func.source)||(0 != desc->vertex_func.bytecode.ptr), VALIDATE_SHADERDESC_SOURCE_OR_BYTECODE); - _SG_VALIDATE((0 != desc->fragment_func.source)||(0 != desc->fragment_func.bytecode.ptr), VALIDATE_SHADERDESC_SOURCE_OR_BYTECODE); + if (is_compute_shader) { + _SG_VALIDATE((0 != desc->compute_func.source) || (0 != desc->compute_func.bytecode.ptr), VALIDATE_SHADERDESC_COMPUTE_SOURCE_OR_BYTECODE); + } else { + _SG_VALIDATE((0 != desc->vertex_func.source)|| (0 != desc->vertex_func.bytecode.ptr), VALIDATE_SHADERDESC_VERTEX_SOURCE_OR_BYTECODE); + _SG_VALIDATE((0 != desc->fragment_func.source) || (0 != desc->fragment_func.bytecode.ptr), VALIDATE_SHADERDESC_FRAGMENT_SOURCE_OR_BYTECODE); + } #else // Dummy Backend, don't require source or bytecode #endif + if (is_compute_shader) { + _SG_VALIDATE((0 == desc->vertex_func.source) && (0 == desc->vertex_func.bytecode.ptr), VALIDATE_SHADERDESC_INVALID_SHADER_COMBO); + _SG_VALIDATE((0 == desc->fragment_func.source) && (0 == desc->fragment_func.bytecode.ptr), VALIDATE_SHADERDESC_INVALID_SHADER_COMBO); + } else { + _SG_VALIDATE((0 == desc->compute_func.source) && (0 == desc->compute_func.bytecode.ptr), VALIDATE_SHADERDESC_INVALID_SHADER_COMBO); + } + #if defined(SOKOL_METAL) + if (is_compute_shader) { + _SG_VALIDATE(desc->mtl_threads_per_threadgroup.x > 0, VALIDATE_SHADERDESC_METAL_THREADS_PER_THREADGROUP); + _SG_VALIDATE(desc->mtl_threads_per_threadgroup.y > 0, VALIDATE_SHADERDESC_METAL_THREADS_PER_THREADGROUP); + _SG_VALIDATE(desc->mtl_threads_per_threadgroup.z > 0, VALIDATE_SHADERDESC_METAL_THREADS_PER_THREADGROUP); + } + #endif for (size_t i = 0; i < SG_MAX_VERTEX_ATTRIBUTES; i++) { if (desc->attrs[i].glsl_name) { _SG_VALIDATE(strlen(desc->attrs[i].glsl_name) < _SG_STRING_SIZE, VALIDATE_SHADERDESC_ATTR_STRING_TOO_LONG); @@ -16813,6 +17843,9 @@ _SOKOL_PRIVATE bool _sg_validate_shader_desc(const sg_shader_desc* desc) { if (0 != desc->fragment_func.bytecode.ptr) { _SG_VALIDATE(desc->fragment_func.bytecode.size > 0, VALIDATE_SHADERDESC_NO_BYTECODE_SIZE); } + if (0 != desc->compute_func.bytecode.ptr) { + _SG_VALIDATE(desc->compute_func.bytecode.size > 0, VALIDATE_SHADERDESC_NO_BYTECODE_SIZE); + } #if defined(SOKOL_METAL) _sg_u128_t msl_buf_bits = _sg_u128(); @@ -16820,7 +17853,8 @@ _SOKOL_PRIVATE bool _sg_validate_shader_desc(const sg_shader_desc* desc) { _sg_u128_t msl_smp_bits = _sg_u128(); #elif defined(SOKOL_D3D11) _sg_u128_t hlsl_buf_bits = _sg_u128(); - _sg_u128_t hlsl_tex_bits = _sg_u128(); + _sg_u128_t hlsl_srv_bits = _sg_u128(); + _sg_u128_t hlsl_uav_bits = _sg_u128(); _sg_u128_t hlsl_smp_bits = _sg_u128(); #elif defined(_SOKOL_ANY_GL) _sg_u128_t glsl_bnd_bits = _sg_u128(); @@ -16886,15 +17920,20 @@ _SOKOL_PRIVATE bool _sg_validate_shader_desc(const sg_shader_desc* desc) { if (sbuf_desc->stage == SG_SHADERSTAGE_NONE) { continue; } - _SG_VALIDATE(sbuf_desc->readonly, VALIDATE_SHADERDESC_STORAGEBUFFER_READONLY); #if defined(SOKOL_METAL) _SG_VALIDATE((sbuf_desc->msl_buffer_n >= _SG_MTL_MAX_STAGE_UB_BINDINGS) && (sbuf_desc->msl_buffer_n < _SG_MTL_MAX_STAGE_UB_SBUF_BINDINGS), 
VALIDATE_SHADERDESC_STORAGEBUFFER_METAL_BUFFER_SLOT_OUT_OF_RANGE); _SG_VALIDATE(_sg_validate_slot_bits(msl_buf_bits, sbuf_desc->stage, sbuf_desc->msl_buffer_n), VALIDATE_SHADERDESC_STORAGEBUFFER_METAL_BUFFER_SLOT_COLLISION); msl_buf_bits = _sg_validate_set_slot_bit(msl_buf_bits, sbuf_desc->stage, sbuf_desc->msl_buffer_n); #elif defined(SOKOL_D3D11) - _SG_VALIDATE(sbuf_desc->hlsl_register_t_n < _SG_D3D11_MAX_STAGE_TEX_SBUF_BINDINGS, VALIDATE_SHADERDESC_STORAGEBUFFER_HLSL_REGISTER_T_OUT_OF_RANGE); - _SG_VALIDATE(_sg_validate_slot_bits(hlsl_tex_bits, sbuf_desc->stage, sbuf_desc->hlsl_register_t_n), VALIDATE_SHADERDESC_STORAGEBUFFER_HLSL_REGISTER_T_COLLISION); - hlsl_tex_bits = _sg_validate_set_slot_bit(hlsl_tex_bits, sbuf_desc->stage, sbuf_desc->hlsl_register_t_n); + if (sbuf_desc->readonly) { + _SG_VALIDATE(sbuf_desc->hlsl_register_t_n < _SG_D3D11_MAX_STAGE_SRV_BINDINGS, VALIDATE_SHADERDESC_STORAGEBUFFER_HLSL_REGISTER_T_OUT_OF_RANGE); + _SG_VALIDATE(_sg_validate_slot_bits(hlsl_srv_bits, sbuf_desc->stage, sbuf_desc->hlsl_register_t_n), VALIDATE_SHADERDESC_STORAGEBUFFER_HLSL_REGISTER_T_COLLISION); + hlsl_srv_bits = _sg_validate_set_slot_bit(hlsl_srv_bits, sbuf_desc->stage, sbuf_desc->hlsl_register_t_n); + } else { + _SG_VALIDATE(sbuf_desc->hlsl_register_u_n < _SG_D3D11_MAX_STAGE_UAV_BINDINGS, VALIDATE_SHADERDESC_STORAGEBUFFER_HLSL_REGISTER_U_OUT_OF_RANGE); + _SG_VALIDATE(_sg_validate_slot_bits(hlsl_uav_bits, sbuf_desc->stage, sbuf_desc->hlsl_register_u_n), VALIDATE_SHADERDESC_STORAGEBUFFER_HLSL_REGISTER_U_COLLISION); + hlsl_uav_bits = _sg_validate_set_slot_bit(hlsl_uav_bits, sbuf_desc->stage, sbuf_desc->hlsl_register_u_n); + } #elif defined(_SOKOL_ANY_GL) _SG_VALIDATE(sbuf_desc->glsl_binding_n < _SG_GL_MAX_SBUF_BINDINGS, VALIDATE_SHADERDESC_STORAGEBUFFER_GLSL_BINDING_OUT_OF_RANGE); _SG_VALIDATE(_sg_validate_slot_bits(glsl_bnd_bits, SG_SHADERSTAGE_NONE, sbuf_desc->glsl_binding_n), VALIDATE_SHADERDESC_STORAGEBUFFER_GLSL_BINDING_COLLISION); @@ -16918,9 +17957,9 @@ _SOKOL_PRIVATE bool _sg_validate_shader_desc(const sg_shader_desc* desc) { _SG_VALIDATE(_sg_validate_slot_bits(msl_tex_bits, img_desc->stage, img_desc->msl_texture_n), VALIDATE_SHADERDESC_IMAGE_METAL_TEXTURE_SLOT_COLLISION); msl_tex_bits = _sg_validate_set_slot_bit(msl_tex_bits, img_desc->stage, img_desc->msl_texture_n); #elif defined(SOKOL_D3D11) - _SG_VALIDATE(img_desc->hlsl_register_t_n < _SG_D3D11_MAX_STAGE_TEX_SBUF_BINDINGS, VALIDATE_SHADERDESC_IMAGE_HLSL_REGISTER_T_OUT_OF_RANGE); - _SG_VALIDATE(_sg_validate_slot_bits(hlsl_tex_bits, img_desc->stage, img_desc->hlsl_register_t_n), VALIDATE_SHADERDESC_IMAGE_HLSL_REGISTER_T_COLLISION); - hlsl_tex_bits = _sg_validate_set_slot_bit(hlsl_tex_bits, img_desc->stage, img_desc->hlsl_register_t_n); + _SG_VALIDATE(img_desc->hlsl_register_t_n < _SG_D3D11_MAX_STAGE_SRV_BINDINGS, VALIDATE_SHADERDESC_IMAGE_HLSL_REGISTER_T_OUT_OF_RANGE); + _SG_VALIDATE(_sg_validate_slot_bits(hlsl_srv_bits, img_desc->stage, img_desc->hlsl_register_t_n), VALIDATE_SHADERDESC_IMAGE_HLSL_REGISTER_T_COLLISION); + hlsl_srv_bits = _sg_validate_set_slot_bit(hlsl_srv_bits, img_desc->stage, img_desc->hlsl_register_t_n); #elif defined(SOKOL_WGPU) _SG_VALIDATE(img_desc->wgsl_group1_binding_n < _SG_WGPU_MAX_IMG_SMP_SBUF_BIND_SLOTS, VALIDATE_SHADERDESC_IMAGE_WGSL_GROUP1_BINDING_OUT_OF_RANGE); _SG_VALIDATE(_sg_validate_slot_bits(wgsl_group1_bits, SG_SHADERSTAGE_NONE, img_desc->wgsl_group1_binding_n), VALIDATE_SHADERDESC_IMAGE_WGSL_GROUP1_BINDING_COLLISION); @@ -17004,30 +18043,41 @@ _SOKOL_PRIVATE bool 
_sg_validate_pipeline_desc(const sg_pipeline_desc* desc) { _SG_VALIDATE(desc->_start_canary == 0, VALIDATE_PIPELINEDESC_CANARY); _SG_VALIDATE(desc->_end_canary == 0, VALIDATE_PIPELINEDESC_CANARY); _SG_VALIDATE(desc->shader.id != SG_INVALID_ID, VALIDATE_PIPELINEDESC_SHADER); - for (size_t buf_index = 0; buf_index < SG_MAX_VERTEXBUFFER_BINDSLOTS; buf_index++) { - const sg_vertex_buffer_layout_state* l_state = &desc->layout.buffers[buf_index]; - if (l_state->stride == 0) { - continue; - } - _SG_VALIDATE(_sg_multiple_u64((uint64_t)l_state->stride, 4), VALIDATE_PIPELINEDESC_LAYOUT_STRIDE4); - } const _sg_shader_t* shd = _sg_lookup_shader(&_sg.pools, desc->shader.id); _SG_VALIDATE(0 != shd, VALIDATE_PIPELINEDESC_SHADER); if (shd) { _SG_VALIDATE(shd->slot.state == SG_RESOURCESTATE_VALID, VALIDATE_PIPELINEDESC_SHADER); - bool attrs_cont = true; - for (size_t attr_index = 0; attr_index < SG_MAX_VERTEX_ATTRIBUTES; attr_index++) { - const sg_vertex_attr_state* a_state = &desc->layout.attrs[attr_index]; - if (a_state->format == SG_VERTEXFORMAT_INVALID) { - attrs_cont = false; - continue; + if (desc->compute) { + _SG_VALIDATE(shd->cmn.is_compute, VALIDATE_PIPELINEDESC_COMPUTE_SHADER_EXPECTED); + } else { + _SG_VALIDATE(!shd->cmn.is_compute, VALIDATE_PIPELINEDESC_NO_COMPUTE_SHADER_EXPECTED); + bool attrs_cont = true; + for (int attr_index = 0; attr_index < SG_MAX_VERTEX_ATTRIBUTES; attr_index++) { + const sg_vertex_attr_state* a_state = &desc->layout.attrs[attr_index]; + if (a_state->format == SG_VERTEXFORMAT_INVALID) { + attrs_cont = false; + continue; + } + _SG_VALIDATE(attrs_cont, VALIDATE_PIPELINEDESC_NO_CONT_ATTRS); + SOKOL_ASSERT(a_state->buffer_index < SG_MAX_VERTEXBUFFER_BINDSLOTS); + #if defined(SOKOL_D3D11) + // on D3D11, semantic names (and semantic indices) must be provided + _SG_VALIDATE(!_sg_strempty(&shd->d3d11.attrs[attr_index].sem_name), VALIDATE_PIPELINEDESC_ATTR_SEMANTICS); + #endif + } + // must only use readonly storage buffer bindings in render pipelines + for (size_t i = 0; i < SG_MAX_STORAGEBUFFER_BINDSLOTS; i++) { + if (shd->cmn.storage_buffers[i].stage != SG_SHADERSTAGE_NONE) { + _SG_VALIDATE(shd->cmn.storage_buffers[i].readonly, VALIDATE_PIPELINEDESC_SHADER_READONLY_STORAGEBUFFERS); + } + } + for (int buf_index = 0; buf_index < SG_MAX_VERTEXBUFFER_BINDSLOTS; buf_index++) { + const sg_vertex_buffer_layout_state* l_state = &desc->layout.buffers[buf_index]; + if (l_state->stride == 0) { + continue; + } + _SG_VALIDATE(_sg_multiple_u64((uint64_t)l_state->stride, 4), VALIDATE_PIPELINEDESC_LAYOUT_STRIDE4); } - _SG_VALIDATE(attrs_cont, VALIDATE_PIPELINEDESC_NO_CONT_ATTRS); - SOKOL_ASSERT(a_state->buffer_index < SG_MAX_VERTEXBUFFER_BINDSLOTS); - #if defined(SOKOL_D3D11) - // on D3D11, semantic names (and semantic indices) must be provided - _SG_VALIDATE(!_sg_strempty(&shd->d3d11.attrs[attr_index].sem_name), VALIDATE_PIPELINEDESC_ATTR_SEMANTICS); - #endif } } for (size_t color_index = 0; color_index < (size_t)desc->color_count; color_index++) { @@ -17153,10 +18203,16 @@ _SOKOL_PRIVATE bool _sg_validate_begin_pass(const sg_pass* pass) { if (_sg.desc.disable_validation) { return true; } + const bool is_compute_pass = pass->compute; + const bool is_swapchain_pass = !is_compute_pass && (pass->attachments.id == SG_INVALID_ID); + const bool is_offscreen_pass = !(is_compute_pass || is_swapchain_pass); _sg_validate_begin(); _SG_VALIDATE(pass->_start_canary == 0, VALIDATE_BEGINPASS_CANARY); _SG_VALIDATE(pass->_end_canary == 0, VALIDATE_BEGINPASS_CANARY); - if (pass->attachments.id == 
SG_INVALID_ID) { + if (is_compute_pass) { + // this is a compute pass + _SG_VALIDATE(pass->attachments.id == SG_INVALID_ID, VALIDATE_BEGINPASS_EXPECT_NO_ATTACHMENTS); + } else if (is_swapchain_pass) { // this is a swapchain pass _SG_VALIDATE(pass->swapchain.width > 0, VALIDATE_BEGINPASS_SWAPCHAIN_EXPECT_WIDTH); _SG_VALIDATE(pass->swapchain.height > 0, VALIDATE_BEGINPASS_SWAPCHAIN_EXPECT_HEIGHT); @@ -17229,7 +18285,8 @@ _SOKOL_PRIVATE bool _sg_validate_begin_pass(const sg_pass* pass) { } else { _SG_VALIDATE(atts != 0, VALIDATE_BEGINPASS_ATTACHMENTS_EXISTS); } - // swapchain params must be all zero! + } + if (is_compute_pass || is_offscreen_pass) { _SG_VALIDATE(pass->swapchain.width == 0, VALIDATE_BEGINPASS_SWAPCHAIN_EXPECT_WIDTH_NOTSET); _SG_VALIDATE(pass->swapchain.height == 0, VALIDATE_BEGINPASS_SWAPCHAIN_EXPECT_HEIGHT_NOTSET); _SG_VALIDATE(pass->swapchain.sample_count == 0, VALIDATE_BEGINPASS_SWAPCHAIN_EXPECT_SAMPLECOUNT_NOTSET); @@ -17255,6 +18312,42 @@ _SOKOL_PRIVATE bool _sg_validate_begin_pass(const sg_pass* pass) { #endif } +_SOKOL_PRIVATE bool _sg_validate_apply_viewport(int x, int y, int width, int height, bool origin_top_left) { + _SOKOL_UNUSED(x); + _SOKOL_UNUSED(y); + _SOKOL_UNUSED(width); + _SOKOL_UNUSED(height); + _SOKOL_UNUSED(origin_top_left); + #if !defined(SOKOL_DEBUG) + return true; + #else + if (_sg.desc.disable_validation) { + return true; + } + _sg_validate_begin(); + _SG_VALIDATE(_sg.cur_pass.in_pass && !_sg.cur_pass.is_compute, VALIDATE_AVP_RENDERPASS_EXPECTED); + return _sg_validate_end(); + #endif +} + +_SOKOL_PRIVATE bool _sg_validate_apply_scissor_rect(int x, int y, int width, int height, bool origin_top_left) { + _SOKOL_UNUSED(x); + _SOKOL_UNUSED(y); + _SOKOL_UNUSED(width); + _SOKOL_UNUSED(height); + _SOKOL_UNUSED(origin_top_left); + #if !defined(SOKOL_DEBUG) + return true; + #else + if (_sg.desc.disable_validation) { + return true; + } + _sg_validate_begin(); + _SG_VALIDATE(_sg.cur_pass.in_pass && !_sg.cur_pass.is_compute, VALIDATE_ASR_RENDERPASS_EXPECTED); + return _sg_validate_end(); + #endif +} + _SOKOL_PRIVATE bool _sg_validate_apply_pipeline(sg_pipeline pip_id) { #if !defined(SOKOL_DEBUG) _SOKOL_UNUSED(pip_id); @@ -17274,34 +18367,40 @@ _SOKOL_PRIVATE bool _sg_validate_apply_pipeline(sg_pipeline pip_id) { _SG_VALIDATE(pip->slot.state == SG_RESOURCESTATE_VALID, VALIDATE_APIP_PIPELINE_VALID); // the pipeline's shader must be alive and valid SOKOL_ASSERT(pip->shader); + _SG_VALIDATE(_sg.cur_pass.in_pass, VALIDATE_APIP_PASS_EXPECTED); _SG_VALIDATE(pip->shader->slot.id == pip->cmn.shader_id.id, VALIDATE_APIP_SHADER_EXISTS); _SG_VALIDATE(pip->shader->slot.state == SG_RESOURCESTATE_VALID, VALIDATE_APIP_SHADER_VALID); - // check that pipeline attributes match current pass attributes - if (_sg.cur_pass.atts_id.id != SG_INVALID_ID) { - // an offscreen pass - const _sg_attachments_t* atts = _sg.cur_pass.atts; - SOKOL_ASSERT(atts); - _SG_VALIDATE(atts->slot.id == _sg.cur_pass.atts_id.id, VALIDATE_APIP_CURPASS_ATTACHMENTS_EXISTS); - _SG_VALIDATE(atts->slot.state == SG_RESOURCESTATE_VALID, VALIDATE_APIP_CURPASS_ATTACHMENTS_VALID); - - _SG_VALIDATE(pip->cmn.color_count == atts->cmn.num_colors, VALIDATE_APIP_ATT_COUNT); - for (int i = 0; i < pip->cmn.color_count; i++) { - const _sg_image_t* att_img = _sg_attachments_color_image(atts, i); - _SG_VALIDATE(pip->cmn.colors[i].pixel_format == att_img->cmn.pixel_format, VALIDATE_APIP_COLOR_FORMAT); - _SG_VALIDATE(pip->cmn.sample_count == att_img->cmn.sample_count, VALIDATE_APIP_SAMPLE_COUNT); - } - const _sg_image_t* att_dsimg 
= _sg_attachments_ds_image(atts); - if (att_dsimg) { - _SG_VALIDATE(pip->cmn.depth.pixel_format == att_dsimg->cmn.pixel_format, VALIDATE_APIP_DEPTH_FORMAT); - } else { - _SG_VALIDATE(pip->cmn.depth.pixel_format == SG_PIXELFORMAT_NONE, VALIDATE_APIP_DEPTH_FORMAT); - } + if (pip->cmn.is_compute) { + _SG_VALIDATE(_sg.cur_pass.is_compute, VALIDATE_APIP_COMPUTEPASS_EXPECTED); } else { - // default pass - _SG_VALIDATE(pip->cmn.color_count == 1, VALIDATE_APIP_ATT_COUNT); - _SG_VALIDATE(pip->cmn.colors[0].pixel_format == _sg.cur_pass.swapchain.color_fmt, VALIDATE_APIP_COLOR_FORMAT); - _SG_VALIDATE(pip->cmn.depth.pixel_format == _sg.cur_pass.swapchain.depth_fmt, VALIDATE_APIP_DEPTH_FORMAT); - _SG_VALIDATE(pip->cmn.sample_count == _sg.cur_pass.swapchain.sample_count, VALIDATE_APIP_SAMPLE_COUNT); + _SG_VALIDATE(!_sg.cur_pass.is_compute, VALIDATE_APIP_RENDERPASS_EXPECTED); + // check that pipeline attributes match current pass attributes + if (_sg.cur_pass.atts_id.id != SG_INVALID_ID) { + // an offscreen pass + const _sg_attachments_t* atts = _sg.cur_pass.atts; + SOKOL_ASSERT(atts); + _SG_VALIDATE(atts->slot.id == _sg.cur_pass.atts_id.id, VALIDATE_APIP_CURPASS_ATTACHMENTS_EXISTS); + _SG_VALIDATE(atts->slot.state == SG_RESOURCESTATE_VALID, VALIDATE_APIP_CURPASS_ATTACHMENTS_VALID); + + _SG_VALIDATE(pip->cmn.color_count == atts->cmn.num_colors, VALIDATE_APIP_ATT_COUNT); + for (int i = 0; i < pip->cmn.color_count; i++) { + const _sg_image_t* att_img = _sg_attachments_color_image(atts, i); + _SG_VALIDATE(pip->cmn.colors[i].pixel_format == att_img->cmn.pixel_format, VALIDATE_APIP_COLOR_FORMAT); + _SG_VALIDATE(pip->cmn.sample_count == att_img->cmn.sample_count, VALIDATE_APIP_SAMPLE_COUNT); + } + const _sg_image_t* att_dsimg = _sg_attachments_ds_image(atts); + if (att_dsimg) { + _SG_VALIDATE(pip->cmn.depth.pixel_format == att_dsimg->cmn.pixel_format, VALIDATE_APIP_DEPTH_FORMAT); + } else { + _SG_VALIDATE(pip->cmn.depth.pixel_format == SG_PIXELFORMAT_NONE, VALIDATE_APIP_DEPTH_FORMAT); + } + } else { + // default pass + _SG_VALIDATE(pip->cmn.color_count == 1, VALIDATE_APIP_ATT_COUNT); + _SG_VALIDATE(pip->cmn.colors[0].pixel_format == _sg.cur_pass.swapchain.color_fmt, VALIDATE_APIP_COLOR_FORMAT); + _SG_VALIDATE(pip->cmn.depth.pixel_format == _sg.cur_pass.swapchain.depth_fmt, VALIDATE_APIP_DEPTH_FORMAT); + _SG_VALIDATE(pip->cmn.sample_count == _sg.cur_pass.swapchain.sample_count, VALIDATE_APIP_SAMPLE_COUNT); + } } return _sg_validate_end(); #endif @@ -17317,6 +18416,9 @@ _SOKOL_PRIVATE bool _sg_validate_apply_bindings(const sg_bindings* bindings) { } _sg_validate_begin(); + // must be called in a pass + _SG_VALIDATE(_sg.cur_pass.in_pass, VALIDATE_ABND_PASS_EXPECTED); + // a pipeline object must have been applied _SG_VALIDATE(_sg.cur_pipeline.id != SG_INVALID_ID, VALIDATE_ABND_PIPELINE); const _sg_pipeline_t* pip = _sg_lookup_pipeline(&_sg.pools, _sg.cur_pipeline.id); @@ -17328,37 +18430,47 @@ _SOKOL_PRIVATE bool _sg_validate_apply_bindings(const sg_bindings* bindings) { SOKOL_ASSERT(pip->shader && (pip->cmn.shader_id.id == pip->shader->slot.id)); const _sg_shader_t* shd = pip->shader; - // has expected vertex buffers, and vertex buffers still exist - for (size_t i = 0; i < SG_MAX_VERTEXBUFFER_BINDSLOTS; i++) { - if (pip->cmn.vertex_buffer_layout_active[i]) { - _SG_VALIDATE(bindings->vertex_buffers[i].id != SG_INVALID_ID, VALIDATE_ABND_EXPECTED_VB); - // buffers in vertex-buffer-slots must be of type SG_BUFFERTYPE_VERTEXBUFFER - if (bindings->vertex_buffers[i].id != SG_INVALID_ID) { - const _sg_buffer_t* buf = 
_sg_lookup_buffer(&_sg.pools, bindings->vertex_buffers[i].id); - _SG_VALIDATE(buf != 0, VALIDATE_ABND_VB_EXISTS); - if (buf && buf->slot.state == SG_RESOURCESTATE_VALID) { - _SG_VALIDATE(SG_BUFFERTYPE_VERTEXBUFFER == buf->cmn.type, VALIDATE_ABND_VB_TYPE); - _SG_VALIDATE(!buf->cmn.append_overflow, VALIDATE_ABND_VB_OVERFLOW); + if (_sg.cur_pass.is_compute) { + for (size_t i = 0; i < SG_MAX_VERTEXBUFFER_BINDSLOTS; i++) { + _SG_VALIDATE(bindings->vertex_buffers[i].id == SG_INVALID_ID, VALIDATE_ABND_COMPUTE_EXPECTED_NO_VBS); + } + } else { + // has expected vertex buffers, and vertex buffers still exist + for (size_t i = 0; i < SG_MAX_VERTEXBUFFER_BINDSLOTS; i++) { + if (pip->cmn.vertex_buffer_layout_active[i]) { + _SG_VALIDATE(bindings->vertex_buffers[i].id != SG_INVALID_ID, VALIDATE_ABND_EXPECTED_VB); + // buffers in vertex-buffer-slots must be of type SG_BUFFERTYPE_VERTEXBUFFER + if (bindings->vertex_buffers[i].id != SG_INVALID_ID) { + const _sg_buffer_t* buf = _sg_lookup_buffer(&_sg.pools, bindings->vertex_buffers[i].id); + _SG_VALIDATE(buf != 0, VALIDATE_ABND_VB_EXISTS); + if (buf && buf->slot.state == SG_RESOURCESTATE_VALID) { + _SG_VALIDATE(SG_BUFFERTYPE_VERTEXBUFFER == buf->cmn.type, VALIDATE_ABND_VB_TYPE); + _SG_VALIDATE(!buf->cmn.append_overflow, VALIDATE_ABND_VB_OVERFLOW); + } } } } } - // index buffer expected or not, and index buffer still exists - if (pip->cmn.index_type == SG_INDEXTYPE_NONE) { - // pipeline defines non-indexed rendering, but index buffer provided - _SG_VALIDATE(bindings->index_buffer.id == SG_INVALID_ID, VALIDATE_ABND_IB); + if (_sg.cur_pass.is_compute) { + _SG_VALIDATE(bindings->index_buffer.id == SG_INVALID_ID, VALIDATE_ABND_COMPUTE_EXPECTED_NO_IB); } else { - // pipeline defines indexed rendering, but no index buffer provided - _SG_VALIDATE(bindings->index_buffer.id != SG_INVALID_ID, VALIDATE_ABND_NO_IB); - } - if (bindings->index_buffer.id != SG_INVALID_ID) { - // buffer in index-buffer-slot must be of type SG_BUFFERTYPE_INDEXBUFFER - const _sg_buffer_t* buf = _sg_lookup_buffer(&_sg.pools, bindings->index_buffer.id); - _SG_VALIDATE(buf != 0, VALIDATE_ABND_IB_EXISTS); - if (buf && buf->slot.state == SG_RESOURCESTATE_VALID) { - _SG_VALIDATE(SG_BUFFERTYPE_INDEXBUFFER == buf->cmn.type, VALIDATE_ABND_IB_TYPE); - _SG_VALIDATE(!buf->cmn.append_overflow, VALIDATE_ABND_IB_OVERFLOW); + // index buffer expected or not, and index buffer still exists + if (pip->cmn.index_type == SG_INDEXTYPE_NONE) { + // pipeline defines non-indexed rendering, but index buffer provided + _SG_VALIDATE(bindings->index_buffer.id == SG_INVALID_ID, VALIDATE_ABND_IB); + } else { + // pipeline defines indexed rendering, but no index buffer provided + _SG_VALIDATE(bindings->index_buffer.id != SG_INVALID_ID, VALIDATE_ABND_NO_IB); + } + if (bindings->index_buffer.id != SG_INVALID_ID) { + // buffer in index-buffer-slot must be of type SG_BUFFERTYPE_INDEXBUFFER + const _sg_buffer_t* buf = _sg_lookup_buffer(&_sg.pools, bindings->index_buffer.id); + _SG_VALIDATE(buf != 0, VALIDATE_ABND_IB_EXISTS); + if (buf && buf->slot.state == SG_RESOURCESTATE_VALID) { + _SG_VALIDATE(SG_BUFFERTYPE_INDEXBUFFER == buf->cmn.type, VALIDATE_ABND_IB_TYPE); + _SG_VALIDATE(!buf->cmn.append_overflow, VALIDATE_ABND_IB_OVERFLOW); + } } } @@ -17426,6 +18538,10 @@ _SOKOL_PRIVATE bool _sg_validate_apply_bindings(const sg_bindings* bindings) { _SG_VALIDATE(sbuf != 0, VALIDATE_ABND_STORAGEBUFFER_EXISTS); if (sbuf) { _SG_VALIDATE(sbuf->cmn.type == SG_BUFFERTYPE_STORAGEBUFFER, VALIDATE_ABND_STORAGEBUFFER_BINDING_BUFFERTYPE); + // 
read/write bindings are only allowed for immutable buffers + if (!shd->cmn.storage_buffers[i].readonly) { + _SG_VALIDATE(sbuf->cmn.usage == SG_USAGE_IMMUTABLE, VALIDATE_ABND_STORAGEBUFFER_READWRITE_IMMUTABLE); + } } } } @@ -17445,19 +18561,60 @@ _SOKOL_PRIVATE bool _sg_validate_apply_uniforms(int ub_slot, const sg_range* dat } SOKOL_ASSERT((ub_slot >= 0) && (ub_slot < SG_MAX_UNIFORMBLOCK_BINDSLOTS)); _sg_validate_begin(); - _SG_VALIDATE(_sg.cur_pipeline.id != SG_INVALID_ID, VALIDATE_AUB_NO_PIPELINE); + _SG_VALIDATE(_sg.cur_pass.in_pass, VALIDATE_AU_PASS_EXPECTED); + _SG_VALIDATE(_sg.cur_pipeline.id != SG_INVALID_ID, VALIDATE_AU_NO_PIPELINE); const _sg_pipeline_t* pip = _sg_lookup_pipeline(&_sg.pools, _sg.cur_pipeline.id); SOKOL_ASSERT(pip && (pip->slot.id == _sg.cur_pipeline.id)); SOKOL_ASSERT(pip->shader && (pip->shader->slot.id == pip->cmn.shader_id.id)); const _sg_shader_t* shd = pip->shader; - _SG_VALIDATE(shd->cmn.uniform_blocks[ub_slot].stage != SG_SHADERSTAGE_NONE, VALIDATE_AUB_NO_UNIFORMBLOCK_AT_SLOT); - _SG_VALIDATE(data->size == shd->cmn.uniform_blocks[ub_slot].size, VALIDATE_AUB_SIZE); + _SG_VALIDATE(shd->cmn.uniform_blocks[ub_slot].stage != SG_SHADERSTAGE_NONE, VALIDATE_AU_NO_UNIFORMBLOCK_AT_SLOT); + _SG_VALIDATE(data->size == shd->cmn.uniform_blocks[ub_slot].size, VALIDATE_AU_SIZE); return _sg_validate_end(); #endif } +_SOKOL_PRIVATE bool _sg_validate_draw(int base_element, int num_elements, int num_instances) { + #if !defined(SOKOL_DEBUG) + _SOKOL_UNUSED(base_element); + _SOKOL_UNUSED(num_elements); + _SOKOL_UNUSED(num_instances); + return true; + #else + if (_sg.desc.disable_validation) { + return true; + } + _sg_validate_begin(); + _SG_VALIDATE(_sg.cur_pass.in_pass && !_sg.cur_pass.is_compute, VALIDATE_DRAW_RENDERPASS_EXPECTED); + _SG_VALIDATE(base_element >= 0, VALIDATE_DRAW_BASEELEMENT); + _SG_VALIDATE(num_elements >= 0, VALIDATE_DRAW_NUMELEMENTS); + _SG_VALIDATE(num_instances >= 0, VALIDATE_DRAW_NUMINSTANCES); + _SG_VALIDATE(_sg.required_bindings_and_uniforms == _sg.applied_bindings_and_uniforms, VALIDATE_DRAW_REQUIRED_BINDINGS_OR_UNIFORMS_MISSING); + return _sg_validate_end(); + #endif +} + +_SOKOL_PRIVATE bool _sg_validate_dispatch(int num_groups_x, int num_groups_y, int num_groups_z) { + #if !defined(SOKOL_DEBUG) + _SOKOL_UNUSED(num_groups_x); + _SOKOL_UNUSED(num_groups_y); + _SOKOL_UNUSED(num_groups_z); + return true; + #else + if (_sg.desc.disable_validation) { + return true; + } + _sg_validate_begin(); + _SG_VALIDATE(_sg.cur_pass.in_pass && _sg.cur_pass.is_compute, VALIDATE_DISPATCH_COMPUTEPASS_EXPECTED); + _SG_VALIDATE((num_groups_x >= 0) && (num_groups_x < (1<<16)), VALIDATE_DISPATCH_NUMGROUPSX); + _SG_VALIDATE((num_groups_y >= 0) && (num_groups_y < (1<<16)), VALIDATE_DISPATCH_NUMGROUPSY); + _SG_VALIDATE((num_groups_z >= 0) && (num_groups_z < (1<<16)), VALIDATE_DISPATCH_NUMGROUPSZ); + _SG_VALIDATE(_sg.required_bindings_and_uniforms == _sg.applied_bindings_and_uniforms, VALIDATE_DRAW_REQUIRED_BINDINGS_OR_UNIFORMS_MISSING); + return _sg_validate_end(); + #endif +} + _SOKOL_PRIVATE bool _sg_validate_update_buffer(const _sg_buffer_t* buf, const sg_range* data) { #if !defined(SOKOL_DEBUG) _SOKOL_UNUSED(buf); @@ -17532,8 +18689,6 @@ _SOKOL_PRIVATE sg_buffer_desc _sg_buffer_desc_defaults(const sg_buffer_desc* des def.usage = _sg_def(def.usage, SG_USAGE_IMMUTABLE); if (def.size == 0) { def.size = def.data.size; - } else if (def.data.size == 0) { - def.data.size = def.size; } return def; } @@ -17574,9 +18729,11 @@ _SOKOL_PRIVATE sg_shader_desc 
_sg_shader_desc_defaults(const sg_shader_desc* des #if defined(SOKOL_METAL) def.vertex_func.entry = _sg_def(def.vertex_func.entry, "_main"); def.fragment_func.entry = _sg_def(def.fragment_func.entry, "_main"); + def.compute_func.entry = _sg_def(def.compute_func.entry, "_main"); #else def.vertex_func.entry = _sg_def(def.vertex_func.entry, "main"); def.fragment_func.entry = _sg_def(def.fragment_func.entry, "main"); + def.compute_func.entry = _sg_def(def.compute_func.entry, "main"); #endif #if defined(SOKOL_D3D11) if (def.vertex_func.source) { @@ -17585,7 +18742,12 @@ _SOKOL_PRIVATE sg_shader_desc _sg_shader_desc_defaults(const sg_shader_desc* des if (def.fragment_func.source) { def.fragment_func.d3d11_target = _sg_def(def.fragment_func.d3d11_target, "ps_4_0"); } + if (def.compute_func.source) { + def.compute_func.d3d11_target = _sg_def(def.compute_func.d3d11_target, "cs_5_0"); + } #endif + def.mtl_threads_per_threadgroup.y = _sg_def(desc->mtl_threads_per_threadgroup.y, 1); + def.mtl_threads_per_threadgroup.z = _sg_def(desc->mtl_threads_per_threadgroup.z, 1); for (size_t ub_index = 0; ub_index < SG_MAX_UNIFORMBLOCK_BINDSLOTS; ub_index++) { sg_shader_uniform_block* ub_desc = &def.uniform_blocks[ub_index]; if (ub_desc->stage != SG_SHADERSTAGE_NONE) { @@ -17618,6 +18780,8 @@ _SOKOL_PRIVATE sg_shader_desc _sg_shader_desc_defaults(const sg_shader_desc* des _SOKOL_PRIVATE sg_pipeline_desc _sg_pipeline_desc_defaults(const sg_pipeline_desc* desc) { sg_pipeline_desc def = *desc; + // FIXME: should we actually do all this stuff for a compute pipeline? + def.primitive_type = _sg_def(def.primitive_type, SG_PRIMITIVETYPE_TRIANGLES); def.index_type = _sg_def(def.index_type, SG_INDEXTYPE_NONE); def.cull_mode = _sg_def(def.cull_mode, SG_CULLMODE_NONE);
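[Editor's note on the FIXME above: for a compute pipeline only the `compute` flag and the shader reference matter; the render-state defaults are still filled in, but the backends' compute paths only consume the pipeline layout and shader module (cf. the WebGPU branch earlier in this diff). A hedged sketch, with an assumed shader handle:]

```c
// 'compute_shd' is assumed to be a shader created with a compute_func
sg_pipeline compute_pip = sg_make_pipeline(&(sg_pipeline_desc){
    .compute = true,        // validated against the shader's is_compute flag
    .shader = compute_shd,  // vertex layout and render state are irrelevant here
    .label = "compute-pipeline",
});
```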
@@ -17788,37 +18952,37 @@ _SOKOL_PRIVATE sg_attachments _sg_alloc_attachments(void) { _SOKOL_PRIVATE void _sg_dealloc_buffer(_sg_buffer_t* buf) { SOKOL_ASSERT(buf && (buf->slot.state == SG_RESOURCESTATE_ALLOC) && (buf->slot.id != SG_INVALID_ID)); _sg_pool_free_index(&_sg.pools.buffer_pool, _sg_slot_index(buf->slot.id)); - _sg_reset_slot(&buf->slot); + _sg_slot_reset(&buf->slot); } _SOKOL_PRIVATE void _sg_dealloc_image(_sg_image_t* img) { SOKOL_ASSERT(img && (img->slot.state == SG_RESOURCESTATE_ALLOC) && (img->slot.id != SG_INVALID_ID)); _sg_pool_free_index(&_sg.pools.image_pool, _sg_slot_index(img->slot.id)); - _sg_reset_slot(&img->slot); + _sg_slot_reset(&img->slot); } _SOKOL_PRIVATE void _sg_dealloc_sampler(_sg_sampler_t* smp) { SOKOL_ASSERT(smp && (smp->slot.state == SG_RESOURCESTATE_ALLOC) && (smp->slot.id != SG_INVALID_ID)); _sg_pool_free_index(&_sg.pools.sampler_pool, _sg_slot_index(smp->slot.id)); - _sg_reset_slot(&smp->slot); + _sg_slot_reset(&smp->slot); } _SOKOL_PRIVATE void _sg_dealloc_shader(_sg_shader_t* shd) { SOKOL_ASSERT(shd && (shd->slot.state == SG_RESOURCESTATE_ALLOC) && (shd->slot.id != SG_INVALID_ID)); _sg_pool_free_index(&_sg.pools.shader_pool, _sg_slot_index(shd->slot.id)); - _sg_reset_slot(&shd->slot); + _sg_slot_reset(&shd->slot); } _SOKOL_PRIVATE void _sg_dealloc_pipeline(_sg_pipeline_t* pip) { SOKOL_ASSERT(pip && (pip->slot.state == SG_RESOURCESTATE_ALLOC) && (pip->slot.id != SG_INVALID_ID)); _sg_pool_free_index(&_sg.pools.pipeline_pool, _sg_slot_index(pip->slot.id)); - _sg_reset_slot(&pip->slot); + _sg_slot_reset(&pip->slot); } _SOKOL_PRIVATE void _sg_dealloc_attachments(_sg_attachments_t* atts) { SOKOL_ASSERT(atts && (atts->slot.state == SG_RESOURCESTATE_ALLOC) && (atts->slot.id != SG_INVALID_ID)); _sg_pool_free_index(&_sg.pools.attachments_pool, _sg_slot_index(atts->slot.id)); - _sg_reset_slot(&atts->slot); + _sg_slot_reset(&atts->slot); } _SOKOL_PRIVATE void _sg_init_buffer(_sg_buffer_t* buf, const sg_buffer_desc* desc) { @@ -18046,6 +19210,29 @@ _SOKOL_PRIVATE bool _sg_remove_commit_listener(const sg_commit_listener* listene return false; } +_SOKOL_PRIVATE void _sg_setup_compute(const sg_desc* desc) { + SOKOL_ASSERT(desc && (desc->max_dispatch_calls_per_pass > 0)); + const uint32_t max_tracked_sbufs = (uint32_t)desc->max_dispatch_calls_per_pass * SG_MAX_STORAGEBUFFER_BINDSLOTS; + _sg_tracker_init(&_sg.compute.readwrite_sbufs, max_tracked_sbufs); +} + +_SOKOL_PRIVATE void _sg_discard_compute(void) { + _sg_tracker_discard(&_sg.compute.readwrite_sbufs); +} + +_SOKOL_PRIVATE void _sg_compute_pass_track_storage_buffer(_sg_buffer_t* sbuf, bool readonly) { + SOKOL_ASSERT(sbuf); + if (!readonly) { + _sg_tracker_add(&_sg.compute.readwrite_sbufs, sbuf->slot.id); + } +} + +_SOKOL_PRIVATE void _sg_compute_on_endpass(void) { + SOKOL_ASSERT(_sg.cur_pass.in_pass); + SOKOL_ASSERT(_sg.cur_pass.is_compute); + _sg_tracker_reset(&_sg.compute.readwrite_sbufs); +} + _SOKOL_PRIVATE sg_desc _sg_desc_defaults(const sg_desc* desc) { /* NOTE: on WebGPU, the default color pixel format MUST be provided, @@ -18068,6 +19255,7 @@ _SOKOL_PRIVATE sg_desc _sg_desc_defaults(const sg_desc* desc) { res.pipeline_pool_size = _sg_def(res.pipeline_pool_size, _SG_DEFAULT_PIPELINE_POOL_SIZE); res.attachments_pool_size = _sg_def(res.attachments_pool_size, _SG_DEFAULT_ATTACHMENTS_POOL_SIZE); res.uniform_buffer_size = _sg_def(res.uniform_buffer_size, _SG_DEFAULT_UB_SIZE); + res.max_dispatch_calls_per_pass = _sg_def(res.max_dispatch_calls_per_pass, _SG_DEFAULT_MAX_DISPATCH_CALLS_PER_PASS); res.max_commit_listeners = _sg_def(res.max_commit_listeners, _SG_DEFAULT_MAX_COMMIT_LISTENERS); res.wgpu_bindgroups_cache_size = _sg_def(res.wgpu_bindgroups_cache_size, _SG_DEFAULT_WGPU_BINDGROUP_CACHE_SIZE); return res; @@ -18075,13 +19263,15 @@ _SOKOL_PRIVATE sg_pass _sg_pass_defaults(const sg_pass* pass) { sg_pass res = *pass; - if (res.attachments.id == SG_INVALID_ID) { - // this is a swapchain-pass - res.swapchain.sample_count = _sg_def(res.swapchain.sample_count, _sg.desc.environment.defaults.sample_count); - res.swapchain.color_format = _sg_def(res.swapchain.color_format, _sg.desc.environment.defaults.color_format); - res.swapchain.depth_format = _sg_def(res.swapchain.depth_format, _sg.desc.environment.defaults.depth_format); + if (!res.compute) { + if (res.attachments.id == SG_INVALID_ID) { + // this is a swapchain-pass + res.swapchain.sample_count = _sg_def(res.swapchain.sample_count, _sg.desc.environment.defaults.sample_count); + res.swapchain.color_format = _sg_def(res.swapchain.color_format, _sg.desc.environment.defaults.color_format); + res.swapchain.depth_format = _sg_def(res.swapchain.depth_format, _sg.desc.environment.defaults.depth_format); + } + res.action = _sg_pass_action_defaults(&res.action); } - res.action = _sg_pass_action_defaults(&res.action); return res; } @@ -18099,6 +19289,7 @@ SOKOL_API_IMPL void sg_setup(const sg_desc* desc) { _SG_CLEAR_ARC_STRUCT(_sg_state_t, _sg); _sg.desc = _sg_desc_defaults(desc); _sg_setup_pools(&_sg.pools, &_sg.desc); + _sg_setup_compute(&_sg.desc); _sg_setup_commit_listeners(&_sg.desc); _sg.frame_index = 1; _sg.stats_enabled = true; @@ -18110,6 +19301,7 @@ SOKOL_API_IMPL void sg_shutdown(void) { 
_sg_discard_all_resources(&_sg.pools); _sg_discard_backend(); _sg_discard_commit_listeners(); + _sg_discard_compute(); _sg_discard_pools(&_sg.pools); _SG_CLEAR_ARC_STRUCT(_sg_state_t, _sg); } @@ -18803,38 +19995,45 @@ SOKOL_API_IMPL void sg_begin_pass(const sg_pass* pass) { if (!_sg_validate_begin_pass(&pass_def)) { return; } - if (pass_def.attachments.id != SG_INVALID_ID) { - // an offscreen pass - SOKOL_ASSERT(_sg.cur_pass.atts == 0); - _sg.cur_pass.atts = _sg_lookup_attachments(&_sg.pools, pass_def.attachments.id); - if (0 == _sg.cur_pass.atts) { - _SG_ERROR(BEGINPASS_ATTACHMENT_INVALID); - return; + if (!pass_def.compute) { + if (pass_def.attachments.id != SG_INVALID_ID) { + // an offscreen pass + SOKOL_ASSERT(_sg.cur_pass.atts == 0); + _sg.cur_pass.atts = _sg_lookup_attachments(&_sg.pools, pass_def.attachments.id); + if (0 == _sg.cur_pass.atts) { + _SG_ERROR(BEGINPASS_ATTACHMENT_INVALID); + return; + } + _sg.cur_pass.atts_id = pass_def.attachments; + _sg.cur_pass.width = _sg.cur_pass.atts->cmn.width; + _sg.cur_pass.height = _sg.cur_pass.atts->cmn.height; + } else { + // a swapchain pass + SOKOL_ASSERT(pass_def.swapchain.width > 0); + SOKOL_ASSERT(pass_def.swapchain.height > 0); + SOKOL_ASSERT(pass_def.swapchain.color_format > SG_PIXELFORMAT_NONE); + SOKOL_ASSERT(pass_def.swapchain.sample_count > 0); + _sg.cur_pass.width = pass_def.swapchain.width; + _sg.cur_pass.height = pass_def.swapchain.height; + _sg.cur_pass.swapchain.color_fmt = pass_def.swapchain.color_format; + _sg.cur_pass.swapchain.depth_fmt = pass_def.swapchain.depth_format; + _sg.cur_pass.swapchain.sample_count = pass_def.swapchain.sample_count; } - _sg.cur_pass.atts_id = pass_def.attachments; - _sg.cur_pass.width = _sg.cur_pass.atts->cmn.width; - _sg.cur_pass.height = _sg.cur_pass.atts->cmn.height; - } else { - // a swapchain pass - SOKOL_ASSERT(pass_def.swapchain.width > 0); - SOKOL_ASSERT(pass_def.swapchain.height > 0); - SOKOL_ASSERT(pass_def.swapchain.color_format > SG_PIXELFORMAT_NONE); - SOKOL_ASSERT(pass_def.swapchain.sample_count > 0); - _sg.cur_pass.width = pass_def.swapchain.width; - _sg.cur_pass.height = pass_def.swapchain.height; - _sg.cur_pass.swapchain.color_fmt = pass_def.swapchain.color_format; - _sg.cur_pass.swapchain.depth_fmt = pass_def.swapchain.depth_format; - _sg.cur_pass.swapchain.sample_count = pass_def.swapchain.sample_count; } _sg.cur_pass.valid = true; // may be overruled by backend begin-pass functions _sg.cur_pass.in_pass = true; + _sg.cur_pass.is_compute = pass_def.compute; _sg_begin_pass(&pass_def); _SG_TRACE_ARGS(begin_pass, &pass_def); } SOKOL_API_IMPL void sg_apply_viewport(int x, int y, int width, int height, bool origin_top_left) { SOKOL_ASSERT(_sg.valid); - SOKOL_ASSERT(_sg.cur_pass.in_pass); + #if defined(SOKOL_DEBUG) + if (!_sg_validate_apply_viewport(x, y, width, height, origin_top_left)) { + return; + } + #endif _sg_stats_add(num_apply_viewport, 1); if (!_sg.cur_pass.valid) { return; @@ -18849,7 +20048,11 @@ SOKOL_API_IMPL void sg_apply_viewportf(float x, float y, float width, float heig SOKOL_API_IMPL void sg_apply_scissor_rect(int x, int y, int width, int height, bool origin_top_left) { SOKOL_ASSERT(_sg.valid); - SOKOL_ASSERT(_sg.cur_pass.in_pass); + #if defined(SOKOL_DEBUG) + if (!_sg_validate_apply_scissor_rect(x, y, width, height, origin_top_left)) { + return; + } + #endif _sg_stats_add(num_apply_scissor_rect, 1); if (!_sg.cur_pass.valid) { return; @@ -18864,7 +20067,6 @@ SOKOL_API_IMPL void sg_apply_scissor_rectf(float x, float y, float width, float SOKOL_API_IMPL void 
     SOKOL_ASSERT(_sg.valid);
-    SOKOL_ASSERT(_sg.cur_pass.in_pass);
     _sg_stats_add(num_apply_pipeline, 1);
     if (!_sg_validate_apply_pipeline(pip_id)) {
         _sg.next_draw_valid = false;
@@ -18894,7 +20096,6 @@ SOKOL_API_IMPL void sg_apply_pipeline(sg_pipeline pip_id) {
 
 SOKOL_API_IMPL void sg_apply_bindings(const sg_bindings* bindings) {
     SOKOL_ASSERT(_sg.valid);
-    SOKOL_ASSERT(_sg.cur_pass.in_pass);
     SOKOL_ASSERT(bindings);
     SOKOL_ASSERT((bindings->_start_canary == 0) && (bindings->_end_canary==0));
     _sg_stats_add(num_apply_bindings, 1);
@@ -18919,31 +20120,32 @@ SOKOL_API_IMPL void sg_apply_bindings(const sg_bindings* bindings) {
     SOKOL_ASSERT(bnd.pip->shader && (bnd.pip->cmn.shader_id.id == bnd.pip->shader->slot.id));
     const _sg_shader_t* shd = bnd.pip->shader;
 
-    for (size_t i = 0; i < SG_MAX_VERTEXBUFFER_BINDSLOTS; i++) {
-        if (bnd.pip->cmn.vertex_buffer_layout_active[i]) {
-            SOKOL_ASSERT(bindings->vertex_buffers[i].id != SG_INVALID_ID);
-            bnd.vbs[i] = _sg_lookup_buffer(&_sg.pools, bindings->vertex_buffers[i].id);
-            bnd.vb_offsets[i] = bindings->vertex_buffer_offsets[i];
-            if (bnd.vbs[i]) {
-                _sg.next_draw_valid &= (SG_RESOURCESTATE_VALID == bnd.vbs[i]->slot.state);
-                _sg.next_draw_valid &= !bnd.vbs[i]->cmn.append_overflow;
+    if (!_sg.cur_pass.is_compute) {
+        for (size_t i = 0; i < SG_MAX_VERTEXBUFFER_BINDSLOTS; i++) {
+            if (bnd.pip->cmn.vertex_buffer_layout_active[i]) {
+                SOKOL_ASSERT(bindings->vertex_buffers[i].id != SG_INVALID_ID);
+                bnd.vbs[i] = _sg_lookup_buffer(&_sg.pools, bindings->vertex_buffers[i].id);
+                bnd.vb_offsets[i] = bindings->vertex_buffer_offsets[i];
+                if (bnd.vbs[i]) {
+                    _sg.next_draw_valid &= (SG_RESOURCESTATE_VALID == bnd.vbs[i]->slot.state);
+                    _sg.next_draw_valid &= !bnd.vbs[i]->cmn.append_overflow;
+                } else {
+                    _sg.next_draw_valid = false;
+                }
+            }
+        }
+        if (bindings->index_buffer.id) {
+            bnd.ib = _sg_lookup_buffer(&_sg.pools, bindings->index_buffer.id);
+            bnd.ib_offset = bindings->index_buffer_offset;
+            if (bnd.ib) {
+                _sg.next_draw_valid &= (SG_RESOURCESTATE_VALID == bnd.ib->slot.state);
+                _sg.next_draw_valid &= !bnd.ib->cmn.append_overflow;
             } else {
                 _sg.next_draw_valid = false;
             }
         }
     }
-    if (bindings->index_buffer.id) {
-        bnd.ib = _sg_lookup_buffer(&_sg.pools, bindings->index_buffer.id);
-        bnd.ib_offset = bindings->index_buffer_offset;
-        if (bnd.ib) {
-            _sg.next_draw_valid &= (SG_RESOURCESTATE_VALID == bnd.ib->slot.state);
-            _sg.next_draw_valid &= !bnd.ib->cmn.append_overflow;
-        } else {
-            _sg.next_draw_valid = false;
-        }
-    }
-
     for (int i = 0; i < SG_MAX_IMAGE_BINDSLOTS; i++) {
         if (shd->cmn.images[i].stage != SG_SHADERSTAGE_NONE) {
             SOKOL_ASSERT(bindings->images[i].id != SG_INVALID_ID);
@@ -18974,6 +20176,9 @@ SOKOL_API_IMPL void sg_apply_bindings(const sg_bindings* bindings) {
             bnd.sbufs[i] = _sg_lookup_buffer(&_sg.pools, bindings->storage_buffers[i].id);
             if (bnd.sbufs[i]) {
                 _sg.next_draw_valid &= (SG_RESOURCESTATE_VALID == bnd.sbufs[i]->slot.state);
+                if (_sg.cur_pass.is_compute) {
+                    _sg_compute_pass_track_storage_buffer(bnd.sbufs[i], shd->cmn.storage_buffers[i].readonly);
+                }
             } else {
                 _sg.next_draw_valid = false;
             }
@@ -18988,7 +20193,6 @@ SOKOL_API_IMPL void sg_apply_bindings(const sg_bindings* bindings) {
 
 SOKOL_API_IMPL void sg_apply_uniforms(int ub_slot, const sg_range* data) {
     SOKOL_ASSERT(_sg.valid);
-    SOKOL_ASSERT(_sg.cur_pass.in_pass);
     SOKOL_ASSERT((ub_slot >= 0) && (ub_slot < SG_MAX_UNIFORMBLOCK_BINDSLOTS));
     SOKOL_ASSERT(data && data->ptr && (data->size > 0));
     _sg_stats_add(num_apply_uniforms, 1);
@@ -19010,10 +20214,11 @@ SOKOL_API_IMPL void sg_apply_uniforms(int ub_slot, const sg_range* data) {
 
 SOKOL_API_IMPL void sg_draw(int base_element, int num_elements, int num_instances) {
     SOKOL_ASSERT(_sg.valid);
-    SOKOL_ASSERT(_sg.cur_pass.in_pass);
-    SOKOL_ASSERT(base_element >= 0);
-    SOKOL_ASSERT(num_elements >= 0);
-    SOKOL_ASSERT(num_instances >= 0);
+    #if defined(SOKOL_DEBUG)
+    if (!_sg_validate_draw(base_element, num_elements, num_instances)) {
+        return;
+    }
+    #endif
     _sg_stats_add(num_draw, 1);
     if (!_sg.cur_pass.valid) {
         return;
@@ -19021,15 +20226,7 @@ SOKOL_API_IMPL void sg_draw(int base_element, int num_elements, int num_instance
     if (!_sg.next_draw_valid) {
         return;
     }
-    #if defined(SOKOL_DEBUG)
-    if (_sg.required_bindings_and_uniforms != _sg.applied_bindings_and_uniforms) {
-        _SG_ERROR(DRAW_REQUIRED_BINDINGS_OR_UNIFORMS_MISSING);
-        return;
-    }
-    #endif
-    /* attempting to draw with zero elements or instances is not technically an
-       error, but might be handled as an error in the backend API (e.g. on Metal)
-    */
+    // skip no-op draws
     if ((0 == num_elements) || (0 == num_instances)) {
         return;
     }
@@ -19037,6 +20234,28 @@ SOKOL_API_IMPL void sg_draw(int base_element, int num_elements, int num_instance
     _SG_TRACE_ARGS(draw, base_element, num_elements, num_instances);
 }
 
+SOKOL_API_IMPL void sg_dispatch(int num_groups_x, int num_groups_y, int num_groups_z) {
+    SOKOL_ASSERT(_sg.valid);
+    #if defined(SOKOL_DEBUG)
+    if (!_sg_validate_dispatch(num_groups_x, num_groups_y, num_groups_z)) {
+        return;
+    }
+    #endif
+    _sg_stats_add(num_dispatch, 1);
+    if (!_sg.cur_pass.valid) {
+        return;
+    }
+    if (!_sg.next_draw_valid) {
+        return;
+    }
+    // skip no-op dispatches
+    if ((0 == num_groups_x) || (0 == num_groups_y) || (0 == num_groups_z)) {
+        return;
+    }
+    _sg_dispatch(num_groups_x, num_groups_y, num_groups_z);
+    _SG_TRACE_ARGS(dispatch, num_groups_x, num_groups_y, num_groups_z);
+}
+
 SOKOL_API_IMPL void sg_end_pass(void) {
     SOKOL_ASSERT(_sg.valid);
     SOKOL_ASSERT(_sg.cur_pass.in_pass);
@@ -19044,6 +20263,9 @@ SOKOL_API_IMPL void sg_end_pass(void) {
     // NOTE: don't exit early if !_sg.cur_pass.valid
     _sg_end_pass();
     _sg.cur_pipeline.id = SG_INVALID_ID;
+    if (_sg.cur_pass.is_compute) {
+        _sg_compute_on_endpass();
+    }
     _sg_clear(&_sg.cur_pass, sizeof(_sg.cur_pass));
     _SG_TRACE_NOARGS(end_pass);
 }
 
@@ -19503,6 +20725,7 @@ SOKOL_API_IMPL sg_pipeline_desc sg_query_pipeline_desc(sg_pipeline pip_id) {
     _sg_clear(&desc, sizeof(desc));
     const _sg_pipeline_t* pip = _sg_lookup_pipeline(&_sg.pools, pip_id.id);
     if (pip) {
+        desc.compute = pip->cmn.is_compute;
         desc.shader = pip->cmn.shader_id;
         desc.layout = pip->cmn.layout;
         desc.depth = pip->cmn.depth;
@@ -19704,8 +20927,20 @@ SOKOL_API_IMPL const void* sg_mtl_device(void) {
 
 SOKOL_API_IMPL const void* sg_mtl_render_command_encoder(void) {
     #if defined(SOKOL_METAL)
-    if (nil != _sg.mtl.cmd_encoder) {
-        return (__bridge const void*) _sg.mtl.cmd_encoder;
+    if (nil != _sg.mtl.render_cmd_encoder) {
+        return (__bridge const void*) _sg.mtl.render_cmd_encoder;
+    } else {
+        return 0;
+    }
+    #else
+    return 0;
+    #endif
+}
+
+SOKOL_API_IMPL const void* sg_mtl_compute_command_encoder(void) {
+    #if defined(SOKOL_METAL)
+    if (nil != _sg.mtl.compute_cmd_encoder) {
+        return (__bridge const void*) _sg.mtl.compute_cmd_encoder;
     } else {
         return 0;
     }
     #else
     return 0;
     #endif
 }
@@ -19847,7 +21082,15 @@ SOKOL_API_IMPL const void* sg_wgpu_command_encoder(void) {
 
 SOKOL_API_IMPL const void* sg_wgpu_render_pass_encoder(void) {
     #if defined(SOKOL_WGPU)
-    return (const void*) _sg.wgpu.pass_enc;
+    return (const void*) _sg.wgpu.rpass_enc;
+    #else
+    return 0;
+    #endif
+}
+
+SOKOL_API_IMPL const void* sg_wgpu_compute_pass_encoder(void) {
+    #if defined(SOKOL_WGPU)
+    return (const void*) _sg.wgpu.cpass_enc;
     #else
     return 0;
     #endif
 }
@@ -19923,7 +21166,8 @@ SOKOL_API_IMPL sg_wgpu_pipeline_info sg_wgpu_query_pipeline_info(sg_pipeline pip
     #if defined(SOKOL_WGPU)
     const _sg_pipeline_t* pip = _sg_lookup_pipeline(&_sg.pools, pip_id.id);
     if (pip) {
-        res.pip = (const void*) pip->wgpu.pip;
+        res.render_pipeline = (const void*) pip->wgpu.rpip;
+        res.compute_pipeline = (const void*) pip->wgpu.cpip;
     }
     #else
     _SOKOL_UNUSED(pip_id);
diff --git a/tools/compile_shaders.py b/tools/compile_shaders.py
new file mode 100644
index 0000000..f9a5ace
--- /dev/null
+++ b/tools/compile_shaders.py
@@ -0,0 +1,157 @@
+#!/usr/bin/env python3
+# -*- coding: utf-8 -*-
+
+import argparse
+import subprocess
+import sys
+from pathlib import Path
+from typing import List, Tuple, Iterator
+
+# list of supported target backends
+BACKENDS = [
+    "glsl",   # SG_BACKEND_GLCORE
+    "essl",   # SG_BACKEND_GLES3
+    "hlsl",   # SG_BACKEND_D3D11
+    "metal",  # SG_BACKEND_METAL_IOS, SG_BACKEND_METAL_MACOS, SG_BACKEND_METAL_SIMULATOR
+    "wgsl",   # SG_BACKEND_WGPU
+]
+
+SHADER_EXTENSIONS = {
+    'glsl': 'glsl',
+    'essl': 'glsl',
+    'hlsl': 'dxil',
+    'metal': 'metallib',
+    'wgsl': 'wgsl'
+}
+
+TARGET_PROFILES = {
+    # 'glsl': ['-profile', 'glsl_460'],
+    # 'essl': ['-profile', 'glsl_300es'],
+    # 'hlsl': ['-profile', 'sm_5_0'],
+    # 'metal': ['-capability', 'metallib'],
+    # 'wgsl': ['-profile', 'wgsl']
+}
+
+def need_recompile(input_file: Path, output_path: Path) -> bool:
+    """Check whether the shader needs to be recompiled."""
+    # Output files are named <input stem>.<backend>.h, so look for any
+    # existing <stem>.<backend>.h outputs and compare their timestamps
+    # against the source file.
+    input_time = input_file.stat().st_mtime
+    found_output = False
+    for backend in BACKENDS:
+        output_file = output_path / f"{input_file.stem}.{backend}.h"
+        if not output_file.exists():
+            continue
+        found_output = True
+        if input_time > output_file.stat().st_mtime:
+            # at least one existing output is older than the source
+            return True
+    # recompile when no output exists at all
+    return not found_output
+
+
+def find_shader_files(input_dir: Path, extensions: List[str]) -> Iterator[Path]:
+    """Recursively find shader files under the given directory."""
+    for file_path in Path(input_dir).rglob('*'):
+        if file_path.suffix in extensions:
+            yield file_path
+
+def create_compiler_command(
+    input_file: Path,
+    output_file: Path,
+    target_type: str,
+    args: argparse.Namespace
+) -> List[str]:
+    """Build the shader compiler command line."""
+    cmd = [args.shdc,
+           str(input_file),
+           "-o", str(output_file),
+           "-t", target_type,
+           ]
+
+    if args.debug:
+        cmd.append('-d')
+
+    if target_type in TARGET_PROFILES:
+        cmd.extend(TARGET_PROFILES[target_type])
+
+    return cmd
+
+def compile_shader(
+    input_file: Path,
+    target_types: List[Tuple[str, bool]],
+    args: argparse.Namespace
+) -> bool:
+    """Compile a single shader file for all enabled target backends."""
+    try:
+        output_dir = input_file.parent
+        success = True
+
+        for target_type, enabled in target_types:
+            if not enabled:
+                continue
+            if not need_recompile(input_file, output_dir):
+                print(f"**Skipped**: {input_file} is already up to date")
+                continue
+
+            cmd = create_compiler_command(input_file, output_dir, target_type, args)
+            try:
+                # decode compiler output as UTF-8 and replace any undecodable characters
+                subprocess.run(cmd, check=True, capture_output=True, text=True, encoding='utf-8', errors='replace')
+                print(f"**Success**: compiled {input_file}")
+            except subprocess.CalledProcessError as e:
+                print(f"**Error**: failed to compile {input_file}")
+                print(e.stderr)
+                success = False
+                continue
+
+        return success
+    except Exception as e:
+        print(f"**Error**: exception while processing {input_file}: {e}")
+        return False
+
+def main():
+    parser = argparse.ArgumentParser(description='Compile slang shaders into C++ headers')
+    parser.add_argument('--shdc', help='path to the shader compiler')
+    parser.add_argument('--shader_list', help='path to the file listing shader directories')
+    parser.add_argument("--hlsl", action="store_true", help="compile HLSL shaders")
+    parser.add_argument("--glsl", action="store_true", help="compile GLSL shaders")
+    parser.add_argument("--essl", action="store_true", help="compile ESSL shaders")
+    parser.add_argument("--metal", action="store_true", help="compile Metal shaders")
+    parser.add_argument("--wgsl", action="store_true", help="compile WGSL shaders")
+    parser.add_argument("--debug", action="store_true", help="compile debug versions")
+    args = parser.parse_args()
+
+    # determine which target backends to compile
+    target_types: List[Tuple[str, bool]] = [
+        ('glsl', args.glsl),
+        ('essl', args.essl),
+        ('hlsl', args.hlsl),
+        ('metal', args.metal),
+        ('wgsl', args.wgsl),
+    ]
+
+    # if no backend was selected, enable all backends by default
+    if not any(enabled for _, enabled in target_types):
+        target_types = [(backend, True) for backend, _ in target_types]
+
+    shader_list = Path(args.shader_list or "shader_paths.txt")
+
+    try:
+        shader_paths = shader_list.read_text(encoding="utf-8").splitlines()
+    except Exception as e:
+        print(f"**Error**: failed to read shader list file {shader_list}: {e}")
+        sys.exit(1)
+
+    all_success = True
+    for shader_path in shader_paths:
+        shader_path = shader_path.strip()
+        if not shader_path:
+            continue
+        for file in find_shader_files(Path(shader_path), ['.slang']):
+            if not compile_shader(file, target_types, args):
+                all_success = False
+
+    if not all_success:
+        sys.exit(1)
+
+if __name__ == "__main__":
+    main()
diff --git a/tools/mirage_shdc.exe b/tools/mirage_shdc.exe
new file mode 100644
index 0000000..6ae8484
Binary files /dev/null and b/tools/mirage_shdc.exe differ
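
Usage note: the sokol_gfx.h changes above add compute support — compute passes and pipelines, sg_dispatch(), per-pass tracking of read/write storage buffers, and the new Metal/WebGPU compute-encoder getters. The snippet below is a minimal sketch (not part of the diff) of how these pieces fit together. `compute_shader_desc` and `storage_buf` are assumed to be created elsewhere, and the environment setup in sg_setup() is elided; the `.compute` flags, `.storage_buffers` bindings, `max_dispatch_calls_per_pass`, and sg_dispatch() all come from this commit.

    // sketch only: assumes compute_shader_desc and storage_buf exist
    sg_setup(&(sg_desc){
        // sizes the per-pass tracker for read/write storage buffers
        .max_dispatch_calls_per_pass = 1024,
    });

    sg_pipeline pip = sg_make_pipeline(&(sg_pipeline_desc){
        .compute = true,  // a compute pipeline instead of a render pipeline
        .shader = sg_make_shader(&compute_shader_desc),
    });

    // a compute pass takes neither attachments nor a swapchain
    sg_begin_pass(&(sg_pass){ .compute = true });
    sg_apply_pipeline(pip);
    sg_apply_bindings(&(sg_bindings){
        // non-readonly storage buffers are tracked per pass for hazard handling
        .storage_buffers[0] = storage_buf,
    });
    sg_dispatch(64, 1, 1);  // workgroup counts in x, y, z; zero counts are skipped as no-ops
    sg_end_pass();          // resets the read/write storage-buffer tracker
    sg_commit();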