diff --git a/CMakeLists.txt b/CMakeLists.txt index aa660d0..c4d3d32 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -10,6 +10,8 @@ if (WIN32) set(RTAUDIO_API_DS OFF CACHE BOOL "" FORCE) elseif(APPLE) set(RTAUDIO_API_CORE ON CACHE BOOL "" FORCE) +else() + set(RTAUDIO_API_ALSA ON CACHE BOOL "" FORCE) endif() # setup spdlog @@ -37,4 +39,3 @@ add_subdirectory(third_party/spdlog) add_subdirectory(third_party/mempool) add_subdirectory(third_party/taskflow) add_subdirectory(third_party/glfw) - diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index d65283a..e83b2b0 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -1,6 +1,6 @@ cmake_minimum_required(VERSION 3.5) project(core) -set(CMAKE_CXX_STANDARD 23) +set(CMAKE_CXX_STANDARD 26) set(ALL_FILES "") retrieve_files(${CMAKE_CURRENT_SOURCE_DIR} ALL_FILES) @@ -33,6 +33,13 @@ elseif(UNIX AND NOT APPLE) target_compile_definitions(${PROJECT_NAME} PUBLIC PLATFORM_WINDOWS=0 PLATFORM_MACOS=0 PLATFORM_LINUX=1 GLFW_EXPOSE_NATIVE_X11) endif() +# cpu amd or arm +if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*") + target_compile_definitions(${PROJECT_NAME} PUBLIC CPU_AMD64=1 CPU_ARM=0) +elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "arm.*|ARM.*") + target_compile_definitions(${PROJECT_NAME} PUBLIC CPU_AMD64=0 CPU_ARM=1) +endif() + if (CMAKE_BUILD_TYPE MATCHES "Debug") target_compile_definitions(${PROJECT_NAME} PUBLIC BUILD_DEBUG=1) else() diff --git a/core/audio/misc/audio_buffer.cpp b/core/audio/misc/audio_buffer.cpp index d9e9b96..ba6c342 100644 --- a/core/audio/misc/audio_buffer.cpp +++ b/core/audio/misc/audio_buffer.cpp @@ -1,57 +1,159 @@ #include "audio_buffer.h" #include +#include -void audio_buffer::resize(uint32_t channel_num, uint32_t block_size) { - std::scoped_lock lock(lock_); - buffer_.resize(channel_num); - headers_.resize(channel_num); - for (uint32_t i = 0; i < channel_num; i++) { - buffer_[i].resize(block_size); - headers_[i] = buffer_[i].data(); +#include "audio_buffer_pool.h" +#include
"misc/cpu_simd.h" +#include "misc/likely.h" + +void(*audio_buffer::add_func)(audio_buffer& in_buffer, audio_buffer& from_buffer, float percent); +void(*audio_buffer::multiple_func)(audio_buffer& in_buffer, float percent); + +template +void add_simd(audio_buffer& in_buffer, audio_buffer& from_buffer, float percent) { /* Adds from_buffer scaled by percent into in_buffer, simd_size samples per step. Assumes from_buffer has at least as many channels and samples as in_buffer; neither is checked here. */ + using namespace std::experimental; + using simd_type = simd_abi::fixed_size; + + simd percent_simd(percent); + for (uint32_t channel_index = 0; channel_index < in_buffer.get_num_channels(); channel_index++) { + sample_t* channel = in_buffer.get_headers()[channel_index]; + sample_t* in_channel = from_buffer.get_headers()[channel_index]; + int i = 0; + for (; i + simd_size <= in_buffer.get_num_samples(); i += simd_size) { /* whole vectors only, so no load or store runs past the last sample */ + simd a(channel, element_aligned); + simd b(in_channel, element_aligned); + a += b * percent_simd; + a.copy_to(channel, element_aligned); + + channel += simd_size; + in_channel += simd_size; + } + // scalar tail for the samples left over when the count is not a multiple of simd_size; both pointers already sit on the first leftover sample + for (; i < in_buffer.get_num_samples(); ++i) { + *channel++ += *in_channel++ * percent; + } } - for (auto& channel : buffer_) { - std::memset(channel.data(), 0, channel.size() * sizeof(sample_t)); - } +} + +void add_no_simd(audio_buffer& in_buffer, audio_buffer& from_buffer, float percent) { /* Scalar fallback: the same per-sample update, one sample at a time. */ + for (uint32_t channel_index = 0; channel_index < in_buffer.get_num_channels(); channel_index++) { + sample_t* channel = in_buffer.get_headers()[channel_index]; + sample_t* in_channel = from_buffer.get_headers()[channel_index]; + for (int i = 0; i < in_buffer.get_num_samples(); ++i) { + channel[i] += in_channel[i] * percent; + } + } +} + +template +void multiple_simd(audio_buffer& in_buffer, float percent) { /* Scales every sample of in_buffer by percent, simd_size samples per step. */ + using namespace std::experimental; + using simd_type = simd_abi::fixed_size; + + simd percent_simd(percent); + for (auto channel : in_buffer.get_headers_vector()) { + int i = 0; + for (; i + simd_size <= in_buffer.get_num_samples(); i += simd_size) { /* whole vectors only, so no load or store runs past the last sample */ + simd a(channel, element_aligned); + a *= percent_simd; +
a.copy_to(channel, element_aligned); + channel += simd_size; + } + // scalar tail for the samples left over when the count is not a multiple of simd_size; channel already sits on the first leftover sample + for (; i < in_buffer.get_num_samples(); ++i) { + *channel++ *= percent; + } + } +} + +void multiple_no_simd(audio_buffer& in_buffer, float percent) { /* Scalar fallback: scales one sample at a time. */ + for (auto channel : in_buffer.get_headers_vector()) { + for (int i = 0; i < in_buffer.get_num_samples(); ++i) { + channel[i] *= percent; + } + } +} + +audio_buffer::audio_buffer() { /* The first construction picks add_func and multiple_func from the CPU features reported by cpuid; later constructions skip the selection. NOTE(review): func_initialized is a plain static bool, so the selection is not synchronized across threads. */ + using namespace std::experimental; + static bool func_initialized = false; + if (UNLIKELY(!func_initialized)) { + cpuid cpu; +#define DEFINE_SIMD_FUNC(simd_max) \ + constexpr size_t simd_size = simd_max / sizeof(sample_t) / 8; \ + add_func = &add_simd; \ + multiple_func = &multiple_simd; +#if CPU_AMD64 + if (cpu.support_avx512()) { + DEFINE_SIMD_FUNC(512) + } else if (cpu.support_avx() || cpu.support_avx2()) { + DEFINE_SIMD_FUNC(256) + } else if (cpu.support_sse()) { + DEFINE_SIMD_FUNC(128) + } +#endif +#if CPU_ARM + if (cpu.support_neon128()) { + DEFINE_SIMD_FUNC(128) + } else if (cpu.support_neon64()) { + DEFINE_SIMD_FUNC(64) + } +#endif + if (!add_func) { + add_func = &add_no_simd; + multiple_func = &multiple_no_simd; + } + func_initialized = true; + } +#undef DEFINE_SIMD_FUNC +} + +audio_buffer::~audio_buffer() { + free(); +} + +void audio_buffer::resize(uint32_t channel_num, uint32_t frame_size) { /* Releases the current channel blocks, takes channel_num blocks of frame_size samples from the pool, then zeroes them. */ + frame_size_ = frame_size; + free(); + audio_buffer_pool* pool = get_audio_buffer_pool(); + for (int i = 0; i < channel_num; ++i) { + sample_t* block = pool->alloc(frame_size); + headers_.push_back(block); + } + clear(); } void audio_buffer::clear() { - std::scoped_lock lock(lock_); - for (auto& channel : buffer_) { - std::memset(channel.data(), 0, channel.size() * sizeof(sample_t)); + for (sample_t* channel : headers_) { + std::memset(channel, 0, frame_size_ * sizeof(sample_t)); } } -void audio_buffer::mix(audio_buffer& from_buffer, float percent) { + +void audio_buffer::add(audio_buffer& from_buffer, float
percent) { std::scoped_lock lock(lock_); - // will be optimized by compiler - for (uint32_t channel_index = 0; channel_index < buffer_.size(); channel_index++) { - auto& channel = buffer_[channel_index]; - auto& in_channel = from_buffer.buffer_[channel_index]; - for (uint32_t sample_index = 0; sample_index < channel.size(); sample_index++) { - channel[sample_index] += in_channel[sample_index] * percent; - } - } + add_func(*this, from_buffer, percent); /* runs whichever kernel the static add_func pointer holds */ } void audio_buffer::multiple(float percent) { std::scoped_lock lock(lock_); - // will be optimized by compiler - for (auto& channel : buffer_) { - for (auto& sample : channel) { - sample *= percent; - } - } + multiple_func(*this, percent); /* runs whichever kernel the static multiple_func pointer holds */ } -std::vector audio_buffer::get_interleaved_buffer() { - std::scoped_lock lock(lock_); +std::vector audio_buffer::get_interleaved_buffer() const { /* Returns the samples interleaved: for each sample index, one value per channel in headers_ order. */ std::vector result; - result.reserve(buffer_[0].size() * buffer_.size()); - for (uint32_t sample_index = 0; sample_index < buffer_[0].size(); sample_index++) { - for (uint32_t channel_index = 0; channel_index < buffer_.size(); channel_index++) { - result.push_back(buffer_[channel_index][sample_index]); + result.reserve(headers_.size() * frame_size_); + for (int i = 0; i < frame_size_; ++i) { + for (const sample_t* channel : headers_) { + result.push_back(channel[i]); } } return result; } + +void audio_buffer::free() { /* Gives every channel block back to the audio buffer pool and empties headers_. */ + for (sample_t* header : headers_) + get_audio_buffer_pool()->free(header); + headers_.clear(); +} diff --git a/core/audio/misc/audio_buffer.h b/core/audio/misc/audio_buffer.h index 04eaca8..5776bb5 100644 --- a/core/audio/misc/audio_buffer.h +++ b/core/audio/misc/audio_buffer.h @@ -3,24 +3,32 @@ #include #include #include "extern.h" +#include class CORE_API audio_buffer { public: + static void(*add_func)(audio_buffer& in_buffer, audio_buffer& from_buffer, float percent); /* kernel called by add(); assigned in the audio_buffer constructor */ + static void(*multiple_func)(audio_buffer& in_buffer, float percent); /* kernel called by multiple(); assigned in the audio_buffer constructor */ + + audio_buffer(); + ~audio_buffer(); sample_t** get_headers() { return
headers_.data(); } const std::vector& get_headers_vector() { return headers_; } - [[nodiscard]] uint32_t get_num_channels() const { return buffer_.size(); } - [[nodiscard]] uint32_t get_num_samples() const { return buffer_[0].size(); } + [[nodiscard]] uint32_t get_num_channels() const { return headers_.size(); } + [[nodiscard]] uint32_t get_num_samples() const { return frame_size_; } /* samples per channel, as passed to the last resize() */ - void resize(uint32_t channel_num, uint32_t block_size); + void resize(uint32_t channel_num, uint32_t frame_size); void clear(); - void mix(audio_buffer& from_buffer, float percent = 1.f); + void add(audio_buffer& from_buffer, float percent = 1.f); void multiple(float percent); - [[nodiscard]] std::vector get_interleaved_buffer(); + [[nodiscard]] std::vector get_interleaved_buffer() const; +private: + void free(); private: - std::vector> buffer_; std::vector headers_{}; std::mutex lock_{}; + uint32_t frame_size_ = 0; }; diff --git a/core/audio/misc/audio_buffer_pool.cpp b/core/audio/misc/audio_buffer_pool.cpp new file mode 100644 index 0000000..1095cc7 --- /dev/null +++ b/core/audio/misc/audio_buffer_pool.cpp @@ -0,0 +1,15 @@ +#include "audio_buffer_pool.h" + +IMPL_SINGLETON_INSTANCE(audio_buffer_pool) + +void audio_buffer_pool::init(singleton_initliazer& initliazer) { +} + +sample_t * audio_buffer_pool::alloc(uint32_t block_size) { /* block_size counts samples; the pool is asked for block_size * sizeof(sample_t) bytes */ + auto* alloc_block = static_cast(pool_.alloc(block_size * sizeof(sample_t))); + return alloc_block; +} + +void audio_buffer_pool::free(sample_t* block) { /* hands block back to pool_ */ + pool_.free(block); +} diff --git a/core/audio/misc/audio_buffer_pool.h b/core/audio/misc/audio_buffer_pool.h new file mode 100644 index 0000000..75a712f --- /dev/null +++ b/core/audio/misc/audio_buffer_pool.h @@ -0,0 +1,18 @@ +#pragma once +#include "mempool.h" +#include "misc/singleton/singleton.h" + +class CORE_API audio_buffer_pool : public singleton_t { +public: + void init(singleton_initliazer& initliazer) override; + const char* get_name() override { return "audio_buffer_pool"; } + + sample_t*
alloc(uint32_t block_size); + void free(sample_t* block); +protected: + [[nodiscard]] mempool<>& get_pool() { return pool_; } +private: + mempool<> pool_ = mempool(1024000 * 4); // ~4 MB pool (1024000 * 4 bytes). With 1024-sample buffers that is about 1000 buffers if sample_t is 4 bytes (TODO confirm), less the pool's own bookkeeping +}; + +DEFINE_SINGLETON_INSTANCE(audio_buffer_pool) diff --git a/core/misc/cpu_simd.h b/core/misc/cpu_simd.h new file mode 100644 index 0000000..25a419c --- /dev/null +++ b/core/misc/cpu_simd.h @@ -0,0 +1,121 @@ +#pragma once + +#include +#include +#include + +#ifdef _MSC_VER +#include +#else +#include +#endif + +inline void get_cpuid(int info[4], int infoType) { +#ifdef _MSC_VER + __cpuidex(info, infoType, 0); +#else + __cpuid_count(infoType, 0, info[0], info[1], info[2], info[3]); +#endif +} + +enum class simd_type { + sse2, + sse3, + ssse3, + sse41, + sse42, + avx, + avx2, + avx512, + neon64, + neon128, +}; + +inline std::vector get_simd_support_type() { /* Returns the SIMD feature levels detected on this CPU. On x86 the SSE and AVX bits come from CPUID leaf 1 and the AVX2 and AVX-512F bits from leaf 7; the ARM branch reads the AArch64 ID registers. NOTE(review): OS support for the AVX register state (OSXSAVE, XGETBV) is not checked. */ + std::vector simd_types; +#if CPU_AMD64 + unsigned int eax = 0, ebx = 0, ecx = 0, edx = 0; + __get_cpuid(1, &eax, &ebx, &ecx, &edx); + if (edx & bit_SSE2) { /* SSE2 is reported in EDX of leaf 1, not ECX */ + simd_types.push_back(simd_type::sse2); + } + if (ecx & bit_SSE3) { + simd_types.push_back(simd_type::sse3); + } + if (ecx & bit_SSSE3) { + simd_types.push_back(simd_type::ssse3); + } + if (ecx & bit_SSE4_1) { + simd_types.push_back(simd_type::sse41); + } + if (ecx & bit_SSE4_2) { + simd_types.push_back(simd_type::sse42); + } + if (ecx & bit_AVX) { + simd_types.push_back(simd_type::avx); + } + if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx) && (ebx & bit_AVX2)) { /* AVX2 lives in EBX of leaf 7, subleaf 0; the call returns 0 when leaf 7 is unavailable */ + simd_types.push_back(simd_type::avx2); + } + if (__get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx) && (ebx & bit_AVX512F)) { /* AVX-512F is also a leaf 7 EBX bit */ + simd_types.push_back(simd_type::avx512); + } +#endif +#if CPU_ARM + uint64_t id_aa64isar0_ = 0; + uint64_t id_aa64pfr0_ = 0; + + // Reading the ID_AA64ISAR0_EL1 register + asm volatile("mrs %0, ID_AA64ISAR0_EL1" : "=r" (id_aa64isar0_)); + // Reading the ID_AA64PFR0_EL1 register + asm volatile("mrs %0, ID_AA64PFR0_EL1" : "=r" (id_aa64pfr0_)); + if (((id_aa64pfr0_ >> 20) & 0xf) != 0xf) { /* ID_AA64PFR0_EL1 bits 23:20 are the AdvSIMD field; 0xf means Advanced SIMD is not implemented. AArch64 Advanced SIMD covers both 64-bit and 128-bit vectors, so the same test is used for both levels. */ + simd_types.push_back(simd_type::neon64); + } + if (((id_aa64pfr0_ >> 20) & 0xf) != 0xf)
{ + simd_types.push_back(simd_type::neon128); + } +#endif + + return simd_types; +} + +class CORE_API cpuid { /* Stores the result of get_simd_support_type() at construction and answers per-feature queries from that list. */ +public: + cpuid() { + simd_types = get_simd_support_type(); + } + [[nodiscard]] bool support_simd(simd_type simd) const { + return std::ranges::find(simd_types, simd) != simd_types.end(); + } + + [[nodiscard]] bool support_sse() const { + return support_simd(simd_type::sse42) || support_simd(simd_type::sse41) || support_simd(simd_type::ssse3) || support_simd(simd_type::sse3) || support_simd(simd_type::sse2); + } + + [[nodiscard]] bool support_avx() const { + return support_simd(simd_type::avx); + } + + [[nodiscard]] bool support_avx2() const { + return support_simd(simd_type::avx2); + } + + [[nodiscard]] bool support_avx512() const { + return support_simd(simd_type::avx512); + } + + [[nodiscard]] bool support_neon() const { + return support_neon64() || support_neon128(); + } + + [[nodiscard]] bool support_neon64() const { + return support_simd(simd_type::neon64); + } + + [[nodiscard]] bool support_neon128() const { + return support_simd(simd_type::neon128); + } +private: + std::vector simd_types; +}; \ No newline at end of file diff --git a/core/thread_message/thread_message_hub.h b/core/thread_message/thread_message_hub.h index ca269b6..c268ade 100644 --- a/core/thread_message/thread_message_hub.h +++ b/core/thread_message/thread_message_hub.h @@ -21,11 +21,11 @@ public: messages_.push(message); } void push_message(const std::function& func) { - lamba_thread_message* message = mem_pool_.alloc(func); + auto* message = mem_pool_.alloc(func); push_message(message); } void process_messages(); private: std::queue messages_; - mempool mem_pool_ = mempool(1024000); + mempool<> mem_pool_ = mempool(1024000); }; diff --git a/third_party/mempool/CMakeLists.txt b/third_party/mempool/CMakeLists.txt index dd9da34..cf570b6 100644 --- a/third_party/mempool/CMakeLists.txt +++ b/third_party/mempool/CMakeLists.txt @@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.5)
project(mempool) set(CMAKE_C_STANDARD 99) -set(CMAKE_CPP_STANDARD 23) +set(CMAKE_CXX_STANDARD 17) add_library(${PROJECT_NAME} SHARED mempool/ncx_core.h diff --git a/third_party/mempool/mempool.h b/third_party/mempool/mempool.h index 634af2d..5660348 100644 --- a/third_party/mempool/mempool.h +++ b/third_party/mempool/mempool.h @@ -1,19 +1,24 @@ #pragma once #include "ncx_slab.h" +#include +template class mempool { public: - mempool(size_t pool_size = 1024000) { - const auto space = (u_char*)malloc(pool_size); + mempool(size_t pool_size = 1024000) { // 1024KB + // allocate cache-aligned memory (the aligned operator new below is disabled; plain malloc is used instead) + // const auto space = static_cast(operator new(pool_size, std::align_val_t(alignment))); + const auto space = static_cast(malloc(pool_size)); /* NOTE(review): the malloc result is not checked before it is dereferenced below */ - mem_pool_ = (ncx_slab_pool_t*)space; - mem_pool_->addr = space; + mem_pool_ = reinterpret_cast(space); + mem_pool_->addr = space; mem_pool_->min_shift = 3; - mem_pool_->end = space + pool_size; + mem_pool_->end = space + pool_size; ncx_slab_init(mem_pool_); } ~mempool() { + // operator delete(mem_pool_->addr, std::align_val_t(alignment)); free(mem_pool_->addr); } [[nodiscard]] void* alloc(size_t size) const {