audio_buffer新增缓冲区池, 优化内存命中率, 新增simd加速计算
This commit is contained in:
parent
f69f3088f2
commit
3ea2f0ab4f
@ -10,6 +10,8 @@ if (WIN32)
|
||||
set(RTAUDIO_API_DS OFF CACHE BOOL "" FORCE)
|
||||
elseif(APPLE)
|
||||
set(RTAUDIO_API_CORE ON CACHE BOOL "" FORCE)
|
||||
else()
|
||||
set(RTAUDIO_API_ALSA ON CACHE BOOL "" FORCE)
|
||||
endif()
|
||||
|
||||
# setup spdlog
|
||||
@ -37,4 +39,3 @@ add_subdirectory(third_party/spdlog)
|
||||
add_subdirectory(third_party/mempool)
|
||||
add_subdirectory(third_party/taskflow)
|
||||
add_subdirectory(third_party/glfw)
|
||||
|
||||
|
@ -1,6 +1,6 @@
|
||||
cmake_minimum_required(VERSION 3.5)
|
||||
project(core)
|
||||
set(CMAKE_CXX_STANDARD 23)
|
||||
set(CMAKE_CXX_STANDARD 26)
|
||||
|
||||
set(ALL_FILES "")
|
||||
retrieve_files(${CMAKE_CURRENT_SOURCE_DIR} ALL_FILES)
|
||||
@ -33,6 +33,13 @@ elseif(UNIX AND NOT APPLE)
|
||||
target_compile_definitions(${PROJECT_NAME} PUBLIC PLATFORM_WINDOWS=0 PLATFORM_MACOS=0 PLATFORM_LINUX=1 GLFW_EXPOSE_NATIVE_X11)
|
||||
endif()
|
||||
|
||||
# Detect the target CPU family and publish it to the sources as CPU_AMD64 / CPU_ARM.
# Exactly one of the two is 1 on a recognised CPU; both are 0 otherwise, so code
# guarded by `#if CPU_AMD64` / `#if CPU_ARM` always sees a defined value.
if (CMAKE_SYSTEM_PROCESSOR MATCHES "amd64.*|x86_64.*|AMD64.*")
    target_compile_definitions(${PROJECT_NAME} PUBLIC CPU_AMD64=1 CPU_ARM=0)
elseif (CMAKE_SYSTEM_PROCESSOR MATCHES "arm.*|ARM.*|aarch64.*")
    # 64-bit ARM Linux reports "aarch64", which the plain "arm.*" pattern does not match.
    target_compile_definitions(${PROJECT_NAME} PUBLIC CPU_AMD64=0 CPU_ARM=1)
else()
    target_compile_definitions(${PROJECT_NAME} PUBLIC CPU_AMD64=0 CPU_ARM=0)
endif()
|
||||
|
||||
if (CMAKE_BUILD_TYPE MATCHES "Debug")
|
||||
target_compile_definitions(${PROJECT_NAME} PUBLIC BUILD_DEBUG=1)
|
||||
else()
|
||||
|
@ -1,57 +1,159 @@
|
||||
#include "audio_buffer.h"
|
||||
|
||||
#include <cstring>
|
||||
#include <experimental/simd>
|
||||
|
||||
void audio_buffer::resize(uint32_t channel_num, uint32_t block_size) {
|
||||
std::scoped_lock lock(lock_);
|
||||
buffer_.resize(channel_num);
|
||||
headers_.resize(channel_num);
|
||||
for (uint32_t i = 0; i < channel_num; i++) {
|
||||
buffer_[i].resize(block_size);
|
||||
headers_[i] = buffer_[i].data();
|
||||
#include "audio_buffer_pool.h"
|
||||
#include "misc/cpu_simd.h"
|
||||
#include "misc/likely.h"
|
||||
|
||||
// Kernels shared by every audio_buffer. Both start out null and are assigned
// by the audio_buffer constructor, which picks a SIMD or scalar implementation.
void (*audio_buffer::add_func)(audio_buffer& in_buffer, audio_buffer& from_buffer, float percent) = nullptr;
void (*audio_buffer::multiple_func)(audio_buffer& in_buffer, float percent) = nullptr;
|
||||
|
||||
template<int simd_size>
|
||||
void add_simd(audio_buffer& in_buffer, audio_buffer& from_buffer, float percent) {
|
||||
using namespace std::experimental;
|
||||
using simd_type = simd_abi::fixed_size<simd_size>;
|
||||
|
||||
simd<sample_t, simd_type> percent_simd(percent);
|
||||
for (uint32_t channel_index = 0; channel_index < in_buffer.get_num_channels(); channel_index++) {
|
||||
sample_t* channel = in_buffer.get_headers()[channel_index];
|
||||
sample_t* in_channel = from_buffer.get_headers()[channel_index];
|
||||
int i = 0;
|
||||
for (; i < in_buffer.get_num_samples(); i += simd_size) {
|
||||
simd<sample_t, simd_type> a(channel, element_aligned);
|
||||
simd<sample_t, simd_type> b(in_channel, element_aligned);
|
||||
a += b * percent_simd;
|
||||
a.copy_to(channel, element_aligned);
|
||||
|
||||
channel += simd_size;
|
||||
in_channel += simd_size;
|
||||
}
|
||||
// if the number of samples is not a multiple of simd_size
|
||||
for (; i < in_buffer.get_num_samples(); ++i) {
|
||||
channel[i] += in_channel[i] * percent;
|
||||
}
|
||||
}
|
||||
for (auto& channel : buffer_) {
|
||||
std::memset(channel.data(), 0, channel.size() * sizeof(sample_t));
|
||||
}
|
||||
}
|
||||
|
||||
/// Scalar kernel: adds `from_buffer * percent` into `in_buffer`, one sample at a time.
/// Channel and sample counts are taken from `in_buffer`.
void add_no_simd(audio_buffer& in_buffer, audio_buffer& from_buffer, float percent) {
    const uint32_t channel_count = in_buffer.get_num_channels();
    for (uint32_t ch = 0; ch < channel_count; ++ch) {
        sample_t* dst = in_buffer.get_headers()[ch];
        sample_t* src = from_buffer.get_headers()[ch];
        const uint32_t sample_count = in_buffer.get_num_samples();
        for (uint32_t s = 0; s < sample_count; ++s) {
            dst[s] += src[s] * percent;
        }
    }
}
|
||||
|
||||
template<int simd_size>
|
||||
void multiple_simd(audio_buffer& in_buffer, float percent) {
|
||||
using namespace std::experimental;
|
||||
using simd_type = simd_abi::fixed_size<simd_size>;
|
||||
|
||||
simd<sample_t, simd_type> percent_simd(percent);
|
||||
for (auto channel : in_buffer.get_headers_vector()) {
|
||||
int i = 0;
|
||||
for (; i < in_buffer.get_num_samples(); i += simd_size) {
|
||||
simd<sample_t, simd_type> a(channel, element_aligned);
|
||||
a *= percent_simd;
|
||||
a.copy_to(channel, element_aligned);
|
||||
channel += simd_size;
|
||||
}
|
||||
// if the number of samples is not a multiple of simd_size
|
||||
for (; i < in_buffer.get_num_samples(); ++i) {
|
||||
channel[i] *= percent;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Scalar kernel: multiplies every sample of every channel in `in_buffer` by `percent`.
void multiple_no_simd(audio_buffer& in_buffer, float percent) {
    for (sample_t* samples : in_buffer.get_headers_vector()) {
        const uint32_t sample_count = in_buffer.get_num_samples();
        for (uint32_t s = 0; s < sample_count; ++s) {
            samples[s] *= percent;
        }
    }
}
|
||||
|
||||
/// Constructs an empty buffer and, the first time any buffer is constructed,
/// selects the add/multiply kernels according to the CPU features reported by `cpuid`.
audio_buffer::audio_buffer() {
    // A function-local static is initialised exactly once, and that initialisation
    // is thread-safe, so concurrent first constructions cannot race on the
    // add_func / multiple_func pointers.
    [[maybe_unused]] static const bool func_initialized = [] {
        [[maybe_unused]] const cpuid cpu;

// Converts a vector width in bits into a lane count for sample_t and binds both kernels.
#define DEFINE_SIMD_FUNC(simd_max) \
        constexpr size_t simd_size = (simd_max) / sizeof(sample_t) / 8; \
        add_func = &add_simd<simd_size>; \
        multiple_func = &multiple_simd<simd_size>;

#if CPU_AMD64
        if (cpu.support_avx512()) {
            DEFINE_SIMD_FUNC(512)
        } else if (cpu.support_avx() || cpu.support_avx2()) {
            DEFINE_SIMD_FUNC(256)
        } else if (cpu.support_sse()) {
            DEFINE_SIMD_FUNC(128)
        }
#endif
#if CPU_ARM
        if (cpu.support_neon128()) {
            DEFINE_SIMD_FUNC(128)
        } else if (cpu.support_neon64()) {
            DEFINE_SIMD_FUNC(64)
        }
#endif

#undef DEFINE_SIMD_FUNC

        // No SIMD feature detected (or unknown CPU family): use the scalar kernels.
        if (!add_func) {
            add_func = &add_no_simd;
            multiple_func = &multiple_no_simd;
        }
        return true;
    }();
}
|
||||
|
||||
/// Hands every channel block back to the buffer pool via free().
audio_buffer::~audio_buffer() {
    this->free();
}
|
||||
|
||||
void audio_buffer::resize(uint32_t channel_num, uint32_t frame_size) {
|
||||
frame_size_ = frame_size;
|
||||
free();
|
||||
audio_buffer_pool* pool = get_audio_buffer_pool();
|
||||
for (int i = 0; i < channel_num; ++i) {
|
||||
sample_t* block = pool->alloc(frame_size);
|
||||
headers_.push_back(block);
|
||||
}
|
||||
clear();
|
||||
}
|
||||
|
||||
void audio_buffer::clear() {
|
||||
std::scoped_lock lock(lock_);
|
||||
for (auto& channel : buffer_) {
|
||||
std::memset(channel.data(), 0, channel.size() * sizeof(sample_t));
|
||||
for (sample_t* channel : headers_) {
|
||||
std::memset(channel, 0, frame_size_ * sizeof(sample_t));
|
||||
}
|
||||
}
|
||||
|
||||
void audio_buffer::mix(audio_buffer& from_buffer, float percent) {
|
||||
|
||||
void audio_buffer::add(audio_buffer& from_buffer, float percent) {
|
||||
std::scoped_lock lock(lock_);
|
||||
// will be optimized by compiler
|
||||
for (uint32_t channel_index = 0; channel_index < buffer_.size(); channel_index++) {
|
||||
auto& channel = buffer_[channel_index];
|
||||
auto& in_channel = from_buffer.buffer_[channel_index];
|
||||
for (uint32_t sample_index = 0; sample_index < channel.size(); sample_index++) {
|
||||
channel[sample_index] += in_channel[sample_index] * percent;
|
||||
}
|
||||
}
|
||||
add_func(*this, from_buffer, percent);
|
||||
}
|
||||
|
||||
void audio_buffer::multiple(float percent) {
|
||||
std::scoped_lock lock(lock_);
|
||||
// will be optimized by compiler
|
||||
for (auto& channel : buffer_) {
|
||||
for (auto& sample : channel) {
|
||||
sample *= percent;
|
||||
}
|
||||
}
|
||||
multiple_func(*this, percent);
|
||||
}
|
||||
|
||||
std::vector<sample_t> audio_buffer::get_interleaved_buffer() {
|
||||
std::scoped_lock lock(lock_);
|
||||
std::vector<sample_t> audio_buffer::get_interleaved_buffer() const {
|
||||
std::vector<sample_t> result;
|
||||
result.reserve(buffer_[0].size() * buffer_.size());
|
||||
for (uint32_t sample_index = 0; sample_index < buffer_[0].size(); sample_index++) {
|
||||
for (uint32_t channel_index = 0; channel_index < buffer_.size(); channel_index++) {
|
||||
result.push_back(buffer_[channel_index][sample_index]);
|
||||
result.reserve(headers_.size() * frame_size_);
|
||||
for (int i = 0; i < frame_size_; ++i) {
|
||||
for (const sample_t* channel : headers_) {
|
||||
result.push_back(channel[i]);
|
||||
}
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
void audio_buffer::free() {
|
||||
for (sample_t* header : headers_)
|
||||
get_audio_buffer_pool()->free(header);
|
||||
headers_.clear();
|
||||
}
|
||||
|
@ -3,24 +3,32 @@
|
||||
#include <mutex>
|
||||
#include <vector>
|
||||
#include "extern.h"
|
||||
#include <functional>
|
||||
|
||||
class CORE_API audio_buffer {
|
||||
public:
|
||||
static void(*add_func)(audio_buffer& in_buffer, audio_buffer& from_buffer, float percent);
|
||||
static void(*multiple_func)(audio_buffer& in_buffer, float percent);
|
||||
|
||||
audio_buffer();
|
||||
~audio_buffer();
|
||||
sample_t** get_headers() { return headers_.data(); }
|
||||
const std::vector<sample_t*>& get_headers_vector() { return headers_; }
|
||||
|
||||
[[nodiscard]] uint32_t get_num_channels() const { return buffer_.size(); }
|
||||
[[nodiscard]] uint32_t get_num_samples() const { return buffer_[0].size(); }
|
||||
[[nodiscard]] uint32_t get_num_channels() const { return headers_.size(); }
|
||||
[[nodiscard]] uint32_t get_num_samples() const { return frame_size_; }
|
||||
|
||||
void resize(uint32_t channel_num, uint32_t block_size);
|
||||
void resize(uint32_t channel_num, uint32_t frame_size);
|
||||
|
||||
void clear();
|
||||
void mix(audio_buffer& from_buffer, float percent = 1.f);
|
||||
void add(audio_buffer& from_buffer, float percent = 1.f);
|
||||
void multiple(float percent);
|
||||
|
||||
[[nodiscard]] std::vector<sample_t> get_interleaved_buffer();
|
||||
[[nodiscard]] std::vector<sample_t> get_interleaved_buffer() const;
|
||||
private:
|
||||
void free();
|
||||
private:
|
||||
std::vector<std::vector<sample_t>> buffer_;
|
||||
std::vector<sample_t*> headers_{};
|
||||
std::mutex lock_{};
|
||||
uint32_t frame_size_ = 0;
|
||||
};
|
||||
|
15
core/audio/misc/audio_buffer_pool.cpp
Normal file
15
core/audio/misc/audio_buffer_pool.cpp
Normal file
@ -0,0 +1,15 @@
|
||||
#include "audio_buffer_pool.h"
|
||||
|
||||
IMPL_SINGLETON_INSTANCE(audio_buffer_pool)
|
||||
|
||||
/// Singleton initialisation hook; this pool currently has nothing to set up.
void audio_buffer_pool::init(singleton_initliazer& initliazer)
{
    // Intentionally empty.
}
|
||||
|
||||
/// Allocates room for `block_size` samples from the pool and returns it as a sample pointer.
sample_t* audio_buffer_pool::alloc(uint32_t block_size) {
    const auto byte_count = block_size * sizeof(sample_t);
    return static_cast<sample_t*>(pool_.alloc(byte_count));
}
|
||||
|
||||
/// Returns a block previously handed out by alloc() to the pool.
void audio_buffer_pool::free(sample_t* block)
{
    pool_.free(block);
}
|
18
core/audio/misc/audio_buffer_pool.h
Normal file
18
core/audio/misc/audio_buffer_pool.h
Normal file
@ -0,0 +1,18 @@
|
||||
#pragma once
|
||||
#include "mempool.h"
|
||||
#include "misc/singleton/singleton.h"
|
||||
|
||||
/// Singleton that hands out sample blocks for audio_buffer from one shared memory pool.
class CORE_API audio_buffer_pool : public singleton_t<audio_buffer_pool> {
public:
    void init(singleton_initliazer& initliazer) override;
    const char* get_name() override { return "audio_buffer_pool"; }

    /// Allocates a block of `block_size` samples.
    sample_t* alloc(uint32_t block_size);
    /// Returns a block obtained from alloc().
    void free(sample_t* block);
protected:
    [[nodiscard]] mempool<>& get_pool() { return pool_; }
private:
    // 1024000 * 4 = 4,096,000 bytes (about 3.9 MiB). A 1024-sample buffer of 4-byte
    // samples takes 4096 bytes, so the pool holds on the order of 1000 such buffers
    // before allocator overhead (assumes sizeof(sample_t) == 4 - TODO confirm).
    mempool<> pool_{1024000 * 4};
};
|
||||
|
||||
DEFINE_SINGLETON_INSTANCE(audio_buffer_pool)
|
121
core/misc/cpu_simd.h
Normal file
121
core/misc/cpu_simd.h
Normal file
@ -0,0 +1,121 @@
|
||||
#pragma once
|
||||
|
||||
#include <algorithm>
|
||||
#include <cpuid.h>
|
||||
#include <vector>
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#include <intrin.h>
|
||||
#else
|
||||
#include <cpuid.h>
|
||||
#endif
|
||||
|
||||
/// Executes CPUID for leaf `infoType`, sub-leaf 0, and stores the resulting
/// registers in `info` as {EAX, EBX, ECX, EDX}.
inline void get_cpuid(int info[4], int infoType) {
    constexpr int subleaf = 0;
#ifdef _MSC_VER
    __cpuidex(info, infoType, subleaf);
#else
    __cpuid_count(infoType, subleaf, info[0], info[1], info[2], info[3]);
#endif
}
|
||||
|
||||
/// SIMD instruction-set extensions that get_simd_support_type() can report.
enum class simd_type {
    // x86 / x86-64
    sse2,
    sse3,
    ssse3,
    sse41,
    sse42,
    avx,
    avx2,
    avx512,
    // ARM
    neon64,
    neon128,
};
|
||||
|
||||
/// Queries the CPU and returns the SIMD extensions it reports.
///
/// On x86-64 (CPU_AMD64) the features come from CPUID leaf 1 (SSE2 in EDX; SSE3,
/// SSSE3, SSE4.1, SSE4.2 and AVX in ECX) and leaf 7 sub-leaf 0 (AVX2, AVX-512F in EBX).
/// Only the CPU feature bits are read; operating-system support for the AVX /
/// AVX-512 register state (XGETBV) is not checked.
///
/// @return Supported extensions, in the order sse2 ... avx512, neon64, neon128.
inline std::vector<simd_type> get_simd_support_type() {
    std::vector<simd_type> simd_types;

#if CPU_AMD64
    // Register positions inside the array filled by get_cpuid().
    constexpr int reg_eax = 0;
    constexpr int reg_ebx = 1;
    constexpr int reg_ecx = 2;
    constexpr int reg_edx = 3;
    const auto bit_set = [](int reg, int bit) {
        return ((static_cast<unsigned int>(reg) >> bit) & 1u) != 0u;
    };

    // Leaf 0 reports the highest basic leaf; higher leaves must not be trusted beyond it.
    int leaf0[4] = {0, 0, 0, 0};
    get_cpuid(leaf0, 0);
    const unsigned int max_leaf = static_cast<unsigned int>(leaf0[reg_eax]);

    if (max_leaf >= 1) {
        int leaf1[4] = {0, 0, 0, 0};
        get_cpuid(leaf1, 1);
        if (bit_set(leaf1[reg_edx], 26)) {  // SSE2 is reported in EDX, not ECX
            simd_types.push_back(simd_type::sse2);
        }
        if (bit_set(leaf1[reg_ecx], 0)) {
            simd_types.push_back(simd_type::sse3);
        }
        if (bit_set(leaf1[reg_ecx], 9)) {
            simd_types.push_back(simd_type::ssse3);
        }
        if (bit_set(leaf1[reg_ecx], 19)) {
            simd_types.push_back(simd_type::sse41);
        }
        if (bit_set(leaf1[reg_ecx], 20)) {
            simd_types.push_back(simd_type::sse42);
        }
        if (bit_set(leaf1[reg_ecx], 28)) {
            simd_types.push_back(simd_type::avx);
        }
    }
    if (max_leaf >= 7) {
        // AVX2 and AVX-512F are structured extended features: leaf 7, sub-leaf 0, EBX.
        int leaf7[4] = {0, 0, 0, 0};
        get_cpuid(leaf7, 7);
        if (bit_set(leaf7[reg_ebx], 5)) {
            simd_types.push_back(simd_type::avx2);
        }
        if (bit_set(leaf7[reg_ebx], 16)) {
            simd_types.push_back(simd_type::avx512);
        }
    }
#endif
#if CPU_ARM
    uint64_t id_aa64isar0_ = 0;
    uint64_t id_aa64pfr0_ = 0;

    // Reading the ID_AA64ISAR0_EL1 register
    asm volatile("mrs %0, ID_AA64ISAR0_EL1" : "=r" (id_aa64isar0_));
    // Reading the ID_AA64PFR0_EL1 register
    asm volatile("mrs %0, ID_AA64PFR0_EL1" : "=r" (id_aa64pfr0_));
    // NOTE(review): these two fields of ID_AA64ISAR0_EL1 do not look like the
    // Advanced SIMD indicator (which the Arm reference places in
    // ID_AA64PFR0_EL1[23:20]); id_aa64pfr0_ is read but never used. Verify
    // against the Arm Architecture Reference Manual before relying on this.
    if ((id_aa64isar0_ >> 24) & 0xf) {
        simd_types.push_back(simd_type::neon64);
    }
    if ((id_aa64isar0_ >> 28) & 0xf) {
        simd_types.push_back(simd_type::neon128);
    }
#endif

    return simd_types;
}
|
||||
|
||||
class CORE_API cpuid {
|
||||
public:
|
||||
cpuid() {
|
||||
simd_types = get_simd_support_type();
|
||||
}
|
||||
[[nodiscard]] bool support_simd(simd_type simd) const {
|
||||
return std::ranges::find(simd_types, simd) != simd_types.end();
|
||||
}
|
||||
|
||||
[[nodiscard]] bool support_sse() const {
|
||||
return support_simd(simd_type::sse42) || support_simd(simd_type::sse41) || support_simd(simd_type::ssse3) || support_simd(simd_type::sse3) || support_simd(simd_type::sse2);
|
||||
}
|
||||
|
||||
[[nodiscard]] bool support_avx() const {
|
||||
return support_simd(simd_type::avx);
|
||||
}
|
||||
|
||||
[[nodiscard]] bool support_avx2() const {
|
||||
return support_simd(simd_type::avx2);
|
||||
}
|
||||
|
||||
[[nodiscard]] bool support_avx512() const {
|
||||
return support_simd(simd_type::avx512);
|
||||
}
|
||||
|
||||
[[nodiscard]] bool support_neon() const {
|
||||
return support_neon64() || support_neon128();
|
||||
}
|
||||
|
||||
[[nodiscard]] bool support_neon64() const {
|
||||
return support_simd(simd_type::neon64);
|
||||
}
|
||||
|
||||
[[nodiscard]] bool support_neon128() const {
|
||||
return support_simd(simd_type::neon128);
|
||||
}
|
||||
private:
|
||||
std::vector<simd_type> simd_types;
|
||||
};
|
@ -21,11 +21,11 @@ public:
|
||||
messages_.push(message);
|
||||
}
|
||||
/// Wraps `func` in a lamba_thread_message allocated from `mem_pool_` and queues
/// it through the pointer overload of push_message.
void push_message(const std::function<void()>& func) {
    auto* message = mem_pool_.alloc<lamba_thread_message>(func);
    push_message(message);
}
|
||||
void process_messages();
|
||||
private:
|
||||
std::queue<thread_message*> messages_;
|
||||
mempool mem_pool_ = mempool(1024000);
|
||||
mempool<> mem_pool_ = mempool(1024000);
|
||||
};
|
||||
|
2
third_party/mempool/CMakeLists.txt
vendored
2
third_party/mempool/CMakeLists.txt
vendored
@ -2,7 +2,7 @@ cmake_minimum_required(VERSION 3.5)
|
||||
project(mempool)
|
||||
|
||||
set(CMAKE_C_STANDARD 99)
# CMAKE_CXX_STANDARD is the variable CMake reads; CMAKE_CPP_STANDARD is not a CMake variable.
set(CMAKE_CXX_STANDARD 17)
|
||||
|
||||
add_library(${PROJECT_NAME} SHARED
|
||||
mempool/ncx_core.h
|
||||
|
15
third_party/mempool/mempool.h
vendored
15
third_party/mempool/mempool.h
vendored
@ -1,19 +1,24 @@
|
||||
#pragma once
|
||||
#include "ncx_slab.h"
|
||||
#include <cstdlib>
|
||||
|
||||
template<int32_t alignment = 64>
|
||||
class mempool {
|
||||
public:
|
||||
mempool(size_t pool_size = 1024000) {
|
||||
const auto space = (u_char*)malloc(pool_size);
|
||||
mempool(size_t pool_size = 1024000) { // 1024KB
|
||||
// 分配一个缓存对齐的内存
|
||||
// const auto space = static_cast<u_char*>(operator new(pool_size, std::align_val_t(alignment)));
|
||||
const auto space = static_cast<u_char*>(malloc(pool_size));
|
||||
|
||||
mem_pool_ = (ncx_slab_pool_t*)space;
|
||||
mem_pool_->addr = space;
|
||||
mem_pool_ = reinterpret_cast<ncx_slab_pool_t*>(space);
|
||||
mem_pool_->addr = space;
|
||||
mem_pool_->min_shift = 3;
|
||||
mem_pool_->end = space + pool_size;
|
||||
mem_pool_->end = space + pool_size;
|
||||
|
||||
ncx_slab_init(mem_pool_);
|
||||
}
|
||||
~mempool() {
|
||||
// operator delete(mem_pool_->addr, std::align_val_t(alignment));
|
||||
free(mem_pool_->addr);
|
||||
}
|
||||
[[nodiscard]] void* alloc(size_t size) const {
|
||||
|
Loading…
x
Reference in New Issue
Block a user