diff --git a/.gitignore b/.gitignore old mode 100644 new mode 100755 index 103174b..824e8ef --- a/.gitignore +++ b/.gitignore @@ -3,3 +3,5 @@ /.idea/ /SchemSearch.class /SchemSearch.h +/schemsearch-lib/src/.idea/ +/schemsearch-lib/src/cmake-build-debug/ diff --git a/schemsearch-lib/build.rs b/schemsearch-lib/build.rs old mode 100644 new mode 100755 index 13a5446..986f0e1 --- a/schemsearch-lib/build.rs +++ b/schemsearch-lib/build.rs @@ -3,5 +3,9 @@ use cc; fn main() { cc::Build::new() .file("src/compare.c") + .flag("-ftree-vectorize") + .flag("-march=native") + .flag("-mtune=native") + .flag("-ffast-math") .compile("compare"); } \ No newline at end of file diff --git a/schemsearch-lib/src/CMakeLists.txt b/schemsearch-lib/src/CMakeLists.txt new file mode 100755 index 0000000..05733f5 --- /dev/null +++ b/schemsearch-lib/src/CMakeLists.txt @@ -0,0 +1,7 @@ +cmake_minimum_required(VERSION 3.27) +project(src C) + +set(CMAKE_C_STANDARD 11) + +add_executable(src + compare.c) diff --git a/schemsearch-lib/src/compare.c b/schemsearch-lib/src/compare.c old mode 100644 new mode 100755 index 6231485..882623a --- a/schemsearch-lib/src/compare.c +++ b/schemsearch-lib/src/compare.c @@ -1,5 +1,6 @@ #include #include +#include int32_t isMatching( const int32_t *schem_data, @@ -36,35 +37,74 @@ int32_t isMatching( } void is_matching_all( - const int32_t *schem_data, - const int32_t *pattern_data, + const int32_t *__restrict__ schem_data, + const int32_t *__restrict__ pattern_data, int32_t schem_width, int32_t schem_height, int32_t schem_length, int32_t pattern_width, int32_t pattern_height, int32_t pattern_length, - int32_t *result + int32_t *__restrict__ result ) { - for (int32_t pz = 0; pz < pattern_length; ++pz) { - int32_t maxZ = schem_length - pattern_length + pz + 1; - for (int32_t py = 0; py < pattern_height; ++py) { - int32_t maxY = schem_height - pattern_height + py + 1; - for (int32_t px = 0; px < pattern_width; ++px) { - int32_t pv = pattern_data[px + py * pattern_width + pz * pattern_width * pattern_height]; - int32_t maxX = schem_width - pattern_width + px + 1; - for (int32_t z = pz; z < maxZ; ++z) { - int32_t sourceOffsetZ = z * schem_width * schem_height; - int32_t resultOffsetZ = (z - pz) * schem_width * schem_height - py * schem_width; - for (int32_t y = py; y < maxY; ++y) { - int32_t sourceOffsetY = sourceOffsetZ + y * schem_width; - int32_t resultOffsetY = resultOffsetZ + y * schem_width - px; - for (int32_t x = px; x < maxX; ++x) { - result[resultOffsetY + x] += schem_data[sourceOffsetY + x] == pv; + if(pattern_width*pattern_height*pattern_length >= 65536) { //TODO check for table size < 65536 + for (int32_t pz = 0; pz < pattern_length; ++pz) { + int32_t maxZ = schem_length - pattern_length + pz + 1; + for (int32_t py = 0; py < pattern_height; ++py) { + int32_t maxY = schem_height - pattern_height + py + 1; + for (int32_t px = 0; px < pattern_width; ++px) { + int32_t pv = pattern_data[px + py * pattern_width + pz * pattern_width * pattern_height]; + int32_t maxX = schem_width - pattern_width + px + 1; + for (int32_t z = pz; z < maxZ; ++z) { + int32_t sourceOffsetZ = z * schem_width * schem_height; + int32_t resultOffsetZ = (z - pz) * schem_width * schem_height - py * schem_width; + for (int32_t y = py; y < maxY; ++y) { + int32_t sourceOffsetY = sourceOffsetZ + y * schem_width; + int32_t resultOffsetY = resultOffsetZ + y * schem_width - px; + for (int32_t x = px; x < maxX; ++x) { + result[resultOffsetY + x] += schem_data[sourceOffsetY + x] == pv; + } } } } } } + } else { + size_t schem_size = schem_width*schem_height*schem_length; + uint16_t *__restrict__ sschem_data = (uint16_t*)malloc(schem_size*2); + uint16_t *__restrict__ sresult = (uint16_t*)malloc(schem_size*2); + for(size_t i = 0; i < schem_size; i++) { + sschem_data[i] = schem_data[i]; + sresult[i] = 0; + } + + for (int32_t pz = 0; pz < pattern_length; ++pz) { + int32_t maxZ = schem_length - pattern_length + pz + 1; + for (int32_t py = 0; py < pattern_height; ++py) { + int32_t maxY = schem_height - pattern_height + py + 1; + for (int32_t px = 0; px < pattern_width; ++px) { + uint16_t pv = (uint16_t)pattern_data[px + py * pattern_width + pz * pattern_width * pattern_height]; + int32_t maxX = schem_width - pattern_width + px + 1; + for (int32_t z = pz; z < maxZ; ++z) { + int32_t sourceOffsetZ = z * schem_width * schem_height; + int32_t resultOffsetZ = (z - pz) * schem_width * schem_height - py * schem_width; + for (int32_t y = py; y < maxY; ++y) { + int32_t sourceOffsetY = sourceOffsetZ + y * schem_width; + int32_t resultOffsetY = resultOffsetZ + y * schem_width - px; + for (int32_t x = px; x < maxX; ++x) { + sresult[resultOffsetY + x] += sschem_data[sourceOffsetY + x] == pv; + } + } + } + } + } + } + + for(size_t i = 0; i < schem_size; i++) { + result[i] = sresult[i]; + } + + free(sschem_data); + free(sresult); } -} +} \ No newline at end of file