arm64: alias NEOVERSEV1 to ARMV8SVE and scale SGEMM/DGEMM P/Q by L2 size

Summary of the hunks below:
  * Makefile.system: NEOVERSEV1 is removed from the DYNAMIC_CORE entries
    that are added when NO_SVE is not 1.
  * driver/others/dynamic_arm64.c: when NO_SVE is not defined,
    gotoblas_NEOVERSEV1 becomes a macro alias of gotoblas_ARMV8SVE.
    "armv8sve" moves to corename[1] and every later name shifts by one;
    gotoblas_corename() is renumbered to match.
  * getarch.c: -DSCALE_L2 is added to the ARCHCONFIG that carries -DARMV8SVE.
  * kernel/setparam-ref.c: for ARCH_ARM64, sgemm/dgemm P and Q are multiplied
    by get_l2_multiplier(). The helper closes the sysfs file on every path
    that opened it and scans the size with a conversion that matches the
    destination type.
  * param.h: SGEMM/DGEMM default P become 30/30 and default Q become 80/40.

NOTE(review): the corename[] reordering changes the index of every entry
after "armv8". Code outside these hunks that depends on the old index order
is not visible here - TODO confirm it is updated as well.

diff --git a/Makefile.system b/Makefile.system
index e602eaf05c..5cfe71618c 100644
--- a/Makefile.system
+++ b/Makefile.system
@@ -679,7 +679,6 @@ DYNAMIC_CORE += CORTEXA53
 DYNAMIC_CORE += CORTEXA57
 DYNAMIC_CORE += NEOVERSEN1
 ifneq ($(NO_SVE), 1)
-DYNAMIC_CORE += NEOVERSEV1
 DYNAMIC_CORE += NEOVERSEN2
 DYNAMIC_CORE += ARMV8SVE
 endif
diff --git a/driver/others/dynamic_arm64.c b/driver/others/dynamic_arm64.c
index b5fb8161d3..3a6567e0a6 100644
--- a/driver/others/dynamic_arm64.c
+++ b/driver/others/dynamic_arm64.c
@@ -133,9 +133,9 @@ extern gotoblas_t gotoblas_TSV110;
 extern gotoblas_t gotoblas_EMAG8180;
 extern gotoblas_t gotoblas_NEOVERSEN1;
 #ifndef NO_SVE
-extern gotoblas_t gotoblas_NEOVERSEV1;
 extern gotoblas_t gotoblas_NEOVERSEN2;
 extern gotoblas_t gotoblas_ARMV8SVE;
+#define gotoblas_NEOVERSEV1 gotoblas_ARMV8SVE
 #else
 #define gotoblas_NEOVERSEV1 gotoblas_ARMV8
 #define gotoblas_NEOVERSEN2 gotoblas_ARMV8
@@ -167,6 +167,7 @@ extern void openblas_warning(int verbose, const char * msg);
 
 static char *corename[] = {
   "armv8",
+  "armv8sve",
   "cortexa53",
   "cortexa57",
   "cortexa72",
@@ -181,27 +182,26 @@ static char *corename[] = {
   "neoversen2",
   "thunderx3t110",
   "cortexa55",
-  "armv8sve",
   "unknown"
 };
 
 char *gotoblas_corename(void) {
   if (gotoblas == &gotoblas_ARMV8)        return corename[ 0];
-  if (gotoblas == &gotoblas_CORTEXA53)    return corename[ 1];
-  if (gotoblas == &gotoblas_CORTEXA57)    return corename[ 2];
-  if (gotoblas == &gotoblas_CORTEXA72)    return corename[ 3];
-  if (gotoblas == &gotoblas_CORTEXA73)    return corename[ 4];
-  if (gotoblas == &gotoblas_FALKOR)       return corename[ 5];
-  if (gotoblas == &gotoblas_THUNDERX)     return corename[ 6];
-  if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 7];
-  if (gotoblas == &gotoblas_TSV110)       return corename[ 8];
-  if (gotoblas == &gotoblas_EMAG8180)     return corename[ 9];
-  if (gotoblas == &gotoblas_NEOVERSEN1)   return corename[10];
-  if (gotoblas == &gotoblas_NEOVERSEV1)   return corename[11];
-  if (gotoblas == &gotoblas_NEOVERSEN2)   return corename[12];
-  if (gotoblas == &gotoblas_THUNDERX3T110) return corename[13];
-  if (gotoblas == &gotoblas_CORTEXA55)    return corename[14];
-  if (gotoblas == &gotoblas_ARMV8SVE)     return corename[15];
+  if (gotoblas == &gotoblas_ARMV8SVE)     return corename[ 1];
+  if (gotoblas == &gotoblas_CORTEXA53)    return corename[ 2];
+  if (gotoblas == &gotoblas_CORTEXA57)    return corename[ 3];
+  if (gotoblas == &gotoblas_CORTEXA72)    return corename[ 4];
+  if (gotoblas == &gotoblas_CORTEXA73)    return corename[ 5];
+  if (gotoblas == &gotoblas_FALKOR)       return corename[ 6];
+  if (gotoblas == &gotoblas_THUNDERX)     return corename[ 7];
+  if (gotoblas == &gotoblas_THUNDERX2T99) return corename[ 8];
+  if (gotoblas == &gotoblas_TSV110)       return corename[ 9];
+  if (gotoblas == &gotoblas_EMAG8180)     return corename[10];
+  if (gotoblas == &gotoblas_NEOVERSEN1)   return corename[11];
+  if (gotoblas == &gotoblas_NEOVERSEV1)   return corename[12];
+  if (gotoblas == &gotoblas_NEOVERSEN2)   return corename[13];
+  if (gotoblas == &gotoblas_THUNDERX3T110) return corename[14];
+  if (gotoblas == &gotoblas_CORTEXA55)    return corename[15];
   return corename[NUM_CORETYPES];
 }
 
diff --git a/getarch.c b/getarch.c
index 87384c0840..480bfc69a9 100644
--- a/getarch.c
+++ b/getarch.c
@@ -1245,7 +1245,7 @@ USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 #define SUBDIRNAME "arm64"
 #define ARCHCONFIG "-DARMV8SVE " \
        "-DL1_DATA_SIZE=32768 -DL1_DATA_LINESIZE=64 " \
-       "-DL2_SIZE=262144 -DL2_LINESIZE=64 " \
+       "-DL2_SIZE=262144 -DL2_LINESIZE=64 -DSCALE_L2 " \
        "-DDTB_DEFAULT_ENTRIES=64 -DDTB_SIZE=4096 -DL2_ASSOCIATIVE=32 " \
        "-DHAVE_VFPV4 -DHAVE_VFPV3 -DHAVE_VFP -DHAVE_NEON -DHAVE_SVE -DARMV8"
 #define LIBNAME "armv8sve"
diff --git a/kernel/setparam-ref.c b/kernel/setparam-ref.c
index 4c361f1559..b37999e457 100644
--- a/kernel/setparam-ref.c
+++ b/kernel/setparam-ref.c
@@ -890,15 +890,52 @@ gotoblas_t TABLE_NAME = {
 };
 
 #if (ARCH_ARM64)
+
+#define L2_CACHE_FILE "/sys/devices/system/cpu/cpu0/cache/index2/size"
+
+/*
+ * Returns the factor applied to the default SGEMM/DGEMM P and Q values.
+ * With SCALE_L2 defined on Linux, the size in L2_CACHE_FILE is parsed: a
+ * 'K' suffix means KiB, a bare number followed by a newline means bytes.
+ * The factor is that size divided by 128 KiB, rounded down, and at least 1.
+ * In every other case (SCALE_L2 unset, not Linux, file unreadable, unknown
+ * suffix) the factor is 1.
+ */
+static inline uint64_t get_l2_multiplier(void) {
+  uint64_t multiplier = 1;
+#if defined(__linux) && defined(SCALE_L2)
+  char buffer[100];
+  unsigned long long cache_size = 0;
+  char cache_unit = '\n';
+  FILE *sysfs_file = fopen(L2_CACHE_FILE, "r");
+
+  if (sysfs_file == NULL) return multiplier;
+
+  /* Require both the number and the character that follows it. */
+  if (fgets(buffer, sizeof(buffer), sysfs_file) != NULL &&
+      sscanf(buffer, "%llu%c", &cache_size, &cache_unit) > 1) {
+    if (cache_unit == 'K') {
+      multiplier = MAX(cache_size >> 7, 1);
+    } else if (cache_unit == '\n') {
+      multiplier = MAX(cache_size >> 17, 1);
+    }
+  }
+  fclose(sysfs_file); /* release the handle on every path that opened it */
+#endif
+  return multiplier;
+}
+
 static void init_parameter(void) {
+  const uint64_t l2_multiplier = get_l2_multiplier();
+
 #if (BUILD_BFLOAT16)
   TABLE_NAME.sbgemm_p = SBGEMM_DEFAULT_P;
 #endif
 #if (BUILD_SINGLE==1) || (BUILD_COMPLEX==1)
-  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P;
+  TABLE_NAME.sgemm_p = SGEMM_DEFAULT_P * l2_multiplier;
 #endif
 #if BUILD_DOUBLE == 1 || (BUILD_COMPLEX16==1)
-  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P;
+  TABLE_NAME.dgemm_p = DGEMM_DEFAULT_P * l2_multiplier;
 #endif
 #if BUILD_COMPLEX==1
   TABLE_NAME.cgemm_p = CGEMM_DEFAULT_P;
@@ -911,10 +948,10 @@ static void init_parameter(void) {
   TABLE_NAME.sbgemm_q = SBGEMM_DEFAULT_Q;
 #endif
 #if BUILD_SINGLE == 1 || (BUILD_COMPLEX==1)
-  TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q;
+  TABLE_NAME.sgemm_q = SGEMM_DEFAULT_Q * l2_multiplier;
 #endif
 #if BUILD_DOUBLE== 1 || (BUILD_COMPLEX16==1)
-  TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q;
+  TABLE_NAME.dgemm_q = DGEMM_DEFAULT_Q * l2_multiplier;
 #endif
 #if BUILD_COMPLEX== 1
   TABLE_NAME.cgemm_q = CGEMM_DEFAULT_Q;
diff --git a/param.h b/param.h
index 1ec2d16dd5..50629e647a 100644
--- a/param.h
+++ b/param.h
@@ -3517,13 +3517,13 @@ Until then, just keep it different than DGEMM_DEFAULT_UNROLL_N to keep copy rout
 #define ZGEMM_DEFAULT_UNROLL_N 4
 #define ZGEMM_DEFAULT_UNROLL_MN 16
 
-#define SGEMM_DEFAULT_P 128
-#define DGEMM_DEFAULT_P 160
+#define SGEMM_DEFAULT_P 30
+#define DGEMM_DEFAULT_P 30
 #define CGEMM_DEFAULT_P 128
 #define ZGEMM_DEFAULT_P 128
 
-#define SGEMM_DEFAULT_Q 352
-#define DGEMM_DEFAULT_Q 128
+#define SGEMM_DEFAULT_Q 80
+#define DGEMM_DEFAULT_Q 40
 #define CGEMM_DEFAULT_Q 224
 #define ZGEMM_DEFAULT_Q 112
 