index f2eb785811367a4d093676f8bec0d99980b17812..1a9c9100ed9eece66534d9245ccc3096e09f4992 100755 (executable)
// (b) NR (for triangular operations such as trmm and trsm).
//
-/*
-#define BLIS_DEFAULT_MC_S 336
-#define BLIS_DEFAULT_KC_S 528
-#define BLIS_DEFAULT_NC_S 4096
-
-#define BLIS_DEFAULT_MC_D 64
-#define BLIS_DEFAULT_KC_D 128
-#define BLIS_DEFAULT_NC_D 4096
-
-#define BLIS_DEFAULT_MC_C 64
-#define BLIS_DEFAULT_KC_C 128
-#define BLIS_DEFAULT_NC_C 4096
-
-#define BLIS_DEFAULT_MC_Z 64
-#define BLIS_DEFAULT_KC_Z 128
-#define BLIS_DEFAULT_NC_Z 4096
-*/
-
-
-// Old values with only 2 buffers of KN in L2
-//#define BLIS_DEFAULT_MC_S 96
-//#define BLIS_DEFAULT_KC_S 428
-//#define BLIS_DEFAULT_NC_S 944
-//
-//#define BLIS_DEFAULT_MC_D 104
-//#define BLIS_DEFAULT_KC_D 220
-//#define BLIS_DEFAULT_NC_D 820
-//
-//#define BLIS_DEFAULT_MC_C 96
-//#define BLIS_DEFAULT_KC_C 260
-//#define BLIS_DEFAULT_NC_C 820
-//
-//#define BLIS_DEFAULT_MC_Z 90
-//#define BLIS_DEFAULT_KC_Z 178
-//#define BLIS_DEFAULT_NC_Z 584
-
-//Values for 2 buffers of KN in L3
+#if defined(MEM_MODEL_LARGE)
#define BLIS_DEFAULT_MC_S 144
#define BLIS_DEFAULT_KC_S 428
#define BLIS_DEFAULT_NC_S 944
-// MR = 4, NR = 4
-//#define BLIS_DEFAULT_MC_S 40
-//#define BLIS_DEFAULT_KC_S 580
-//#define BLIS_DEFAULT_NC_S 436
-
#define BLIS_DEFAULT_MC_D 132
#define BLIS_DEFAULT_KC_D 220
#define BLIS_DEFAULT_NC_D 864
#define BLIS_DEFAULT_3M_KC_Z 100
#define BLIS_DEFAULT_3M_NC_Z 100
+#elif defined (MEM_MODEL_MEDIUM)
+#define BLIS_DEFAULT_MC_S 128
+#define BLIS_DEFAULT_KC_S 240
+#define BLIS_DEFAULT_NC_S 1288
-//#define BLIS_DEFAULT_MC_S 104
-////#define BLIS_DEFAULT_KC_S 440 //So that 2 MRxKC buffers can fit in L1 = 448, reduced to 447 to accommodate for bank conflict, reduced to 440 because KC must be divisible by MR & NR
-//#define BLIS_DEFAULT_KC_S 260
-//#define BLIS_DEFAULT_NC_S 1184 // Increased to fill up 4MB of L3 also NC should be divisible by NR
-//
-//#define BLIS_DEFAULT_MC_D 100
-//#define BLIS_DEFAULT_KC_D 110
-//#define BLIS_DEFAULT_NC_D 884
-//
-//#define BLIS_DEFAULT_MC_C 64
-//#define BLIS_DEFAULT_KC_C 110
-//#define BLIS_DEFAULT_NC_C 512
-//
-//#define BLIS_DEFAULT_MC_Z 32
-//#define BLIS_DEFAULT_KC_Z 50
-//#define BLIS_DEFAULT_NC_Z 256
+#define BLIS_DEFAULT_MC_D 68
+#define BLIS_DEFAULT_KC_D 240
+#define BLIS_DEFAULT_NC_D 844
+#define BLIS_DEFAULT_MC_C 68
+#define BLIS_DEFAULT_KC_C 240
+#define BLIS_DEFAULT_NC_C 844
-// -- Register blocksizes --
+#define BLIS_DEFAULT_MC_Z 60
+#define BLIS_DEFAULT_KC_Z 136
+#define BLIS_DEFAULT_NC_Z 631
+
+#define BLIS_DEFAULT_4M_MC_C 68
+#define BLIS_DEFAULT_4M_KC_C 240
+#define BLIS_DEFAULT_4M_NC_C 844
+
+#define BLIS_DEFAULT_4M_MC_Z 60
+#define BLIS_DEFAULT_4M_KC_Z 136
+#define BLIS_DEFAULT_4M_NC_Z 628
+
+#define BLIS_DEFAULT_3M_MC_C 68
+#define BLIS_DEFAULT_3M_KC_C 160
+#define BLIS_DEFAULT_3M_NC_C 720
+
+#define BLIS_DEFAULT_3M_MC_Z 52
+#define BLIS_DEFAULT_3M_KC_Z 100
+#define BLIS_DEFAULT_3M_NC_Z 524
+
+
+#elif defined(MEM_MODEL_SMALL)
+// Use the following values when EDMA is disabled for A and B.
+#define BLIS_DEFAULT_MC_S 112
+#define BLIS_DEFAULT_KC_S 428
+#define BLIS_DEFAULT_NC_S 1224
+
+#define BLIS_DEFAULT_MC_D 96
+#define BLIS_DEFAULT_KC_D 220
+#define BLIS_DEFAULT_NC_D 1184
+
+#define BLIS_DEFAULT_MC_C 88
+#define BLIS_DEFAULT_KC_C 260
+#define BLIS_DEFAULT_NC_C 1008
+
+#define BLIS_DEFAULT_MC_Z 64
+#define BLIS_DEFAULT_KC_Z 178
+#define BLIS_DEFAULT_NC_Z 736
+
+#define BLIS_DEFAULT_4M_MC_C 108
+#define BLIS_DEFAULT_4M_KC_C 220
+#define BLIS_DEFAULT_4M_NC_C 1184
+#define BLIS_DEFAULT_4M_MC_Z 64
+#define BLIS_DEFAULT_4M_KC_Z 178
+#define BLIS_DEFAULT_4M_NC_Z 736
+
+#define BLIS_DEFAULT_3M_MC_C 64
+#define BLIS_DEFAULT_3M_KC_C 220
+#define BLIS_DEFAULT_3M_NC_C 792
+
+#define BLIS_DEFAULT_3M_MC_Z 48
+#define BLIS_DEFAULT_3M_KC_Z 178
+#define BLIS_DEFAULT_3M_NC_Z 488
+
+/*
+#define BLIS_DEFAULT_MC_S 144
+#define BLIS_DEFAULT_KC_S 428
+#define BLIS_DEFAULT_NC_S 1224
+
+#define BLIS_DEFAULT_MC_D 140
+#define BLIS_DEFAULT_KC_D 220
+#define BLIS_DEFAULT_NC_D 1184
+
+#define BLIS_DEFAULT_MC_C 116
+#define BLIS_DEFAULT_KC_C 260
+#define BLIS_DEFAULT_NC_C 1008
+
+#define BLIS_DEFAULT_MC_Z 86
+#define BLIS_DEFAULT_KC_Z 178
+#define BLIS_DEFAULT_NC_Z 736
+
+#define BLIS_DEFAULT_4M_MC_C 140
+#define BLIS_DEFAULT_4M_KC_C 220
+#define BLIS_DEFAULT_4M_NC_C 1184
+
+#define BLIS_DEFAULT_4M_MC_Z 86
+#define BLIS_DEFAULT_4M_KC_Z 178
+#define BLIS_DEFAULT_4M_NC_Z 736
+
+#define BLIS_DEFAULT_3M_MC_C 88
+#define BLIS_DEFAULT_3M_KC_C 220
+#define BLIS_DEFAULT_3M_NC_C 792
+
+#define BLIS_DEFAULT_3M_MC_Z 56
+#define BLIS_DEFAULT_3M_KC_Z 178
+#define BLIS_DEFAULT_3M_NC_Z 488
+*/
+// Use the following values (currently commented out) when EDMA is enabled.
+/*
+#define BLIS_DEFAULT_MC_S 104
+#define BLIS_DEFAULT_KC_S 196
+#define BLIS_DEFAULT_NC_S 824
+
+#define BLIS_DEFAULT_MC_D 64
+#define BLIS_DEFAULT_KC_D 180
+#define BLIS_DEFAULT_NC_D 540
+
+#define BLIS_DEFAULT_MC_C 64
+#define BLIS_DEFAULT_KC_C 180
+#define BLIS_DEFAULT_NC_C 540
+
+#define BLIS_DEFAULT_MC_Z 32
+#define BLIS_DEFAULT_KC_Z 145
+#define BLIS_DEFAULT_NC_Z 306
+
+#define BLIS_DEFAULT_4M_MC_C 64
+#define BLIS_DEFAULT_4M_KC_C 180
+#define BLIS_DEFAULT_4M_NC_C 540
+
+#define BLIS_DEFAULT_4M_MC_Z 32
+#define BLIS_DEFAULT_4M_KC_Z 145
+#define BLIS_DEFAULT_4M_NC_Z 306
+
+#define BLIS_DEFAULT_3M_MC_C 64
+#define BLIS_DEFAULT_3M_KC_C 96
+#define BLIS_DEFAULT_3M_NC_C 488
+
+#define BLIS_DEFAULT_3M_MC_Z 36
+#define BLIS_DEFAULT_3M_KC_Z 108
+#define BLIS_DEFAULT_3M_NC_Z 196
+*/
+#endif
+
+// -- Register blocksizes --
+// The register blocksizes are the same for all memory models (C66x architecture); they need to be redefined for C7x.
#define BLIS_DEFAULT_MR_S 4
#define BLIS_DEFAULT_NR_S 8 //4 //