@@ -204,6 +204,8 @@ static ggml_cuda_device_info ggml_cuda_init() {
     GGML_LOG_INFO("%s: GGML_CUDA_FORCE_CUBLAS: no\n", __func__);
 #endif // GGML_CUDA_FORCE_CUBLAS
     GGML_LOG_INFO("%s: found %d " GGML_CUDA_NAME " devices:\n", __func__, info.device_count);
+
+    std::vector<std::pair<int, std::string>> turing_devices_without_mma;
     for (int id = 0; id < info.device_count; ++id) {
         int device_vmm = 0;
 
@@ -261,7 +263,25 @@ static ggml_cuda_device_info ggml_cuda_init() {
         info.devices[id].cc = 100*prop.major + 10*prop.minor;
         GGML_LOG_INFO("  Device %d: %s, compute capability %d.%d, VMM: %s\n",
                       id, prop.name, prop.major, prop.minor, device_vmm ? "yes" : "no");
-#endif // defined(GGML_USE_HIP)
+        std::string device_name(prop.name);
+        if (device_name == "NVIDIA GeForce MX450") {
+            turing_devices_without_mma.push_back({ id, device_name });
+        } else if (device_name == "NVIDIA GeForce MX550") {
+            turing_devices_without_mma.push_back({ id, device_name });
+        } else if (device_name.substr(0, 21) == "NVIDIA GeForce GTX 16") {
+            turing_devices_without_mma.push_back({ id, device_name });
+        }
+#endif // defined(GGML_USE_HIP)
+    }
+
+    if (ggml_cuda_highest_compiled_arch(GGML_CUDA_CC_TURING) >= GGML_CUDA_CC_TURING && !turing_devices_without_mma.empty()) {
+        GGML_LOG_INFO("The following devices will have suboptimal performance due to a lack of tensor cores:\n");
+        for (size_t device_pos = 0; device_pos < turing_devices_without_mma.size(); device_pos++) {
+            GGML_LOG_INFO(
+                "  Device %d: %s\n", turing_devices_without_mma[device_pos].first, turing_devices_without_mma[device_pos].second.c_str());
+        }
+        GGML_LOG_INFO(
+            "Consider compiling with CMAKE_CUDA_ARCHITECTURES=61-virtual;80-virtual and DGGML_CUDA_FORCE_MMQ to force the use of the Pascal code for Turing.\n");
     }
 
     for (int id = 0; id < info.device_count; ++id) {
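For reference, here is a minimal standalone sketch (not part of this commit) of the device-name classification the diff adds. is_turing_without_mma is a hypothetical helper that mirrors the inline checks in ggml_cuda_init(): the MX450, MX550, and GTX 16-series parts are Turing chips shipped without tensor cores, and since "NVIDIA GeForce GTX 16" is 21 characters long, the substr(0, 21) comparison covers the whole GTX 1650/1660 family in one test.

// Minimal sketch, assuming CUDA reports the marketing names used above.
#include <cstdio>
#include <string>

// Hypothetical helper mirroring the checks added in ggml_cuda_init().
static bool is_turing_without_mma(const std::string & device_name) {
    return device_name == "NVIDIA GeForce MX450"
        || device_name == "NVIDIA GeForce MX550"
        || device_name.substr(0, 21) == "NVIDIA GeForce GTX 16";
}

int main() {
    const char * names[] = {
        "NVIDIA GeForce GTX 1660 Ti", // matches the 21-char GTX 16 prefix
        "NVIDIA GeForce MX450",       // matches exactly
        "NVIDIA GeForce RTX 2060",    // Turing with tensor cores: no warning
    };
    for (const char * name : names) {
        printf("%-28s -> %s\n", name, is_turing_without_mma(name) ? "warn" : "ok");
    }
    return 0;
}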