@@ -413,6 +413,77 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
413413 ret. place_lane ( fx, out_lane_idx) . write_cvalue ( fx, res_lane) ;
414414 }
415415 }
416+
417+ "llvm.x86.ssse3.pmul.hr.sw.128" => {
418+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_mulhrs_epi16&ig_expand=4782
419+ intrinsic_args ! ( fx, args => ( a, b) ; intrinsic) ;
420+
421+ assert_eq ! ( a. layout( ) , b. layout( ) ) ;
422+ let layout = a. layout ( ) ;
423+
424+ let ( lane_count, lane_ty) = layout. ty . simd_size_and_type ( fx. tcx ) ;
425+ let ( ret_lane_count, ret_lane_ty) = ret. layout ( ) . ty . simd_size_and_type ( fx. tcx ) ;
426+ assert_eq ! ( lane_ty, fx. tcx. types. i16 ) ;
427+ assert_eq ! ( ret_lane_ty, fx. tcx. types. i16 ) ;
428+ assert_eq ! ( lane_count, ret_lane_count) ;
429+
430+ let ret_lane_layout = fx. layout_of ( fx. tcx . types . i16 ) ;
431+ for out_lane_idx in 0 ..lane_count {
432+ let a_lane = a. value_lane ( fx, out_lane_idx) . load_scalar ( fx) ;
433+ let a_lane = fx. bcx . ins ( ) . sextend ( types:: I32 , a_lane) ;
434+ let b_lane = b. value_lane ( fx, out_lane_idx) . load_scalar ( fx) ;
435+ let b_lane = fx. bcx . ins ( ) . sextend ( types:: I32 , b_lane) ;
436+
437+ let mul: Value = fx. bcx . ins ( ) . imul ( a_lane, b_lane) ;
438+ let shifted = fx. bcx . ins ( ) . ushr_imm ( mul, 14 ) ;
439+ let incremented = fx. bcx . ins ( ) . iadd_imm ( shifted, 1 ) ;
440+ let shifted_again = fx. bcx . ins ( ) . ushr_imm ( incremented, 1 ) ;
441+
442+ let res_lane = fx. bcx . ins ( ) . ireduce ( types:: I16 , shifted_again) ;
443+ let res_lane = CValue :: by_val ( res_lane, ret_lane_layout) ;
444+
445+ ret. place_lane ( fx, out_lane_idx) . write_cvalue ( fx, res_lane) ;
446+ }
447+ }
448+
449+ "llvm.x86.sse2.packuswb.128" => {
450+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packus_epi16&ig_expand=4903
451+ intrinsic_args ! ( fx, args => ( a, b) ; intrinsic) ;
452+
453+ assert_eq ! ( a. layout( ) , b. layout( ) ) ;
454+ let layout = a. layout ( ) ;
455+
456+ let ( lane_count, lane_ty) = layout. ty . simd_size_and_type ( fx. tcx ) ;
457+ let ( ret_lane_count, ret_lane_ty) = ret. layout ( ) . ty . simd_size_and_type ( fx. tcx ) ;
458+ assert_eq ! ( lane_ty, fx. tcx. types. i16 ) ;
459+ assert_eq ! ( ret_lane_ty, fx. tcx. types. u8 ) ;
460+ assert_eq ! ( lane_count * 2 , ret_lane_count) ;
461+
462+ let zero = fx. bcx . ins ( ) . iconst ( types:: I16 , 0 ) ;
463+ let max_u8 = fx. bcx . ins ( ) . iconst ( types:: I16 , 255 ) ;
464+ let ret_lane_layout = fx. layout_of ( fx. tcx . types . u8 ) ;
465+
466+ for idx in 0 ..lane_count {
467+ let lane = a. value_lane ( fx, idx) . load_scalar ( fx) ;
468+ let sat = fx. bcx . ins ( ) . smax ( lane, zero) ;
469+ let sat = fx. bcx . ins ( ) . umin ( sat, max_u8) ;
470+ let res = fx. bcx . ins ( ) . ireduce ( types:: I8 , sat) ;
471+
472+ let res_lane = CValue :: by_val ( res, ret_lane_layout) ;
473+ ret. place_lane ( fx, idx) . write_cvalue ( fx, res_lane) ;
474+ }
475+
476+ for idx in 0 ..lane_count {
477+ let lane = b. value_lane ( fx, idx) . load_scalar ( fx) ;
478+ let sat = fx. bcx . ins ( ) . smax ( lane, zero) ;
479+ let sat = fx. bcx . ins ( ) . umin ( sat, max_u8) ;
480+ let res = fx. bcx . ins ( ) . ireduce ( types:: I8 , sat) ;
481+
482+ let res_lane = CValue :: by_val ( res, ret_lane_layout) ;
483+ ret. place_lane ( fx, lane_count + idx) . write_cvalue ( fx, res_lane) ;
484+ }
485+ }
486+
416487 _ => {
417488 fx. tcx
418489 . sess
0 commit comments