@@ -494,6 +494,160 @@ pub(crate) fn codegen_x86_llvm_intrinsic_call<'tcx>(
494494 }
495495 }
496496
497+ "llvm.x86.avx2.packuswb" => {
498+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packus_epi16&ig_expand=4906
499+ intrinsic_args ! ( fx, args => ( a, b) ; intrinsic) ;
500+
501+ assert_eq ! ( a. layout( ) , b. layout( ) ) ;
502+ let layout = a. layout ( ) ;
503+
504+ let ( lane_count, lane_ty) = layout. ty . simd_size_and_type ( fx. tcx ) ;
505+ let ( ret_lane_count, ret_lane_ty) = ret. layout ( ) . ty . simd_size_and_type ( fx. tcx ) ;
506+ assert_eq ! ( lane_ty, fx. tcx. types. i16 ) ;
507+ assert_eq ! ( ret_lane_ty, fx. tcx. types. u8 ) ;
508+ assert_eq ! ( lane_count * 2 , ret_lane_count) ;
509+
510+ let zero = fx. bcx . ins ( ) . iconst ( types:: I16 , 0 ) ;
511+ let max_u8 = fx. bcx . ins ( ) . iconst ( types:: I16 , 255 ) ;
512+ let ret_lane_layout = fx. layout_of ( fx. tcx . types . u8 ) ;
513+
514+ for idx in 0 ..lane_count / 2 {
515+ let lane = a. value_lane ( fx, idx) . load_scalar ( fx) ;
516+ let sat = fx. bcx . ins ( ) . smax ( lane, zero) ;
517+ let sat = fx. bcx . ins ( ) . umin ( sat, max_u8) ;
518+ let res = fx. bcx . ins ( ) . ireduce ( types:: I8 , sat) ;
519+
520+ let res_lane = CValue :: by_val ( res, ret_lane_layout) ;
521+ ret. place_lane ( fx, idx) . write_cvalue ( fx, res_lane) ;
522+ }
523+
524+ for idx in 0 ..lane_count / 2 {
525+ let lane = b. value_lane ( fx, idx) . load_scalar ( fx) ;
526+ let sat = fx. bcx . ins ( ) . smax ( lane, zero) ;
527+ let sat = fx. bcx . ins ( ) . umin ( sat, max_u8) ;
528+ let res = fx. bcx . ins ( ) . ireduce ( types:: I8 , sat) ;
529+
530+ let res_lane = CValue :: by_val ( res, ret_lane_layout) ;
531+ ret. place_lane ( fx, lane_count / 2 + idx) . write_cvalue ( fx, res_lane) ;
532+ }
533+
534+ for idx in 0 ..lane_count / 2 {
535+ let lane = a. value_lane ( fx, idx) . load_scalar ( fx) ;
536+ let sat = fx. bcx . ins ( ) . smax ( lane, zero) ;
537+ let sat = fx. bcx . ins ( ) . umin ( sat, max_u8) ;
538+ let res = fx. bcx . ins ( ) . ireduce ( types:: I8 , sat) ;
539+
540+ let res_lane = CValue :: by_val ( res, ret_lane_layout) ;
541+ ret. place_lane ( fx, lane_count / 2 * 2 + idx) . write_cvalue ( fx, res_lane) ;
542+ }
543+
544+ for idx in 0 ..lane_count / 2 {
545+ let lane = b. value_lane ( fx, idx) . load_scalar ( fx) ;
546+ let sat = fx. bcx . ins ( ) . smax ( lane, zero) ;
547+ let sat = fx. bcx . ins ( ) . umin ( sat, max_u8) ;
548+ let res = fx. bcx . ins ( ) . ireduce ( types:: I8 , sat) ;
549+
550+ let res_lane = CValue :: by_val ( res, ret_lane_layout) ;
551+ ret. place_lane ( fx, lane_count / 2 * 3 + idx) . write_cvalue ( fx, res_lane) ;
552+ }
553+ }
554+
555+ "llvm.x86.sse2.packssdw.128" => {
556+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm_packs_epi32&ig_expand=4889
557+ intrinsic_args ! ( fx, args => ( a, b) ; intrinsic) ;
558+
559+ assert_eq ! ( a. layout( ) , b. layout( ) ) ;
560+ let layout = a. layout ( ) ;
561+
562+ let ( lane_count, lane_ty) = layout. ty . simd_size_and_type ( fx. tcx ) ;
563+ let ( ret_lane_count, ret_lane_ty) = ret. layout ( ) . ty . simd_size_and_type ( fx. tcx ) ;
564+ assert_eq ! ( lane_ty, fx. tcx. types. i32 ) ;
565+ assert_eq ! ( ret_lane_ty, fx. tcx. types. i16 ) ;
566+ assert_eq ! ( lane_count * 2 , ret_lane_count) ;
567+
568+ let min_i16 = fx. bcx . ins ( ) . iconst ( types:: I32 , i64:: from ( i16:: MIN as u16 ) ) ;
569+ let max_i16 = fx. bcx . ins ( ) . iconst ( types:: I32 , i64:: from ( i16:: MAX as u16 ) ) ;
570+ let ret_lane_layout = fx. layout_of ( fx. tcx . types . i16 ) ;
571+
572+ for idx in 0 ..lane_count {
573+ let lane = a. value_lane ( fx, idx) . load_scalar ( fx) ;
574+ let sat = fx. bcx . ins ( ) . smax ( lane, min_i16) ;
575+ let sat = fx. bcx . ins ( ) . umin ( sat, max_i16) ;
576+ let res = fx. bcx . ins ( ) . ireduce ( types:: I16 , sat) ;
577+
578+ let res_lane = CValue :: by_val ( res, ret_lane_layout) ;
579+ ret. place_lane ( fx, idx) . write_cvalue ( fx, res_lane) ;
580+ }
581+
582+ for idx in 0 ..lane_count {
583+ let lane = b. value_lane ( fx, idx) . load_scalar ( fx) ;
584+ let sat = fx. bcx . ins ( ) . smax ( lane, min_i16) ;
585+ let sat = fx. bcx . ins ( ) . umin ( sat, max_i16) ;
586+ let res = fx. bcx . ins ( ) . ireduce ( types:: I16 , sat) ;
587+
588+ let res_lane = CValue :: by_val ( res, ret_lane_layout) ;
589+ ret. place_lane ( fx, lane_count + idx) . write_cvalue ( fx, res_lane) ;
590+ }
591+ }
592+
593+ "llvm.x86.avx2.packssdw" => {
594+ // https://www.intel.com/content/www/us/en/docs/intrinsics-guide/index.html#text=_mm256_packs_epi32&ig_expand=4892
595+ intrinsic_args ! ( fx, args => ( a, b) ; intrinsic) ;
596+
597+ assert_eq ! ( a. layout( ) , b. layout( ) ) ;
598+ let layout = a. layout ( ) ;
599+
600+ let ( lane_count, lane_ty) = layout. ty . simd_size_and_type ( fx. tcx ) ;
601+ let ( ret_lane_count, ret_lane_ty) = ret. layout ( ) . ty . simd_size_and_type ( fx. tcx ) ;
602+ assert_eq ! ( lane_ty, fx. tcx. types. i32 ) ;
603+ assert_eq ! ( ret_lane_ty, fx. tcx. types. i16 ) ;
604+ assert_eq ! ( lane_count * 2 , ret_lane_count) ;
605+
606+ let min_i16 = fx. bcx . ins ( ) . iconst ( types:: I32 , i64:: from ( i16:: MIN as u16 ) ) ;
607+ let max_i16 = fx. bcx . ins ( ) . iconst ( types:: I32 , i64:: from ( i16:: MAX as u16 ) ) ;
608+ let ret_lane_layout = fx. layout_of ( fx. tcx . types . i16 ) ;
609+
610+ for idx in 0 ..lane_count / 2 {
611+ let lane = a. value_lane ( fx, idx) . load_scalar ( fx) ;
612+ let sat = fx. bcx . ins ( ) . smax ( lane, min_i16) ;
613+ let sat = fx. bcx . ins ( ) . umin ( sat, max_i16) ;
614+ let res = fx. bcx . ins ( ) . ireduce ( types:: I16 , sat) ;
615+
616+ let res_lane = CValue :: by_val ( res, ret_lane_layout) ;
617+ ret. place_lane ( fx, idx) . write_cvalue ( fx, res_lane) ;
618+ }
619+
620+ for idx in 0 ..lane_count / 2 {
621+ let lane = b. value_lane ( fx, idx) . load_scalar ( fx) ;
622+ let sat = fx. bcx . ins ( ) . smax ( lane, min_i16) ;
623+ let sat = fx. bcx . ins ( ) . umin ( sat, max_i16) ;
624+ let res = fx. bcx . ins ( ) . ireduce ( types:: I16 , sat) ;
625+
626+ let res_lane = CValue :: by_val ( res, ret_lane_layout) ;
627+ ret. place_lane ( fx, lane_count / 2 + idx) . write_cvalue ( fx, res_lane) ;
628+ }
629+
630+ for idx in 0 ..lane_count / 2 {
631+ let lane = a. value_lane ( fx, idx) . load_scalar ( fx) ;
632+ let sat = fx. bcx . ins ( ) . smax ( lane, min_i16) ;
633+ let sat = fx. bcx . ins ( ) . umin ( sat, max_i16) ;
634+ let res = fx. bcx . ins ( ) . ireduce ( types:: I16 , sat) ;
635+
636+ let res_lane = CValue :: by_val ( res, ret_lane_layout) ;
637+ ret. place_lane ( fx, lane_count / 2 * 2 + idx) . write_cvalue ( fx, res_lane) ;
638+ }
639+
640+ for idx in 0 ..lane_count / 2 {
641+ let lane = b. value_lane ( fx, idx) . load_scalar ( fx) ;
642+ let sat = fx. bcx . ins ( ) . smax ( lane, min_i16) ;
643+ let sat = fx. bcx . ins ( ) . umin ( sat, max_i16) ;
644+ let res = fx. bcx . ins ( ) . ireduce ( types:: I16 , sat) ;
645+
646+ let res_lane = CValue :: by_val ( res, ret_lane_layout) ;
647+ ret. place_lane ( fx, lane_count / 2 * 3 + idx) . write_cvalue ( fx, res_lane) ;
648+ }
649+ }
650+
497651 _ => {
498652 fx. tcx
499653 . sess
0 commit comments