@@ -31,6 +31,12 @@ static cpumask_t nest_imc_cpumask;
 struct imc_pmu_ref *nest_imc_refc;
 static int nest_pmus;
 
+/* Core IMC data structures and variables */
+
+static cpumask_t core_imc_cpumask;
+struct imc_pmu_ref *core_imc_refc;
+static struct imc_pmu *core_imc_pmu;
+
 struct imc_pmu *imc_event_to_pmu(struct perf_event *event)
 {
 	return container_of(event->pmu, struct imc_pmu, pmu);
@@ -62,11 +68,13 @@ static ssize_t imc_pmu_cpumask_get_attr(struct device *dev,
 	struct imc_pmu *imc_pmu = container_of(pmu, struct imc_pmu, pmu);
 	cpumask_t *active_mask;
 
-	/* Subsequenct patch will add more pmu types here */
 	switch (imc_pmu->domain) {
 	case IMC_DOMAIN_NEST:
 		active_mask = &nest_imc_cpumask;
 		break;
+	case IMC_DOMAIN_CORE:
+		active_mask = &core_imc_cpumask;
+		break;
 	default:
 		return 0;
 	}
@@ -486,6 +494,240 @@ static int nest_imc_event_init(struct perf_event *event)
 	return 0;
 }
 
+/*
+ * core_imc_mem_init : Initializes memory for the current core.
+ *
+ * Uses alloc_pages_node() and uses the returned address as an argument to
+ * an opal call to configure the pdbar. The address sent as an argument is
+ * converted to physical address before the opal call is made. This is the
+ * base address at which the core imc counters are populated.
+ */
+static int core_imc_mem_init(int cpu, int size)
+{
+	int phys_id, rc = 0, core_id = (cpu / threads_per_core);
+	struct imc_mem_info *mem_info;
+
+	/*
+	 * alloc_pages_node() will allocate memory for core in the
+	 * local node only.
+	 */
+	phys_id = topology_physical_package_id(cpu);
+	mem_info = &core_imc_pmu->mem_info[core_id];
+	mem_info->id = core_id;
+
+	/* We need only vbase for core counters */
+	mem_info->vbase = page_address(alloc_pages_node(phys_id,
+					GFP_KERNEL | __GFP_ZERO | __GFP_THISNODE,
+					get_order(size)));
+	if (!mem_info->vbase)
+		return -ENOMEM;
+
+	/* Init the mutex */
+	core_imc_refc[core_id].id = core_id;
+	mutex_init(&core_imc_refc[core_id].lock);
+
+	rc = opal_imc_counters_init(OPAL_IMC_COUNTERS_CORE,
+				    __pa((void *)mem_info->vbase),
+				    get_hard_smp_processor_id(cpu));
+	if (rc) {
+		free_pages((u64)mem_info->vbase, get_order(size));
+		mem_info->vbase = NULL;
+	}
+
+	return rc;
+}
+
+static bool is_core_imc_mem_inited(int cpu)
+{
+	struct imc_mem_info *mem_info;
+	int core_id = (cpu / threads_per_core);
+
+	mem_info = &core_imc_pmu->mem_info[core_id];
+	if (!mem_info->vbase)
+		return false;
+
+	return true;
+}
+
+static int ppc_core_imc_cpu_online(unsigned int cpu)
+{
+	const struct cpumask *l_cpumask;
+	static struct cpumask tmp_mask;
+	int ret = 0;
+
+	/* Get the cpumask for this core */
+	l_cpumask = cpu_sibling_mask(cpu);
+
+	/* If a cpu for this core is already set, then, don't do anything */
+	if (cpumask_and(&tmp_mask, l_cpumask, &core_imc_cpumask))
+		return 0;
+
+	if (!is_core_imc_mem_inited(cpu)) {
+		ret = core_imc_mem_init(cpu, core_imc_pmu->counter_mem_size);
+		if (ret) {
+			pr_info("core_imc memory allocation for cpu %d failed\n", cpu);
+			return ret;
+		}
+	}
+
+	/* set the cpu in the mask */
+	cpumask_set_cpu(cpu, &core_imc_cpumask);
+	return 0;
+}
+
+static int ppc_core_imc_cpu_offline(unsigned int cpu)
+{
+	unsigned int ncpu, core_id;
+	struct imc_pmu_ref *ref;
+
+	/*
+	 * clear this cpu out of the mask, if not present in the mask,
+	 * don't bother doing anything.
+	 */
+	if (!cpumask_test_and_clear_cpu(cpu, &core_imc_cpumask))
+		return 0;
+
+	/* Find any online cpu in that core except the current "cpu" */
+	ncpu = cpumask_any_but(cpu_sibling_mask(cpu), cpu);
+
+	if (ncpu >= 0 && ncpu < nr_cpu_ids) {
+		cpumask_set_cpu(ncpu, &core_imc_cpumask);
+		perf_pmu_migrate_context(&core_imc_pmu->pmu, cpu, ncpu);
+	} else {
+		/*
+		 * If this is the last cpu in this core, skip taking the reference
+		 * count mutex lock for this core and directly zero "refc" for
+		 * this core.
+		 */
+		opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
+				       get_hard_smp_processor_id(cpu));
+		core_id = cpu / threads_per_core;
+		ref = &core_imc_refc[core_id];
+		if (!ref)
+			return -EINVAL;
+
+		ref->refc = 0;
+	}
+	return 0;
+}
+
+static int core_imc_pmu_cpumask_init(void)
+{
+	return cpuhp_setup_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE,
+				 "perf/powerpc/imc_core:online",
+				 ppc_core_imc_cpu_online,
+				 ppc_core_imc_cpu_offline);
+}
+
+static void core_imc_counters_release(struct perf_event *event)
+{
+	int rc, core_id;
+	struct imc_pmu_ref *ref;
+
+	if (event->cpu < 0)
+		return;
+	/*
+	 * See if we need to disable the IMC PMU.
+	 * If no events are currently in use, then we have to take a
+	 * mutex to ensure that we don't race with another task
+	 * enabling or disabling the core counters.
+	 */
+	core_id = event->cpu / threads_per_core;
+
+	/* Take the mutex lock and decrement the reference count for this core */
+	ref = &core_imc_refc[core_id];
+	if (!ref)
+		return;
+
+	mutex_lock(&ref->lock);
+	ref->refc--;
+	if (ref->refc == 0) {
+		rc = opal_imc_counters_stop(OPAL_IMC_COUNTERS_CORE,
+					    get_hard_smp_processor_id(event->cpu));
+		if (rc) {
+			mutex_unlock(&ref->lock);
+			pr_err("IMC: Unable to stop the counters for core %d\n", core_id);
+			return;
+		}
+	} else if (ref->refc < 0) {
+		WARN(1, "core-imc: Invalid event reference count\n");
+		ref->refc = 0;
+	}
+	mutex_unlock(&ref->lock);
+}
+
+static int core_imc_event_init(struct perf_event *event)
+{
+	int core_id, rc;
+	u64 config = event->attr.config;
+	struct imc_mem_info *pcmi;
+	struct imc_pmu *pmu;
+	struct imc_pmu_ref *ref;
+
+	if (event->attr.type != event->pmu->type)
+		return -ENOENT;
+
+	/* Sampling not supported */
+	if (event->hw.sample_period)
+		return -EINVAL;
+
+	/* unsupported modes and filters */
+	if (event->attr.exclude_user ||
+	    event->attr.exclude_kernel ||
+	    event->attr.exclude_hv ||
+	    event->attr.exclude_idle ||
+	    event->attr.exclude_host ||
+	    event->attr.exclude_guest)
+		return -EINVAL;
+
+	if (event->cpu < 0)
+		return -EINVAL;
+
+	event->hw.idx = -1;
+	pmu = imc_event_to_pmu(event);
+
+	/* Sanity check for config (event offset) */
+	if (((config & IMC_EVENT_OFFSET_MASK) > pmu->counter_mem_size))
+		return -EINVAL;
+
+	if (!is_core_imc_mem_inited(event->cpu))
+		return -ENODEV;
+
+	core_id = event->cpu / threads_per_core;
+	pcmi = &core_imc_pmu->mem_info[core_id];
+	if ((!pcmi->vbase))
+		return -ENODEV;
+
+	/* Get the core_imc mutex for this core */
+	ref = &core_imc_refc[core_id];
+	if (!ref)
+		return -EINVAL;
+
+	/*
+	 * Core pmu units are enabled only when used.
+	 * See if this is triggered for the first time.
+	 * If yes, take the mutex lock and enable the core counters.
+	 * If not, just increment the count in core_imc_refc struct.
+	 */
+	mutex_lock(&ref->lock);
+	if (ref->refc == 0) {
+		rc = opal_imc_counters_start(OPAL_IMC_COUNTERS_CORE,
+					     get_hard_smp_processor_id(event->cpu));
+		if (rc) {
+			mutex_unlock(&ref->lock);
+			pr_err("core-imc: Unable to start the counters for core %d\n",
+			       core_id);
+			return rc;
+		}
+	}
+	++ref->refc;
+	mutex_unlock(&ref->lock);
+
+	event->hw.event_base = (u64)pcmi->vbase + (config & IMC_EVENT_OFFSET_MASK);
+	event->destroy = core_imc_counters_release;
+	return 0;
+}
+
 static u64 *get_event_base_addr(struct perf_event *event)
 {
 	/*
@@ -564,12 +806,15 @@ static int update_pmu_ops(struct imc_pmu *pmu)
 	pmu->pmu.attr_groups = pmu->attr_groups;
 	pmu->attr_groups[IMC_FORMAT_ATTR] = &imc_format_group;
 
-	/* Subsequenct patch will add more pmu types here */
 	switch (pmu->domain) {
 	case IMC_DOMAIN_NEST:
 		pmu->pmu.event_init = nest_imc_event_init;
 		pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group;
 		break;
+	case IMC_DOMAIN_CORE:
+		pmu->pmu.event_init = core_imc_event_init;
+		pmu->attr_groups[IMC_CPUMASK_ATTR] = &imc_pmu_cpumask_attr_group;
+		break;
 	default:
 		break;
 	}
@@ -621,6 +866,22 @@ static int init_nest_pmu_ref(void)
 	return 0;
 }
 
+static void cleanup_all_core_imc_memory(void)
+{
+	int i, nr_cores = num_present_cpus() / threads_per_core;
+	struct imc_mem_info *ptr = core_imc_pmu->mem_info;
+	int size = core_imc_pmu->counter_mem_size;
+
+	/* mem_info will never be NULL */
+	for (i = 0; i < nr_cores; i++) {
+		if (ptr[i].vbase)
+			free_pages((u64)ptr[i].vbase, get_order(size));
+	}
+
+	kfree(ptr);
+	kfree(core_imc_refc);
+}
+
 /*
  * Common function to unregister cpu hotplug callback and
  * free the memory.
@@ -641,6 +902,12 @@ static void imc_common_cpuhp_mem_free(struct imc_pmu *pmu_ptr)
 		mutex_unlock(&nest_init_lock);
 	}
 
+	/* Free core_imc memory */
+	if (pmu_ptr->domain == IMC_DOMAIN_CORE) {
+		cpuhp_remove_state(CPUHP_AP_PERF_POWERPC_CORE_IMC_ONLINE);
+		cleanup_all_core_imc_memory();
+	}
+
 	/* Only free the attr_groups which are dynamically allocated */
 	kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]->attrs);
 	kfree(pmu_ptr->attr_groups[IMC_EVENT_ATTR]);
@@ -656,11 +923,11 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
 			 int pmu_index)
 {
 	const char *s;
+	int nr_cores;
 
 	if (of_property_read_string(parent, "name", &s))
 		return -ENODEV;
 
-	/* Subsequenct patch will add more pmu types here */
 	switch (pmu_ptr->domain) {
 	case IMC_DOMAIN_NEST:
 		/* Update the pmu name */
@@ -671,6 +938,27 @@ static int imc_mem_init(struct imc_pmu *pmu_ptr, struct device_node *parent,
 		/* Needed for hotplug/migration */
 		per_nest_pmu_arr[pmu_index] = pmu_ptr;
 		break;
+	case IMC_DOMAIN_CORE:
+		/* Update the pmu name */
+		pmu_ptr->pmu.name = kasprintf(GFP_KERNEL, "%s%s", s, "_imc");
+		if (!pmu_ptr->pmu.name)
+			return -ENOMEM;
+
+		nr_cores = num_present_cpus() / threads_per_core;
+		pmu_ptr->mem_info = kcalloc(nr_cores, sizeof(struct imc_mem_info),
+					    GFP_KERNEL);
+
+		if (!pmu_ptr->mem_info)
+			return -ENOMEM;
+
+		core_imc_refc = kcalloc(nr_cores, sizeof(struct imc_pmu_ref),
+					GFP_KERNEL);
+
+		if (!core_imc_refc)
+			return -ENOMEM;
+
+		core_imc_pmu = pmu_ptr;
+		break;
 	default:
 		return -EINVAL;
 	}
@@ -696,7 +984,6 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
 	if (ret)
 		goto err_free;
 
-	/* Subsequenct patch will add more pmu types here */
 	switch (pmu_ptr->domain) {
 	case IMC_DOMAIN_NEST:
 		/*
@@ -721,6 +1008,14 @@ int init_imc_pmu(struct device_node *parent, struct imc_pmu *pmu_ptr, int pmu_id
 		}
 		nest_pmus++;
 		mutex_unlock(&nest_init_lock);
+		break;
+	case IMC_DOMAIN_CORE:
+		ret = core_imc_pmu_cpumask_init();
+		if (ret) {
+			cleanup_all_core_imc_memory();
+			return ret;
+		}
+
 		break;
 	default:
 		return -1;	/* Unknown domain */