Skip to content

Commit abe5f48

Browse files
committed
Add support to monitor bpf programs.
BPF is a very important component of modern Linux systems, and it keeps gaining features and adoption. This commit enables atop to monitor BPF programs. The output looks like: ATOP - kerneltest002 2020/06/16 17:01:12 -------------- 10s elapsed PRC | sys 2.72s | user 4.85s | #proc 761 | #zombie 0 | #exit 250 | CPU | sys 29% | user 50% | irq 0% | idle 7915% | wait 8% | CPL | avg1 1.68 | avg5 1.05 | avg15 0.72 | csw 160979 | intr 66341 | [...] BPF_PROG_ID NAME TOTAL_TIME_NS RUN_CNT CPU AVG_TIME_NS 894 tracepoint__sch 83882 11 0% 7625.64 893 tracepoint__sch 43231 5 0% 8646.20 892 tracepoint__tas 34818 4 0% 8704.50 PID SYSCPU USRCPU VGROW RGROW RDDSK WRDSK EXC THR S CPUNR CPU CMD 1/113 2669644 0.45s 1.08s 603.1M 23100K 0K 0K - 10 S 59 15% squashfuse_ll To build atop with BPF monitoring, we need to pass an option to make, as in: ATOP_BPF_SUPPORT=1 make -j Atop periodically enables monitoring of BPF programs by calling: bpf_enable_stats(BPF_STATS_RUN_TIME); Since monitoring BPF programs adds non-trivial overhead to those programs, the following options are added to monitor BPF programs less often: bpfsamplerate, default 1 bpfsampleinterval, default 1 BPF stats are enabled for bpfsampleinterval seconds every bpfsamplerate atop intervals. bpfsampleinterval must be smaller than the atop interval. Changes v1 => v2: 1. Instead of using the unsafe sysctl, use a new, safe API to enable BPF runtime stats. 2. Change output columns: remove "TYPE", add "CPU" for CPU %.
1 parent dd0fb8a commit abe5f48

File tree

12 files changed

+662
-156
lines changed

12 files changed

+662
-156
lines changed

Makefile

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -28,12 +28,18 @@ OBJMOD3 = showgeneric.o showlinux.o showsys.o showprocs.o
2828
OBJMOD4 = atopsar.o netatopif.o gpucom.o
2929
ALLMODS = $(OBJMOD0) $(OBJMOD1) $(OBJMOD2) $(OBJMOD3) $(OBJMOD4)
3030

31+
ifneq ($(ATOP_BPF_SUPPORT),)
32+
ALLMODS += photobpf.o
33+
ATOP_BPF_LDFLAGS = -lbpf
34+
CFLAGS += -DATOP_BPF_SUPPORT
35+
endif
36+
3137
VERS = $(shell ./atop -V 2>/dev/null| sed -e 's/^[^ ]* //' -e 's/ .*//')
3238

3339
all: atop atopsar atopacctd atopconvert atopcat
3440

3541
atop: atop.o $(ALLMODS) Makefile
36-
$(CC) atop.o $(ALLMODS) -o atop -lncursesw -lz -lm -lrt $(LDFLAGS)
42+
$(CC) atop.o $(ALLMODS) -o atop -lncursesw -lz -lm -lrt $(ATOP_BPF_LDFLAGS) $(LDFLAGS)
3743

3844
atopsar: atop
3945
ln -sf atop atopsar
@@ -187,7 +193,7 @@ versdate.h:
187193
./mkdate
188194

189195
atop.o: atop.h photoproc.h photosyst.h acctproc.h showgeneric.h
190-
atopsar.o: atop.h photoproc.h photosyst.h
196+
atopsar.o: atop.h photoproc.h photosyst.h
191197
rawlog.o: atop.h photoproc.h photosyst.h rawlog.h showgeneric.h
192198
various.o: atop.h acctproc.h
193199
ifprop.o: atop.h photosyst.h ifprop.h
@@ -200,7 +206,7 @@ photoproc.o: atop.h photoproc.h
200206
photosyst.o: atop.h photosyst.h
201207
showgeneric.o: atop.h photoproc.h photosyst.h showgeneric.h showlinux.h
202208
showlinux.o: atop.h photoproc.h photosyst.h showgeneric.h showlinux.h
203-
showsys.o: atop.h photoproc.h photosyst.h showgeneric.h
209+
showsys.o: atop.h photoproc.h photosyst.h showgeneric.h
204210
showprocs.o: atop.h photoproc.h photosyst.h showgeneric.h showlinux.h
205211
version.o: version.c version.h versdate.h
206212
gpucom.o: atop.h photoproc.h photosyst.h

atop.c

Lines changed: 68 additions & 26 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,11 @@
11
/*
22
** ATOP - System & Process Monitor
33
**
4-
** The program 'atop' offers the possibility to view the activity of
4+
** The program 'atop' offers the possibility to view the activity of
55
** the system on system-level as well as process-level.
66
**
77
** This source-file contains the main-function, which verifies the
8-
** calling-parameters and takes care of initialization.
8+
** calling-parameters and takes care of initialization.
99
** The engine-function drives the main sample-loop in which after the
1010
** indicated interval-time a snapshot is taken of the system-level and
1111
** process-level counters and the deviations are calculated and
@@ -35,7 +35,7 @@
3535
** --------------------------------------------------------------------------
3636
**
3737
** After initialization, the main-function calls the ENGINE.
38-
** For every cycle (so after another interval) the ENGINE calls various
38+
** For every cycle (so after another interval) the ENGINE calls various
3939
** functions as shown below:
4040
**
4141
** +---------------------------------------------------------------------+
@@ -48,15 +48,15 @@
4848
** | | ^ | ^ | ^ | ^ | | |
4949
** +---|-----|--------|-----|--------|----|--------|----|--------|----|--+
5050
** | | | | | | | | | |
51-
** +--V-----|--+ +--V-----|--+ +--V----|--+ +--V----|--+ +--V----|-+
51+
** +--V-----|--+ +--V-----|--+ +--V----|--+ +--V----|--+ +--V----|-+
5252
** | | | | | | | | | |
5353
** | photosyst | | photoproc | | acct | | deviate | | print |
5454
** | | | | |photoproc | | ...syst | | |
5555
** | | | | | | | ...proc | | |
56-
** +-----------+ +-----------+ +----------+ +----------+ +---------+
56+
** +-----------+ +-----------+ +----------+ +----------+ +---------+
5757
** ^ ^ ^ ^ |
5858
** | | | | |
59-
** | | | V V
59+
** | | | V V
6060
** ______ _________ __________ ________ _________
6161
** / \ / \ / \ / \ / \
6262
** /proc /proc accounting task screen or
@@ -84,8 +84,8 @@
8484
** When all counters have been gathered, functions are called to calculate
8585
** the difference between the current counter-values and the counter-values
8686
** of the previous cycle. These functions operate on the system-level
87-
** as well as on the task-level counters.
88-
** These differences are stored in a new structure(-table).
87+
** as well as on the task-level counters.
88+
** These differences are stored in a new structure(-table).
8989
**
9090
** - deviatsyst()
9191
** Calculates the differences between the current system-level
@@ -98,7 +98,7 @@
9898
** task-database; this "database" is implemented as a linked list
9999
** of taskinfo structures in memory (so no disk-accesses needed).
100100
** Within this linked list hash-buckets are maintained for fast searches.
101-
** The entire task-database is handled via a set of well-defined
101+
** The entire task-database is handled via a set of well-defined
102102
** functions from which the name starts with "pdb_..." (see the
103103
** source-file procdbase.c).
104104
** The processes which have been finished during the last cycle
@@ -112,7 +112,7 @@
112112
** these addresses can be modified in the main-function depending on particular
113113
** flags. In this way various representation-layers (ASCII, graphical, ...)
114114
** can be linked with 'atop'; the one to use can eventually be chosen
115-
** at runtime.
115+
** at runtime.
116116
**
117117
** $Log: atop.c,v $
118118
** Revision 1.49 2010/10/23 14:01:00 gerlof
@@ -296,6 +296,7 @@
296296
#include "showgeneric.h"
297297
#include "parseable.h"
298298
#include "gpucom.h"
299+
#include "photobpf.h"
299300

300301
#define allflags "ab:cde:fghijklmnopqrstuvwxyz1ABCDEFGHIJKL:MNOP:QRSTUVWXYZ"
301302
#define MAXFL 64 /* maximum number of command-line flags */
@@ -322,6 +323,16 @@ char threadview = 0; /* boolean: show individual threads */
322323
char calcpss = 0; /* boolean: read/calculate process PSS */
323324
char getwchan = 0; /* boolean: obtain wchan string */
324325

326+
/*
327+
** arguments for bpf stats sampling
328+
** We enable bpf stats for bpfsampleinterval seconds every bpfsamplerate
329+
** atop intervals. bpfsampleinterval must be smaller than the atop interval.
330+
**
331+
** If bpfsamplerate == 0, disable sampling of bpf stats.
332+
*/
333+
unsigned int bpfsamplerate = 1;
334+
unsigned int bpfsampleinterval = 1;
335+
325336
unsigned short hertz;
326337
unsigned int pagesize;
327338
unsigned int nrgpus;
@@ -391,6 +402,9 @@ void do_almostcrit(char *, char *);
391402
void do_atopsarflags(char *, char *);
392403
void do_pacctdir(char *, char *);
393404
void do_perfevents(char *, char *);
405+
void do_bpflines(char *, char *);
406+
void do_bpfsamplerate(char *, char *);
407+
void do_bpfsampleinterval(char *, char *);
394408

395409
static struct {
396410
char *tag;
@@ -440,6 +454,9 @@ static struct {
440454
{ "atopsarflags", do_atopsarflags, 0, },
441455
{ "perfevents", do_perfevents, 0, },
442456
{ "pacctdir", do_pacctdir, 1, },
457+
{ "bpflines", do_bpflines, 0, },
458+
{ "bpfsamplerate", do_bpfsamplerate, 0, },
459+
{ "bpfsampleinterval", do_bpfsampleinterval, 0, },
443460
};
444461

445462
/*
@@ -466,6 +483,8 @@ main(int argc, char *argv[])
466483
exit(42);
467484
}
468485

486+
photo_bpf_check();
487+
469488
/*
470489
** preserve command arguments to allow restart of other version
471490
*/
@@ -497,12 +516,12 @@ main(int argc, char *argv[])
497516
if ( memcmp(p, "atopsar", 7) == 0)
498517
return atopsar(argc, argv);
499518

500-
/*
501-
** interpret command-line arguments & flags
519+
/*
520+
** interpret command-line arguments & flags
502521
*/
503522
if (argc > 1)
504523
{
505-
/*
524+
/*
506525
** gather all flags for visualization-functions
507526
**
508527
** generic flags will be handled here;
@@ -600,17 +619,17 @@ main(int argc, char *argv[])
600619
}
601620

602621
/*
603-
** get optional interval-value and optional number of samples
622+
** get optional interval-value and optional number of samples
604623
*/
605624
if (optind < argc && optind < MAXFL)
606625
{
607626
if (!numeric(argv[optind]))
608627
prusage(argv[0]);
609-
628+
610629
interval = atoi(argv[optind]);
611-
630+
612631
optind++;
613-
632+
614633
if (optind < argc)
615634
{
616635
if (!numeric(argv[optind]) )
@@ -766,6 +785,7 @@ engine(void)
766785
gpupending=0; /* boolean: request sent */
767786

768787
struct gpupidstat *gp = NULL;
788+
struct bstats *bstats = NULL;
769789

770790
/*
771791
** initialization: allocate required memory dynamically
@@ -817,6 +837,8 @@ engine(void)
817837
if (nrgpus)
818838
supportflags |= GPUSTAT;
819839

840+
if (system_support_bpf())
841+
supportflags |= BPFSTAT;
820842
/*
821843
** MAIN-LOOP:
822844
** - Wait for the requested number of seconds or for other trigger
@@ -838,11 +860,15 @@ engine(void)
838860
/*
839861
** if the limit-flag is specified:
840862
** check if the next sample is expected before midnight;
841-
** if not, stop atop now
863+
** if not, stop atop now
842864
*/
843865
if (midnightflag && (curtime+interval) > timelimit)
844866
break;
845867

868+
if ((supportflags & BPFSTAT) &&
869+
bpfsamplerate && sampcnt % bpfsamplerate == 0)
870+
bstats = get_devbstats();
871+
846872
/*
847873
** wait for alarm-signal to arrive (except first sample)
848874
** or wait for SIGUSR1/SIGUSR2
@@ -859,13 +885,13 @@ engine(void)
859885
curtime = time(0); /* seconds since 1-1-1970 */
860886

861887
/*
862-
** send request for statistics to atopgpud
888+
** send request for statistics to atopgpud
863889
*/
864890
if (nrgpus)
865891
gpupending = gpud_statrequest();
866892

867893
/*
868-
** take a snapshot of the current system-level statistics
894+
** take a snapshot of the current system-level statistics
869895
** and calculate the deviations (i.e. calculate the activity
870896
** during the last sample)
871897
*/
@@ -918,7 +944,7 @@ engine(void)
918944
curtime-pretime > 0 ? curtime-pretime : 1);
919945

920946
/*
921-
** take a snapshot of the current task-level statistics
947+
** take a snapshot of the current task-level statistics
922948
** and calculate the deviations (i.e. calculate the activity
923949
** during the last sample)
924950
**
@@ -1013,10 +1039,14 @@ engine(void)
10131039
** the deviations
10141040
*/
10151041
lastcmd = (vis.show_samp)( curtime,
1016-
curtime-pretime > 0 ? curtime-pretime : 1,
1017-
&devtstat, devsstat,
1018-
nprocexit, noverflow, sampcnt==0);
1042+
curtime-pretime > 0 ? curtime-pretime : 1,
1043+
&devtstat, devsstat, bstats,
1044+
nprocexit, noverflow, sampcnt==0);
10191045

1046+
if (bstats) {
1047+
free(bstats->bpfall);
1048+
bstats = NULL;
1049+
}
10201050
/*
10211051
** release dynamically allocated memory
10221052
*/
@@ -1065,7 +1095,7 @@ prusage(char *myname)
10651095
printf("\t -%c show version information\n", MVERSION);
10661096
printf("\t -%c show or log all processes (i.s.o. active processes "
10671097
"only)\n", MALLPROC);
1068-
printf("\t -%c calculate proportional set size (PSS) per process\n",
1098+
printf("\t -%c calculate proportional set size (PSS) per process\n",
10691099
MCALCPSS);
10701100
printf("\t -%c determine WCHAN (string) per thread\n", MGETWCHAN);
10711101
printf("\t -P generate parseable output for specified label(s)\n");
@@ -1146,6 +1176,18 @@ do_linelength(char *name, char *val)
11461176
linelen = get_posval(name, val);
11471177
}
11481178

1179+
void
1180+
do_bpfsamplerate(char *name, char *val)
1181+
{
1182+
bpfsamplerate = get_posval(name, val);
1183+
}
1184+
1185+
void
1186+
do_bpfsampleinterval(char *name, char *val)
1187+
{
1188+
bpfsampleinterval = get_posval(name, val);
1189+
}
1190+
11491191
/*
11501192
** read RC-file and modify defaults accordingly
11511193
*/
@@ -1196,7 +1238,7 @@ readrc(char *path, int syslevel)
11961238
default:
11971239
if (tagname[0] == '#')
11981240
continue;
1199-
1241+
12001242
if (tagvalue[0] != '#')
12011243
break;
12021244

atop.h

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -42,8 +42,9 @@ struct tstat;
4242
struct devtstat;
4343
struct sstat;
4444
struct netpertask;
45+
struct bstats;
4546

46-
/*
47+
/*
4748
** miscellaneous flags
4849
*/
4950
#define RRBOOT 0x0001
@@ -57,7 +58,7 @@ struct netpertask;
5758

5859
struct visualize {
5960
char (*show_samp) (time_t, int,
60-
struct devtstat *, struct sstat *,
61+
struct devtstat *, struct sstat *, struct bstats *,
6162
int, unsigned int, char);
6263
void (*show_error) (const char *, ...);
6364
void (*show_end) (void);
@@ -105,6 +106,9 @@ extern int netbadness;
105106
extern int pagbadness;
106107
extern int almostcrit;
107108

109+
extern int bpflines;
110+
extern unsigned int bpfsampleinterval;
111+
108112
/*
109113
** bit-values for supportflags
110114
*/
@@ -114,9 +118,10 @@ extern int almostcrit;
114118
#define NETATOPD 0x00000020
115119
#define DOCKSTAT 0x00000040
116120
#define GPUSTAT 0x00000080
121+
#define BPFSTAT 0x00000100
117122

118123
/*
119-
** in rawlog file, the four least significant bits
124+
** in rawlog file, the four least significant bits
120125
** are moved to the per-sample flags and are therefore dummy
121126
** in the support flags of the general header
122127
*/
@@ -126,7 +131,7 @@ extern int almostcrit;
126131
** structure containing the start-addresses of functions for visualization
127132
*/
128133
char generic_samp (time_t, int,
129-
struct devtstat *, struct sstat *,
134+
struct devtstat *, struct sstat *, struct bstats *,
130135
int, unsigned int, char);
131136
void generic_error(const char *, ...);
132137
void generic_end (void);
@@ -168,7 +173,7 @@ int contcompar(const void *, const void *);
168173
count_t subcount(count_t, count_t);
169174
int rawread(void);
170175
char rawwrite (time_t, int,
171-
struct devtstat *, struct sstat *,
176+
struct devtstat *, struct sstat *, struct bstats *,
172177
int, unsigned int, char);
173178

174179
int numeric(char *);

0 commit comments

Comments
 (0)