Skip to content

Commit 2a8541e

Browse files
committed
cmd/ejobs: support min/max import semantics for module filtering
Change-Id: I008b092499705b20be9dfdc42a6d0a1a920ab57a Reviewed-on: https://go-review.googlesource.com/c/pkgsite-metrics/+/679355 Reviewed-by: Jonathan Amsterdam <[email protected]> LUCI-TryBot-Result: Go LUCI <[email protected]>
1 parent 0f64c81 commit 2a8541e

File tree

9 files changed

+36
-19
lines changed

9 files changed

+36
-19
lines changed

cmd/ejobs/main.go

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -47,6 +47,7 @@ var (
4747

4848
var (
4949
minImporters int // for start
50+
maxImporters int // for start
5051
noDeps bool // for start
5152
moduleFile string // for start
5253
waitInterval time.Duration // for wait
@@ -69,8 +70,10 @@ var commands = []command{
6970
"start a job",
7071
doStart,
7172
func(fs *flag.FlagSet) {
72-
fs.IntVar(&minImporters, "min", -1,
73+
fs.IntVar(&minImporters, "min", -1,
7374
"run on modules with at least this many importers (<0: use server default of 10)")
75+
fs.IntVar(&maxImporters, "max", -1,
76+
"run on modules with at most this many importers (<0: use server default of math.MaxInt)")
7477
fs.StringVar(&moduleFile, "file", "",
7578
"file with modules to use: each line is MODULE_PATH VERSION NUM_IMPORTERS")
7679
fs.BoolVar(&noDeps, "nodeps", false, "do not download dependencies for modules")
@@ -342,6 +345,9 @@ func doStart(ctx context.Context, args []string) error {
342345
if minImporters >= 0 {
343346
u += fmt.Sprintf("&min=%d", minImporters)
344347
}
348+
if maxImporters >= 0 {
349+
u += fmt.Sprintf("&max=%d", maxImporters)
350+
}
345351
if gcsPath != "" {
346352
gurl := "gs://" + gcsPath
347353
u += fmt.Sprintf("&file=%s", url.QueryEscape(gurl))

internal/analysis/analysis.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,7 @@ type EnqueueParams struct {
4545
Args string // command-line arguments to binary; split on whitespace
4646
Insecure bool // if true, run outside sandbox
4747
Min int // minimum import-by count for a module to be included
48+
Max int // maximum import-by count for a module to be included
4849
File string // path to file containing modules; if missing, use DB
4950
Suffix string // appended to task queue IDs to generate unique tasks
5051
User string // user initiating enqueue

internal/pkgsitedb/db.go

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -52,15 +52,15 @@ func redactPassword(dbinfo string) string {
5252
// ModuleSpecs retrieves all modules that contain packages that are
5353
// imported by at least minImports and at most maxImports packages.
5454
// It looks for the information in the search_documents table of the given pkgsite DB.
55-
func ModuleSpecs(ctx context.Context, db *sql.DB, minImportedByCount int) (specs []scan.ModuleSpec, err error) {
55+
func ModuleSpecs(ctx context.Context, db *sql.DB, minImports, maxImports int) (specs []scan.ModuleSpec, err error) {
5656
defer derrors.Wrap(&err, "moduleSpecsFromDB")
5757
query := `
5858
SELECT module_path, version, max(imported_by_count)
5959
FROM search_documents
6060
GROUP BY module_path, version
61-
HAVING max(imported_by_count) >= $1
62-
ORDER by max(imported_by_count) desc`
63-
rows, err := db.QueryContext(ctx, query, minImportedByCount)
61+
HAVING max(imported_by_count) >= $1 AND max(imported_by_count) <= $2
62+
ORDER BY max(imported_by_count) desc`
63+
rows, err := db.QueryContext(ctx, query, minImports, maxImports)
6464
if err != nil {
6565
return nil, err
6666
}

internal/pkgsitedb/db_test.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,6 +12,7 @@ import (
1212
"database/sql"
1313
"flag"
1414
"fmt"
15+
"math"
1516
"net/url"
1617
"strings"
1718
"testing"
@@ -50,7 +51,7 @@ func TestModuleSpecs(t *testing.T) {
5051
if err := db.PingContext(ctx); err != nil {
5152
t.Fatal(err)
5253
}
53-
got, err := ModuleSpecs(ctx, db, 1000)
54+
got, err := ModuleSpecs(ctx, db, 1000, math.MaxInt)
5455
if err != nil {
5556
t.Fatal(err)
5657
}

internal/scan/parse.go

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ type ModuleSpec struct {
3535
ImportedBy int
3636
}
3737

38-
func ParseCorpusFile(filename string, minImportedByCount int) (ms []ModuleSpec, err error) {
38+
func ParseCorpusFile(filename string, minImports, maxImports int) (ms []ModuleSpec, err error) {
3939
defer derrors.Wrap(&err, "ParseCorpusFile(%q)", filename)
4040
lines, err := ReadFileLines(filename)
4141
if err != nil {
@@ -60,7 +60,7 @@ func ParseCorpusFile(filename string, minImportedByCount int) (ms []ModuleSpec,
6060
if err != nil {
6161
return nil, fmt.Errorf("%v on line %q", err, line)
6262
}
63-
if n >= minImportedByCount {
63+
if minImports <= n && n <= maxImports {
6464
ms = append(ms, ModuleSpec{Path: path, Version: vers, ImportedBy: n})
6565
}
6666
}

internal/scan/parse_test.go

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ package scan
66

77
import (
88
"flag"
9+
"math"
910
"net/http"
1011
"reflect"
1112
"slices"
@@ -100,7 +101,7 @@ func TestModuleURLPathError(t *testing.T) {
100101

101102
func TestParseCorpusFile(t *testing.T) {
102103
const file = "testdata/modules.txt"
103-
got, err := ParseCorpusFile(file, 1)
104+
got, err := ParseCorpusFile(file, 1, math.MaxInt)
104105
if err != nil {
105106
t.Fatal(err)
106107
}
@@ -114,7 +115,7 @@ func TestParseCorpusFile(t *testing.T) {
114115
t.Errorf("\n got %v\nwant %v", got, want)
115116
}
116117

117-
got, err = ParseCorpusFile(file, 10)
118+
got, err = ParseCorpusFile(file, 10, math.MaxInt)
118119
if err != nil {
119120
t.Fatal(err)
120121
}

internal/worker/analysis.go

Lines changed: 5 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -426,10 +426,13 @@ func readSource(file string, line int, nContext int) (_ string, err error) {
426426
func (s *analysisServer) handleEnqueue(w http.ResponseWriter, r *http.Request) (err error) {
427427
defer derrors.Wrap(&err, "analysisServer.handleEnqueue")
428428
ctx := r.Context()
429-
params := &analysis.EnqueueParams{Min: defaultMinImportedByCount}
429+
params := &analysis.EnqueueParams{Min: defaultMinImportedByCount, Max: defaultMaxImportedByCount}
430430
if err := scan.ParseParams(r, params); err != nil {
431431
return fmt.Errorf("%w: %v", derrors.InvalidArgument, err)
432432
}
433+
if params.Min > params.Max {
434+
return fmt.Errorf("%w: analysis: bad min/max range", derrors.InvalidArgument)
435+
}
433436
if params.Binary == "" {
434437
return fmt.Errorf("%w: analysis: missing binary", derrors.InvalidArgument)
435438
}
@@ -446,7 +449,7 @@ func (s *analysisServer) handleEnqueue(w http.ResponseWriter, r *http.Request) (
446449
if err != nil {
447450
return err
448451
}
449-
mods, err := readModules(ctx, s.cfg, params.File, params.Min)
452+
mods, err := readModules(ctx, s.cfg, params.File, params.Min, params.Max)
450453
if err != nil {
451454
return err
452455
}

internal/worker/enqueue.go

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -6,6 +6,7 @@ package worker
66

77
import (
88
"context"
9+
"math"
910
"sync"
1011

1112
"golang.org/x/pkgsite-metrics/internal/config"
@@ -16,24 +17,27 @@ import (
1617
"golang.org/x/pkgsite-metrics/internal/scan"
1718
)
1819

19-
const defaultMinImportedByCount = 10
20+
const (
21+
defaultMinImportedByCount = 10
22+
defaultMaxImportedByCount = math.MaxInt
23+
)
2024

21-
func readModules(ctx context.Context, cfg *config.Config, file string, minImpCount int) ([]scan.ModuleSpec, error) {
25+
func readModules(ctx context.Context, cfg *config.Config, file string, minImports, maxImports int) ([]scan.ModuleSpec, error) {
2226
if file != "" {
2327
log.Infof(ctx, "reading modules from file %s", file)
24-
return scan.ParseCorpusFile(file, minImpCount)
28+
return scan.ParseCorpusFile(file, minImports, maxImports)
2529
}
2630
log.Infof(ctx, "reading modules from DB %s", cfg.PkgsiteDBName)
27-
return readFromDB(ctx, cfg, minImpCount)
31+
return readFromDB(ctx, cfg, minImports, maxImports)
2832
}
2933

30-
func readFromDB(ctx context.Context, cfg *config.Config, minImportedByCount int) ([]scan.ModuleSpec, error) {
34+
func readFromDB(ctx context.Context, cfg *config.Config, minImports, maxImports int) ([]scan.ModuleSpec, error) {
3135
db, err := pkgsitedb.Open(ctx, cfg)
3236
if err != nil {
3337
return nil, err
3438
}
3539
defer db.Close()
36-
return pkgsitedb.ModuleSpecs(ctx, db, minImportedByCount)
40+
return pkgsitedb.ModuleSpecs(ctx, db, minImports, maxImports)
3741
}
3842

3943
func enqueueTasks(ctx context.Context, tasks []queue.Task, q queue.Queue, opts *queue.Options) (err error) {

internal/worker/govulncheck_enqueue.go

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,7 @@ import (
88
"context"
99
"errors"
1010
"fmt"
11+
"math"
1112
"net/http"
1213
"sort"
1314
"strings"
@@ -80,7 +81,7 @@ func createGovulncheckQueueTasks(ctx context.Context, cfg *config.Config, params
8081
)
8182
for _, mode := range modes {
8283
if modspecs == nil {
83-
modspecs, err = readModules(ctx, cfg, params.File, params.Min)
84+
modspecs, err = readModules(ctx, cfg, params.File, params.Min, math.MaxInt)
8485
if err != nil {
8586
return nil, err
8687
}

0 commit comments

Comments
 (0)