Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 25 additions & 4 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,9 @@ Package vendoring is handled with [`govendor`](https://github.com/kardianos/gove

### Flags

* `help`
Show context-sensitive help (also try --help-long and --help-man).

* `web.listen-address`
Address to listen on for web interface and telemetry. Default is `:9187`.

Expand All @@ -55,6 +58,9 @@ Package vendoring is handled with [`govendor`](https://github.com/kardianos/gove
* `disable-settings-metrics`
Use the flag if you don't want to scrape `pg_settings`.

* `auto-discover-databases`
Whether to discover the databases on a server dynamically.

* `extend.query-path`
Path to a YAML file containing custom queries to run. Check out [`queries.yaml`](queries.yaml)
for examples of the format.
Expand All @@ -63,16 +69,22 @@ Package vendoring is handled with [`govendor`](https://github.com/kardianos/gove
Do not run - print the internal representation of the metric maps. Useful when debugging a custom
queries file.

* `constantLabels`
Labels to set in all metrics. A list of `label=value` pairs, separated by commas.

* `version`
Show application version.

* `exclude-databases`
A list of databases to remove when autoDiscoverDatabases is enabled.

* `log.level`
Set logging level: one of `debug`, `info`, `warn`, `error`, `fatal`

* `log.format`
Set the log output target and format. e.g. `logger:syslog?appname=bob&local=7` or `logger:stdout?json=true`
Defaults to `logger:stderr`.

* `constantLabels`
Labels to set in all metrics. A list of `label=value` pairs, separated by commas.

### Environment Variables

The following environment variables configure the exporter:
Expand All @@ -85,6 +97,9 @@ The following environment variables configure the exporter:
an alternative to `DATA_SOURCE_NAME` which exclusively accepts the raw URI
without a username and password component.

* `DATA_SOURCE_URI_FILE`
The same as above but reads the URI from a file.

* `DATA_SOURCE_USER`
When using `DATA_SOURCE_URI`, this environment variable is used to specify
the username.
Expand All @@ -111,13 +126,19 @@ The following environment variables configure the exporter:
* `PG_EXPORTER_DISABLE_SETTINGS_METRICS`
Use the flag if you don't want to scrape `pg_settings`. Value can be `true` or `false`. Defauls is `false`.

* `PG_EXPORTER_AUTO_DISCOVER_DATABASES`
Whether to discover the databases on a server dynamically. Value can be `true` or `false`. Defauls is `false`.

* `PG_EXPORTER_EXTEND_QUERY_PATH`
Path to a YAML file containing custom queries to run. Check out [`queries.yaml`](queries.yaml)
for examples of the format.

* `PG_EXPORTER_CONSTANT_LABELS`
Labels to set in all metrics. A list of `label=value` pairs, separated by commas.

* `PG_EXPORTER_EXCLUDE_DATABASES`
A comma-separated list of databases to remove when autoDiscoverDatabases is enabled. Default is empty string.

Settings set by environment variables starting with `PG_` will be overwritten by the corresponding CLI flag if given.

### Setting the Postgres server's data source name
Expand Down Expand Up @@ -163,7 +184,7 @@ flag. This removes all built-in metrics, and uses only metrics defined by querie

### Automatically discover databases
To scrape metrics from all databases on a database server, the database DSN's can be dynamically discovered via the
`--auto-discover-databases` flag. When true, `SELECT datname FROM pg_database WHERE datallowconn = true AND datistemplate = false` is run for all configured DSN's. From the
`--auto-discover-databases` flag. When true, `SELECT datname FROM pg_database WHERE datallowconn = true AND datistemplate = false and datname != current_database()` is run for all configured DSN's. From the
result a new set of DSN's is created for which the metrics are scraped.

In addition, the option `--exclude-databases` adds the possibily to filter the result from the auto discovery to discard databases you do not need.
Expand Down
116 changes: 106 additions & 10 deletions cmd/postgres_exporter/postgres_exporter.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,13 +23,26 @@ import (
"github.com/prometheus/client_golang/prometheus"
"github.com/prometheus/client_golang/prometheus/promhttp"
"github.com/prometheus/common/log"
"github.com/prometheus/common/version"
"gopkg.in/alecthomas/kingpin.v2"
"gopkg.in/yaml.v2"
)

// Branch is set during build to the git branch.
var Branch string

// BuildDate is set during build to the ISO-8601 date and time.
var BuildDate string

// Revision is set during build to the git commit revision.
var Revision string

// Version is set during build to the git describe version
// (semantic version)-(commitish) form.
var Version = "0.0.1"
var Version = "0.0.1-rev"

// VersionShort is set during build to the semantic version.
var VersionShort = "0.0.1"

var (
listenAddress = kingpin.Flag("web.listen-address", "Address to listen on for web interface and telemetry.").Default(":9187").Envar("PG_EXPORTER_WEB_LISTEN_ADDRESS").String()
Expand Down Expand Up @@ -101,6 +114,7 @@ type Mapping map[string]MappingOptions
type UserQuery struct {
Query string `yaml:"query"`
Metrics []Mapping `yaml:"metrics"`
Master bool `yaml:"master"` // Querying only for master database
CacheSeconds uint64 `yaml:"cache_seconds"` // Number of seconds to cache the namespace result metrics for.
}

Expand Down Expand Up @@ -140,13 +154,15 @@ func (cm *ColumnMapping) UnmarshalYAML(unmarshal func(interface{}) error) error
// This is mainly so we can parse cacheSeconds around.
type intermediateMetricMap struct {
columnMappings map[string]ColumnMapping
master bool
cacheSeconds uint64
}

// MetricMapNamespace groups metric maps under a shared set of labels.
type MetricMapNamespace struct {
labels []string // Label names for this namespace
columnMappings map[string]MetricMap // Column mappings in this namespace
master bool // Call query only for master database
cacheSeconds uint64 // Number of seconds this metric namespace can be cached. 0 disables.
}

Expand Down Expand Up @@ -190,6 +206,23 @@ func dumpMaps() {
}

var builtinMetricMaps = map[string]intermediateMetricMap{
"pg_stat_bgwriter": {
map[string]ColumnMapping{
"checkpoints_timed": {COUNTER, "Number of scheduled checkpoints that have been performed", nil, nil},
"checkpoints_req": {COUNTER, "Number of requested checkpoints that have been performed", nil, nil},
"checkpoint_write_time": {COUNTER, "Total amount of time that has been spent in the portion of checkpoint processing where files are written to disk, in milliseconds", nil, nil},
"checkpoint_sync_time": {COUNTER, "Total amount of time that has been spent in the portion of checkpoint processing where files are synchronized to disk, in milliseconds", nil, nil},
"buffers_checkpoint": {COUNTER, "Number of buffers written during checkpoints", nil, nil},
"buffers_clean": {COUNTER, "Number of buffers written by the background writer", nil, nil},
"maxwritten_clean": {COUNTER, "Number of times the background writer stopped a cleaning scan because it had written too many buffers", nil, nil},
"buffers_backend": {COUNTER, "Number of buffers written directly by a backend", nil, nil},
"buffers_backend_fsync": {COUNTER, "Number of times a backend had to execute its own fsync call (normally the background writer handles those even when the backend does its own write)", nil, nil},
"buffers_alloc": {COUNTER, "Number of buffers allocated", nil, nil},
"stats_reset": {COUNTER, "Time at which these statistics were last reset", nil, nil},
},
true,
0,
},
"pg_stat_database": {
map[string]ColumnMapping{
"datid": {LABEL, "OID of a database", nil, nil},
Expand All @@ -212,6 +245,7 @@ var builtinMetricMaps = map[string]intermediateMetricMap{
"blk_write_time": {COUNTER, "Time spent writing data file blocks by backends in this database, in milliseconds", nil, nil},
"stats_reset": {COUNTER, "Time at which these statistics were last reset", nil, nil},
},
true,
0,
},
"pg_stat_database_conflicts": {
Expand All @@ -224,6 +258,7 @@ var builtinMetricMaps = map[string]intermediateMetricMap{
"confl_bufferpin": {COUNTER, "Number of queries in this database that have been canceled due to pinned buffers", nil, nil},
"confl_deadlock": {COUNTER, "Number of queries in this database that have been canceled due to deadlocks", nil, nil},
},
true,
0,
},
"pg_locks": {
Expand All @@ -232,6 +267,7 @@ var builtinMetricMaps = map[string]intermediateMetricMap{
"mode": {LABEL, "Type of Lock", nil, nil},
"count": {GAUGE, "Number of locks", nil, nil},
},
true,
0,
},
"pg_stat_replication": {
Expand Down Expand Up @@ -277,6 +313,21 @@ var builtinMetricMaps = map[string]intermediateMetricMap{
"flush_lag": {DISCARD, "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written and flushed it (but not yet applied it). This can be used to gauge the delay that synchronous_commit level remote_flush incurred while committing if this server was configured as a synchronous standby.", nil, semver.MustParseRange(">=10.0.0")},
"replay_lag": {DISCARD, "Time elapsed between flushing recent WAL locally and receiving notification that this standby server has written, flushed and applied it. This can be used to gauge the delay that synchronous_commit level remote_apply incurred while committing if this server was configured as a synchronous standby.", nil, semver.MustParseRange(">=10.0.0")},
},
true,
0,
},
"pg_stat_archiver": {
map[string]ColumnMapping{
"archived_count": {COUNTER, "Number of WAL files that have been successfully archived", nil, nil},
"last_archived_wal": {DISCARD, "Name of the last WAL file successfully archived", nil, nil},
"last_archived_time": {DISCARD, "Time of the last successful archive operation", nil, nil},
"failed_count": {COUNTER, "Number of failed attempts for archiving WAL files", nil, nil},
"last_failed_wal": {DISCARD, "Name of the WAL file of the last failed archival operation", nil, nil},
"last_failed_time": {DISCARD, "Time of the last failed archival operation", nil, nil},
"stats_reset": {DISCARD, "Time at which these statistics were last reset", nil, nil},
"last_archive_age": {GAUGE, "Time in seconds since last WAL segment was successfully archived", nil, nil},
},
true,
0,
},
"pg_stat_activity": {
Expand All @@ -286,6 +337,7 @@ var builtinMetricMaps = map[string]intermediateMetricMap{
"count": {GAUGE, "number of connections in this state", nil, nil},
"max_tx_duration": {GAUGE, "max duration in seconds any active transaction has been running", nil, nil},
},
true,
0,
},
}
Expand Down Expand Up @@ -356,6 +408,17 @@ var queryOverrides = map[string][]OverrideQuery{
},
},

"pg_stat_archiver": {
{
semver.MustParseRange(">=0.0.0"),
`
SELECT *,
extract(epoch from now() - last_archived_time) AS last_archive_age
FROM pg_stat_archiver
`,
},
},

"pg_stat_activity": {
// This query only works
{
Expand Down Expand Up @@ -445,6 +508,7 @@ func parseUserQueries(content []byte) (map[string]intermediateMetricMap, map[str
newMetricMap := make(map[string]ColumnMapping)
metricMap = intermediateMetricMap{
columnMappings: newMetricMap,
master: specs.Master,
cacheSeconds: specs.CacheSeconds,
}
metricMaps[metric] = metricMap
Expand Down Expand Up @@ -615,7 +679,7 @@ func makeDescMap(pgVersion semver.Version, serverLabels prometheus.Labels, metri
}
}

metricMap[namespace] = MetricMapNamespace{variableLabels, thisMap, intermediateMappings.cacheSeconds}
metricMap[namespace] = MetricMapNamespace{variableLabels, thisMap, intermediateMappings.master, intermediateMappings.cacheSeconds}
}

return metricMap
Expand Down Expand Up @@ -858,7 +922,7 @@ func (s *Server) Scrape(ch chan<- prometheus.Metric, disableSettingsMetrics bool

var err error

if (!disableSettingsMetrics && !*autoDiscoverDatabases) || (!disableSettingsMetrics && *autoDiscoverDatabases && s.master) {
if !disableSettingsMetrics && s.master {
if err = querySettings(ch, s); err != nil {
err = fmt.Errorf("error retrieving settings: %s", err)
}
Expand Down Expand Up @@ -1258,6 +1322,12 @@ func queryNamespaceMappings(ch chan<- prometheus.Metric, server *Server) map[str

for namespace, mapping := range server.metricMap {
log.Debugln("Querying namespace: ", namespace)

if mapping.master && !server.master {
log.Debugln("Query skipped...")
continue
}

scrapeMetric := false
// Check if the metric is cached
server.cacheMtx.Lock()
Expand Down Expand Up @@ -1336,12 +1406,13 @@ func (e *Exporter) checkMapVersions(ch chan<- prometheus.Metric, server *Server)
log.Infof("Semantic Version Changed on %q: %s -> %s", server, server.lastMapVersion, semanticVersion)
server.mappingMtx.Lock()

if e.disableDefaultMetrics || (!e.disableDefaultMetrics && e.autoDiscoverDatabases && !server.master) {
server.metricMap = make(map[string]MetricMapNamespace)
server.queryOverrides = make(map[string]string)
} else {
// Get Default Metrics only for master database
if !e.disableDefaultMetrics && server.master {
server.metricMap = makeDescMap(semanticVersion, server.labels, e.builtinMetricMaps)
server.queryOverrides = makeQueryOverrideMap(semanticVersion, queryOverrides)
} else {
server.metricMap = make(map[string]MetricMapNamespace)
server.queryOverrides = make(map[string]string)
}

server.lastMapVersion = semanticVersion
Expand Down Expand Up @@ -1371,11 +1442,11 @@ func (e *Exporter) checkMapVersions(ch chan<- prometheus.Metric, server *Server)
server.mappingMtx.Unlock()
}

// Output the version as a special metric
// Output the version as a special metric only for master database
versionDesc := prometheus.NewDesc(fmt.Sprintf("%s_%s", namespace, staticLabelName),
"Version string as reported by postgres", []string{"version", "short_version"}, server.labels)

if !e.disableDefaultMetrics && (server.master && e.autoDiscoverDatabases) {
if !e.disableDefaultMetrics && server.master {
ch <- prometheus.MustNewConstMetric(versionDesc,
prometheus.UntypedValue, 1, versionString, semanticVersion.String())
}
Expand Down Expand Up @@ -1440,6 +1511,7 @@ func (e *Exporter) discoverDatabaseDSNs() []string {
continue
}

// If autoDiscoverDatabases is true, set first dsn as master database (Default: false)
server.master = true

databaseNames, err := queryDatabases(server)
Expand Down Expand Up @@ -1468,10 +1540,16 @@ func (e *Exporter) discoverDatabaseDSNs() []string {

func (e *Exporter) scrapeDSN(ch chan<- prometheus.Metric, dsn string) error {
server, err := e.servers.GetServer(dsn)

if err != nil {
return &ErrorConnectToServer{fmt.Sprintf("Error opening connection to database (%s): %s", loggableDSN(dsn), err.Error())}
}

// Check if autoDiscoverDatabases is false, set dsn as master database (Default: false)
if !e.autoDiscoverDatabases {
server.master = true
}

// Check if map versions need to be updated
if err := e.checkMapVersions(ch, server); err != nil {
log.Warnln("Proceeding with outdated query maps, as the Postgres version could not be determined:", err)
Expand Down Expand Up @@ -1522,6 +1600,7 @@ func getDataSources() []string {
if len(dsn) == 0 {
var user string
var pass string
var uri string

if len(os.Getenv("DATA_SOURCE_USER_FILE")) != 0 {
fileContents, err := ioutil.ReadFile(os.Getenv("DATA_SOURCE_USER_FILE"))
Expand Down Expand Up @@ -1559,7 +1638,17 @@ func getDataSources() []string {
}

ui := url.UserPassword(user, pass).String()
uri := os.Getenv("DATA_SOURCE_URI")

if len(os.Getenv("DATA_SOURCE_URI_FILE")) != 0 {
fileContents, err := ioutil.ReadFile(os.Getenv("DATA_SOURCE_URI_FILE"))
if err != nil {
panic(err)
}
uri = strings.TrimSpace(string(fileContents))
} else {
uri = os.Getenv("DATA_SOURCE_URI")
}

dsn = "postgresql://" + ui + "@" + uri

return []string{dsn}
Expand Down Expand Up @@ -1614,6 +1703,13 @@ func main() {
exporter.servers.Close()
}()

// Setup build info metric.
version.Branch = Branch
version.BuildDate = BuildDate
version.Revision = Revision
version.Version = VersionShort
prometheus.MustRegister(version.NewCollector("postgres_exporter"))

prometheus.MustRegister(exporter)

http.Handle(*metricPath, promhttp.Handler())
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -117,6 +117,7 @@ func (s *IntegrationSuite) TestUnknownMetricParsingDoesntCrash(c *C) {
for k := range exporter.builtinMetricMaps {
emptyMaps[k] = intermediateMetricMap{
map[string]ColumnMapping{},
true,
0,
}
}
Expand Down
1 change: 1 addition & 0 deletions cmd/postgres_exporter/postgres_exporter_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,7 @@ func (s *FunctionalSuite) TestSemanticVersionColumnDiscard(c *C) {
"metric_which_stays": {COUNTER, "This metric should not be eliminated", nil, nil},
"metric_which_discards": {COUNTER, "This metric should be forced to DISCARD", nil, nil},
},
true,
0,
},
}
Expand Down
Loading