diff --git a/dashboards/grafana/apimon_tests/api_errors.yaml b/dashboards/grafana/apimon_tests/api_errors.yaml new file mode 100644 index 0000000..d9e58a8 --- /dev/null +++ b/dashboards/grafana/apimon_tests/api_errors.yaml @@ -0,0 +1,21 @@ +--- +order: 2 +gridPos: + w: 12 + h: 8 + x: 12 + y: 2 +fieldConfig: + defaults: + custom: + drawStyle: "points" + pointSize: 6 +options: + legend: {calcs: [], displayMode: "table", placement: "right"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "maximumAbove(aliasByMetric(groupByNode(consolidateBy(exclude(stats.timers.openstack.api.$environment.$zone.*.*.*.{4*,5*}.count, '404|dns.GET.zone.400|dns.GET.zone_recordset.400'), 'sum'), 9, 'sum')), 0)" +title: "Count of API Errors" +description: "Bad responses (4*, 5*, excluding 404)" +type: "timeseries" diff --git a/dashboards/grafana/apimon_tests/dashboard.yaml b/dashboards/grafana/apimon_tests/dashboard.yaml new file mode 100644 index 0000000..8d5fbf0 --- /dev/null +++ b/dashboards/grafana/apimon_tests/dashboard.yaml @@ -0,0 +1,4 @@ +--- +title: "APImon Test Results" +uid: "ApimonTestResults" +description: "APImon Test Results" diff --git a/dashboards/grafana/apimon_tests/logs.yaml b/dashboards/grafana/apimon_tests/logs.yaml new file mode 100644 index 0000000..0db2b67 --- /dev/null +++ b/dashboards/grafana/apimon_tests/logs.yaml @@ -0,0 +1,76 @@ +--- +order: 5 +datasource: apimon_db +gridPos: + w: 24 + h: 12 + y: 5 +fieldConfig: + defaults: + custom: + displayMode: auto + filterable: false + thresholds: + mode: absolute + steps: + - color: green + value: null + - color: red + value: 1 + overrides: + - matcher: + id: byName + options: job_id + properties: + - id: links + value: + - targetBlank: true + url: https://swift.eu-de.otc.t-systems.com/v1/AUTH_8deb45bad78e4dfc95223d819d0c3909/job_logs/${__data.fields.job_id}/job-output.txt + - matcher: + id: byName + options: result + properties: + - id: thresholds + value: + mode: absolute + steps: + - color: 
green + value: null + - color: red + value: 1 + - id: mappings + value: + - from: '' + id: 1 + text: Success + to: '' + type: 1 + value: '0' + - from: '' + id: 2 + text: Failed + to: '' + type: 1 + value: '3' + - id: custom.displayMode + value: color-background + - matcher: + id: byName + options: duration + properties: + - id: unit + value: ms +options: + showHeader: true +targets: + - format: table + group: [] + metricColumn: none + rawQuery: true + rawSql: "SELECT\n \"timestamp\" AS \"time\",\n \"name\",\n \"job_id\",\n\ + \ \"result\",\n \"duration\"\nFROM result_summary\nWHERE\n $__timeFilter(\"\ + timestamp\") AND\n environment = '$environment' AND\n zone = $zone AND\n\ + \ result = 3\nORDER BY 1 DESC" + refId: A +title: "Test Logs" +type: "table" diff --git a/dashboards/grafana/apimon_tests/no_response.yaml b/dashboards/grafana/apimon_tests/no_response.yaml new file mode 100644 index 0000000..c021889 --- /dev/null +++ b/dashboards/grafana/apimon_tests/no_response.yaml @@ -0,0 +1,22 @@ +--- +order: 2 +gridPos: + w: 12 + h: 8 + y: 2 +fieldConfig: + defaults: + custom: + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" +options: + legend: {showLegend: false} + tooltip: {mode: "single", sort: "none"} + alertThreshold: true +targets: + - refId: "A" + target: "alias(consolidateBy(sumSeries(exclude(stats.counters.openstack.api.$environment.$zone.*.*.*.failed.count, 'modelarts')), 'sum'), 'count')" +title: "No response count" +description: "Count of API requests with no response received during timeout 10s" +type: "timeseries" diff --git a/dashboards/grafana/apimon_tests/results.yaml b/dashboards/grafana/apimon_tests/results.yaml new file mode 100644 index 0000000..7f6ce6e --- /dev/null +++ b/dashboards/grafana/apimon_tests/results.yaml @@ -0,0 +1,52 @@ +--- +order: 3 +datasource: apimon_db +gridPos: + w: 24 + h: 8 + y: 3 +options: + legend: {displayMode: "table", placement: "right"} + tooltip: {mode: "single", sort: "none"} + alertThreshold: true 
+fieldConfig: + defaults: + custom: + drawStyle: "points" + pointSize: 4 +targets: + - format: time_series + metricColumn: name + queryType: randomWalk + rawQuery: false + rawSql: "SELECT\n \"timestamp\" AS \"time\",\n name AS metric,\n result\n\ + FROM result_summary\nWHERE\n $__timeFilter(\"timestamp\") AND\n environment\ + \ = '$environment' AND\n zone = '$zone'\nORDER BY 1,2" + refId: A + select: + - - params: + - result + type: column + table: result_summary + timeColumn: '"timestamp"' + timeColumnType: timestamp + where: + - name: $__timeFilter + params: [] + type: macro + - datatype: varchar + name: '' + params: + - environment + - '=' + - '''$environment''' + type: expression + - datatype: varchar + name: '' + params: + - zone + - '=' + - '$zone' + type: expression +title: "Test results" +type: "timeseries" diff --git a/dashboards/grafana/apimon_tests/test_duration.yaml b/dashboards/grafana/apimon_tests/test_duration.yaml new file mode 100644 index 0000000..6a6d23a --- /dev/null +++ b/dashboards/grafana/apimon_tests/test_duration.yaml @@ -0,0 +1,55 @@ +--- +order: 4 +datasource: apimon_db +gridPos: + w: 24 + h: 8 + y: 4 +options: + legend: {calcs: ["min", "max"], displayMode: "table", placement: "right"} + tooltip: {mode: "single", sort: "none"} + alertThreshold: true +fieldConfig: + defaults: + custom: + drawStyle: "points" + pointSize: 4 + unit: "ms" + decimals: 2 +targets: + - format: time_series + group: [] + metricColumn: name + queryType: randomWalk + rawQuery: false + rawSql: "SELECT\n \"timestamp\" AS \"time\",\n name AS metric,\n duration\n\ + FROM result_summary\nWHERE\n $__timeFilter(\"timestamp\") AND\n environment\ + \ = '$environment' AND\n zone = '$zone'\nORDER BY 1,2" + refId: A + select: + - - params: + - duration + type: column + table: result_summary + timeColumn: '"timestamp"' + timeColumnType: timestamp + where: + - name: $__timeFilter + params: [] + type: macro + - datatype: varchar + name: '' + params: + - environment + - '=' + - 
'''$environment''' + type: expression + - datatype: varchar + name: '' + params: + - zone + - '=' + - '$zone' + type: expression +title: Test duration +type: "timeseries" diff --git a/dashboards/grafana/block_storage/apis.yaml b/dashboards/grafana/block_storage/apis.yaml new file mode 100644 index 0000000..18268df --- /dev/null +++ b/dashboards/grafana/block_storage/apis.yaml @@ -0,0 +1,22 @@ +--- +order: 6 +gridPos: + w: 24 + h: 8 + y: 6 +fieldConfig: + defaults: + custom: + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" + unit: "ms" + decimals: 2 +options: + legend: {calcs: ["min", "mean", "max"], displayMode: "table", placement: "right"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "sortByName(groupByNodes(removeEmptySeries(stats.timers.openstack.api.$environment.$zone.block-storage.*.*.*.mean, 0.01), 'sum', 8, 7, 9), true)" +title: "API calls duration" +type: "timeseries" diff --git a/dashboards/grafana/block_storage/bad_calls.yaml b/dashboards/grafana/block_storage/bad_calls.yaml new file mode 100644 index 0000000..b8e02bb --- /dev/null +++ b/dashboards/grafana/block_storage/bad_calls.yaml @@ -0,0 +1,23 @@ +--- +order: 8 +description: "API calls received error HTTP response code." 
+gridPos: + w: 24 + h: 8 + y: 8 +bars: true +fieldConfig: + defaults: + custom: + drawStyle: "bars" + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" +options: + legend: {calcs: ["count"], displayMode: "table", placement: "right"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "removeEmptySeries(aliasByMetric(consolidateBy(groupByNode(exclude(stats.timers.openstack.api.$environment.$zone.block-storage.*.*.{4*,5*}.count, '404'), 9, 'sum'), 'sum')), 0.001)" +title: "BAD calls" +type: "timeseries" diff --git a/dashboards/grafana/block_storage/dashboard.yaml b/dashboards/grafana/block_storage/dashboard.yaml new file mode 100644 index 0000000..a896653 --- /dev/null +++ b/dashboards/grafana/block_storage/dashboard.yaml @@ -0,0 +1,4 @@ +--- +title: "Block Storage Service Statistics" +uid: "CloudMonBlockStorage" +description: "Data for the Block Storage service" diff --git a/dashboards/grafana/block_storage/longest_api.yaml b/dashboards/grafana/block_storage/longest_api.yaml new file mode 100644 index 0000000..f1851dc --- /dev/null +++ b/dashboards/grafana/block_storage/longest_api.yaml @@ -0,0 +1,23 @@ +--- +order: 7 +description: "API calls which have occurrences longer than 10s." 
+gridPos: + h: 8 + w: 24 + y: 7 +fieldConfig: + defaults: + custom: + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" + unit: "ms" + decimals: 2 +options: + legend: {"calcs": ["min", "mean", "max"], "displayMode": "table", "placement": "right"} + tooltip: {"mode": "single", "sort": "none"} +targets: + - refId: "A" + target: "maximumAbove(groupByNodes(removeEmptySeries(stats.timers.openstack.api.$environment.$zone.block-storage.*.*.*.upper), 'sum', 8, 7, 9), 10000)" +title: "Highest API calls duration" +type: "timeseries" diff --git a/dashboards/grafana/block_storage/volume_backup_creation.yaml b/dashboards/grafana/block_storage/volume_backup_creation.yaml new file mode 100644 index 0000000..2622349 --- /dev/null +++ b/dashboards/grafana/block_storage/volume_backup_creation.yaml @@ -0,0 +1,24 @@ +--- +order: 4 +gridPos: + w: 24 + h: 8 + y: 4 +bars: true +fieldConfig: + defaults: + custom: + drawStyle: "bars" + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" + unit: "ms" + decimals: 2 +options: + legend: {calcs: ["min", "mean", "max"], displayMode: "table", placement: "right"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "groupByNode(consolidateBy(stats.timers.apimon.metric.$environment.$zone.create_volume_backup.{default}.passed.upper_90, 'sum'), 7, 'avg')" +title: "Volume Backup creation duration" +type: "timeseries" diff --git a/dashboards/grafana/block_storage/volume_backup_restore.yaml b/dashboards/grafana/block_storage/volume_backup_restore.yaml new file mode 100644 index 0000000..ad8c3b5 --- /dev/null +++ b/dashboards/grafana/block_storage/volume_backup_restore.yaml @@ -0,0 +1,24 @@ +--- +order: 5 +gridPos: + w: 24 + h: 8 + y: 5 +bars: true +fieldConfig: + defaults: + custom: + drawStyle: "bars" + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" + unit: "ms" + decimals: 2 +options: + legend: {calcs: ["min", "mean", "max"], displayMode: "table", placement: "right"} + tooltip: {mode: "single", sort: 
"none"} +targets: + - refId: "A" + target: "groupByNode(consolidateBy(stats.timers.apimon.metric.$environment.$zone.restore_volume_backup.passed.upper_90, 'sum'), 7, 'avg')" +title: "Volume Backup Restore duration" +type: "timeseries" diff --git a/dashboards/grafana/block_storage/volume_creation.yaml b/dashboards/grafana/block_storage/volume_creation.yaml new file mode 100644 index 0000000..26d816b --- /dev/null +++ b/dashboards/grafana/block_storage/volume_creation.yaml @@ -0,0 +1,24 @@ +--- +order: 2 +gridPos: + w: 24 + h: 8 + y: 2 +bars: true +fieldConfig: + defaults: + custom: + drawStyle: "bars" + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" + unit: "ms" + decimals: 2 +options: + legend: {calcs: ["min", "mean", "max"], displayMode: "table", placement: "right"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "groupByNode(consolidateBy(stats.timers.apimon.metric.$environment.$zone.create_volume.{_availability*,default,eu*}.passed.upper_90, 'sum'), 7, 'avg')" +title: "Volume creation duration" +type: "timeseries" diff --git a/dashboards/grafana/block_storage/volume_snapshot_creation.yaml b/dashboards/grafana/block_storage/volume_snapshot_creation.yaml new file mode 100644 index 0000000..720b572 --- /dev/null +++ b/dashboards/grafana/block_storage/volume_snapshot_creation.yaml @@ -0,0 +1,24 @@ +--- +order: 3 +gridPos: + w: 24 + h: 8 + y: 3 +bars: true +fieldConfig: + defaults: + custom: + drawStyle: "bars" + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" + unit: "ms" + decimals: 2 +options: + legend: {calcs: ["min", "mean", "max"], displayMode: "table", placement: "right"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "groupByNode(consolidateBy(stats.timers.apimon.metric.$environment.$zone.create_volume_snapshot.passed.upper_90, 'sum'), 7, 'avg')" +title: "Volume Snapshot creation duration" +type: "timeseries" diff --git a/dashboards/grafana/compute/apis.yaml 
b/dashboards/grafana/compute/apis.yaml new file mode 100644 index 0000000..7fa0a1d --- /dev/null +++ b/dashboards/grafana/compute/apis.yaml @@ -0,0 +1,22 @@ +--- +order: 5 +gridPos: + w: 24 + h: 8 + y: 5 +fieldConfig: + defaults: + custom: + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" + unit: "ms" + decimals: 2 +options: + legend: {calcs: ["min", "mean", "max"], displayMode: "table", placement: "right"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "sortByName(groupByNodes(removeEmptySeries(stats.timers.openstack.api.$environment.$zone.compute.*.*.*.mean, 0.01), 'sum', 8, 7, 9), true)" +title: "API calls duration" +type: "timeseries" diff --git a/dashboards/grafana/compute/bad_calls.yaml b/dashboards/grafana/compute/bad_calls.yaml new file mode 100644 index 0000000..489574c --- /dev/null +++ b/dashboards/grafana/compute/bad_calls.yaml @@ -0,0 +1,23 @@ +--- +order: 7 +description: "API calls received error HTTP response code." +gridPos: + w: 24 + h: 8 + y: 7 +bars: true +fieldConfig: + defaults: + custom: + drawStyle: "bars" + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" +options: + legend: {calcs: ["count"], displayMode: "table", placement: "right"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "removeEmptySeries(aliasByMetric(consolidateBy(groupByNode(exclude(stats.timers.openstack.api.$environment.$zone.compute.*.*.{4*,5*}.count, '404'), 9, 'sum'), 'sum')), 0.001)" +title: "BAD calls" +type: "timeseries" diff --git a/dashboards/grafana/compute/dashboard.yaml b/dashboards/grafana/compute/dashboard.yaml new file mode 100644 index 0000000..7425e44 --- /dev/null +++ b/dashboards/grafana/compute/dashboard.yaml @@ -0,0 +1,4 @@ +--- +title: "Compute Service Statistics" +uid: "CloudMonCompute" +description: "Data for the Compute service" diff --git a/dashboards/grafana/compute/instance_boot_coreos.yaml b/dashboards/grafana/compute/instance_boot_coreos.yaml new file mode 100644 
index 0000000..953c1e4 --- /dev/null +++ b/dashboards/grafana/compute/instance_boot_coreos.yaml @@ -0,0 +1,23 @@ +--- +order: 3 +gridPos: + w: 12 + h: 6 + y: 3 +fieldConfig: + defaults: + custom: + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" + unit: "ms" + decimals: 2 +options: + legend: {displayMode: "list", placement: "bottom"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + datasource: cloudmon + target: "groupByNode(stats.timers.apimon.metric.$environment.$zone.create_server_coreos.{default,eu*}.*.mean_90, 7, 'avg')" +title: "Instance Boot duration (coreos)" +type: "timeseries" diff --git a/dashboards/grafana/compute/instance_boot_fedora.yaml b/dashboards/grafana/compute/instance_boot_fedora.yaml new file mode 100644 index 0000000..083e219 --- /dev/null +++ b/dashboards/grafana/compute/instance_boot_fedora.yaml @@ -0,0 +1,22 @@ +--- +order: 2 +gridPos: + w: 12 + h: 6 + y: 2 +fieldConfig: + defaults: + custom: + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" + unit: "ms" + decimals: 2 +options: + legend: {displayMode: "list", placement: "bottom"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "groupByNode(stats.timers.apimon.metric.$environment.$zone.create_server.{default,eu*}.*.mean_90, 7, 'avg')" +title: "Instance Boot duration (Fedora33)" +type: "timeseries" diff --git a/dashboards/grafana/compute/longest_api.yaml b/dashboards/grafana/compute/longest_api.yaml new file mode 100644 index 0000000..e38def9 --- /dev/null +++ b/dashboards/grafana/compute/longest_api.yaml @@ -0,0 +1,23 @@ +--- +order: 6 +description: "API calls which have occurrences longer than 10s." 
+gridPos: + h: 8 + w: 24 + y: 6 +fieldConfig: + defaults: + custom: + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" + unit: "ms" + decimals: 2 +options: + legend: {"calcs": ["min", "mean", "max"], "displayMode": "table", "placement": "right"} + tooltip: {"mode": "single", "sort": "none"} +targets: + - refId: "A" + target: "maximumAbove(groupByNodes(removeEmptySeries(stats.timers.openstack.api.$environment.$zone.compute.*.*.*.upper), 'sum', 8, 7, 9), 10000)" +title: "Highest API calls duration" +type: "timeseries" diff --git a/dashboards/grafana/compute/metadata_failures.yaml b/dashboards/grafana/compute/metadata_failures.yaml new file mode 100644 index 0000000..906209a --- /dev/null +++ b/dashboards/grafana/compute/metadata_failures.yaml @@ -0,0 +1,21 @@ +--- +order: 3 +gridPos: + w: 12 + h: 6 + x: 12 + y: 3 +fieldConfig: + defaults: + custom: + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" +options: + legend: {displayMode: "list", placement: "bottom"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "groupByNodes(stats.counters.apimon.metric.$environment.$zone.metadata.*.failed, 'sum', 7)" +title: "Metadata Query failures" +type: "timeseries" diff --git a/dashboards/grafana/compute/metadata_latencies.yaml b/dashboards/grafana/compute/metadata_latencies.yaml new file mode 100644 index 0000000..56acf05 --- /dev/null +++ b/dashboards/grafana/compute/metadata_latencies.yaml @@ -0,0 +1,22 @@ +--- +order: 4 +gridPos: + w: 12 + h: 6 + y: 4 +fieldConfig: + defaults: + custom: + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" + unit: "ms" + decimals: 2 +options: + legend: {displayMode: "list", placement: "bottom"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "aliasByNode(removeEmptySeries(stats.timers.apimon.metric.$environment.$zone.metadata.*.*.*.mean), 9, 8)" +title: "Metadata Server latencies" +type: "timeseries" diff --git a/dashboards/grafana/compute/ssh.yaml 
b/dashboards/grafana/compute/ssh.yaml new file mode 100644 index 0000000..0b23de7 --- /dev/null +++ b/dashboards/grafana/compute/ssh.yaml @@ -0,0 +1,26 @@ +--- +order: 2 +gridPos: + w: 12 + h: 6 + x: 12 + y: 2 +fieldConfig: + defaults: + thresholds: {mode: "absolute", steps: [ + {color: "green", value: null}, + {color: "red", value: 100000000} + ]} + color: + mode: "thresholds" + max: 100 + min: 0 + noValue: "0" +options: + legend: {calcs: ["min", "mean", "max"], displayMode: "table", placement: "right"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "aliasByMetric(summarize(groupByNode(stats.counters.apimon.metric.$environment.$zone.{create_server,create_server_coreos}.{default,eu*}.passed.count, 7, 'sum'), '1d', 'sum', false))" +title: "SSH Successful Logins" +type: "gauge" diff --git a/dashboards/grafana/endpoint/dashboard.yaml b/dashboards/grafana/endpoint/dashboard.yaml new file mode 100644 index 0000000..0dfa067 --- /dev/null +++ b/dashboards/grafana/endpoint/dashboard.yaml @@ -0,0 +1,4 @@ +--- +title: "Endpoint Monitoring" +uid: "CloudMonEndpoint" +description: "Data for the Endpoint Monitoring" diff --git a/dashboards/grafana/endpoint/errors.yaml b/dashboards/grafana/endpoint/errors.yaml new file mode 100644 index 0000000..9ce2061 --- /dev/null +++ b/dashboards/grafana/endpoint/errors.yaml @@ -0,0 +1,23 @@ +--- +order: 3 +gridPos: + w: 24 + h: 9 + y: 3 +bars: true +fieldConfig: + defaults: + custom: + drawStyle: "bars" + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" +options: + legend: {displayMode: "list", placement: "bottom"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "maximumAbove(groupByNodes(exclude(stats.timers.openstack.api.$environment.$zone.*.*.*.{4*,5*}.count, '404|dns.GET.zone.400|dns.GET.zone_recordset.400'), 'sum', 6, 9), 0)" +title: "Amount of errors" +type: "timeseries" +interval: "5m" diff --git a/dashboards/grafana/endpoint/no_response.yaml 
b/dashboards/grafana/endpoint/no_response.yaml new file mode 100644 index 0000000..71fe105 --- /dev/null +++ b/dashboards/grafana/endpoint/no_response.yaml @@ -0,0 +1,23 @@ +--- +order: 2 +gridPos: + w: 8 + h: 9 + x: 16 + y: 2 +bars: true +fieldConfig: + defaults: + custom: + drawStyle: "bars" + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" +options: + legend: {displayMode: "list", placement: "bottom"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "groupByNode(removeBelowValue(stats.counters.openstack.api.$environment.$zone.*.*.*.failed.count, 0.1), 6, 'sum')" +title: "No response" +type: "timeseries" diff --git a/dashboards/grafana/endpoint/response_time.yaml b/dashboards/grafana/endpoint/response_time.yaml new file mode 100644 index 0000000..c14a0a8 --- /dev/null +++ b/dashboards/grafana/endpoint/response_time.yaml @@ -0,0 +1,22 @@ +--- +order: 2 +gridPos: + w: 16 + h: 9 + y: 2 +fieldConfig: + defaults: + custom: + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" + unit: "ms" + decimals: 2 +options: + legend: {displayMode: "list", placement: "bottom"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "groupByNode(stats.timers.openstack.api.$environment.$zone.*.*.*.*.upper, 6, 'avg')" +title: "Endpoint response times" +type: "timeseries" diff --git a/dashboards/grafana/endpoint/status.yaml b/dashboards/grafana/endpoint/status.yaml new file mode 100644 index 0000000..eb48c17 --- /dev/null +++ b/dashboards/grafana/endpoint/status.yaml @@ -0,0 +1,26 @@ +--- +order: 1 +gridPos: + w: 24 + h: 16 + y: 1 +fieldConfig: + defaults: + thresholds: + mode: "percentage" + steps: [ + {color: "red", value: null}, + {color: "yellow", value: 80}, + {color: "green", value: 99} + ] + color: + mode: "thresholds" + noValue: "0" + unit: "percent" + min: 0 + max: 100 +targets: + - refId: "A" + target: 
"aliasByMetric(summarize(groupByNode(applyByNode(stats.timers.openstack.api.$environment.$zone.*.*.*.*.count, 6, \"asPercent(sumSeries(%.*.*.{2*,3*,404}.count), sumSeries(%.*.*.*.count))\", \"%.pct\"), 6, 'avg'), '1hour', 'avg'))" +title: "Endpoint status" +type: "gauge" diff --git a/dashboards/grafana/identity/apis.yaml b/dashboards/grafana/identity/apis.yaml index 2972389..1bdafba 100644 --- a/dashboards/grafana/identity/apis.yaml +++ b/dashboards/grafana/identity/apis.yaml @@ -1,8 +1,17 @@ --- -order: 10 +order: 3 gridPos: w: 24 h: 8 + y: 3 +fieldConfig: + defaults: + custom: + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" + unit: "ms" + decimals: 2 options: legend: {calcs: ["min", "mean", "max"], displayMode: "table", placement: "right"} tooltip: {mode: "single", sort: "none"} diff --git a/dashboards/grafana/identity/bad_calls.yaml b/dashboards/grafana/identity/bad_calls.yaml new file mode 100644 index 0000000..5912d0c --- /dev/null +++ b/dashboards/grafana/identity/bad_calls.yaml @@ -0,0 +1,23 @@ +--- +order: 5 +description: "API calls received error HTTP response code." +gridPos: + w: 24 + h: 8 + y: 5 +bars: true +fieldConfig: + defaults: + custom: + drawStyle: "bars" + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" +options: + legend: {calcs: ["count"], displayMode: "table", placement: "right"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "removeEmptySeries(aliasByMetric(consolidateBy(groupByNode(exclude(stats.timers.openstack.api.$environment.$zone.identity.*.*.{4*,5*}.count, '404'), 9, 'sum'), 'sum')), 0.001)" +title: "BAD calls" +type: "timeseries" diff --git a/dashboards/grafana/identity/longest_api.yaml b/dashboards/grafana/identity/longest_api.yaml index e2b6e7c..9dc9cea 100644 --- a/dashboards/grafana/identity/longest_api.yaml +++ b/dashboards/grafana/identity/longest_api.yaml @@ -1,9 +1,18 @@ --- -order: 11 +order: 4 description: "API calls which has occurences longer then 10s." 
gridPos: h: 8 w: 24 + y: 4 +fieldConfig: + defaults: + custom: + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" + unit: "ms" + decimals: 2 options: legend: {"calcs": ["min", "mean", "max"], "displayMode": "table", "placement": "right"} tooltip: {"mode": "single", "sort": "none"} diff --git a/dashboards/grafana/identity/results.yaml b/dashboards/grafana/identity/results.yaml deleted file mode 100644 index c0b7ecc..0000000 --- a/dashboards/grafana/identity/results.yaml +++ /dev/null @@ -1,39 +0,0 @@ ---- -order: 100 -datasource: apimon_db -gridPos: - w: 24 - h: 10 -options: - showHeader: true -fieldConfig: - overrides: - - matcher: - id: "byName" - options: "log_url" - properties: - - id: "links" - value: - - targetBlank: true - url: "${__value.raw}" -targets: - - refId: "A" - datasource: apimon_db - format: "table" - metricColumn: "long_name" - rawSql: | - SELECT - result_summary."timestamp" AS "time", - result_summary.name AS "Name", - result_summary.job_id AS "job_id", - result_summary.result, - jobs.log_url - FROM result_summary - JOIN jobs on result_summary.job_id = jobs.job_id - WHERE - $__timeFilter(result_summary."timestamp") AND - result_summary.environment = '$environment' AND - result_summary.zone in ($zone) - ORDER BY 1 DESC -title: "Scenario results" -type: "table" diff --git a/dashboards/grafana/identity/tokens.yaml b/dashboards/grafana/identity/tokens.yaml new file mode 100644 index 0000000..94356df --- /dev/null +++ b/dashboards/grafana/identity/tokens.yaml @@ -0,0 +1,22 @@ +--- +order: 2 +gridPos: + w: 24 + h: 8 + y: 2 +fieldConfig: + defaults: + custom: + fillOpacity: 10 + spanNulls: 3600000 + pointSize: 2 + unit: "ms" + decimals: 2 +options: + legend: {displayMode: "list", placement: "bottom"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "aliasByNode(consolidateBy(stats.timers.openstack.api.$environment.$zone.identity.POST.tokens.201.upper_90, 'max'), 5)" +title: "Token creation duration" +type: 
"timeseries" diff --git a/dashboards/grafana/image/apis.yaml b/dashboards/grafana/image/apis.yaml index a6be05c..c9f2a4b 100644 --- a/dashboards/grafana/image/apis.yaml +++ b/dashboards/grafana/image/apis.yaml @@ -10,6 +10,8 @@ fieldConfig: fillOpacity: 10 spanNulls: 3600000 showPoints: "never" + unit: "ms" + decimals: 2 options: legend: {calcs: ["min", "mean", "max"], displayMode: "table", placement: "right"} tooltip: {mode: "single", sort: "none"} diff --git a/dashboards/grafana/image/longest_api.yaml b/dashboards/grafana/image/longest_api.yaml index 3b9c163..c98706d 100644 --- a/dashboards/grafana/image/longest_api.yaml +++ b/dashboards/grafana/image/longest_api.yaml @@ -11,6 +11,8 @@ fieldConfig: fillOpacity: 10 spanNulls: 3600000 showPoints: "never" + unit: "ms" + decimals: 2 options: legend: {"calcs": ["min", "mean", "max"], "displayMode": "table", "placement": "right"} tooltip: {"mode": "single", "sort": "none"} diff --git a/dashboards/grafana/network/apis.yaml b/dashboards/grafana/network/apis.yaml new file mode 100644 index 0000000..c58ed9f --- /dev/null +++ b/dashboards/grafana/network/apis.yaml @@ -0,0 +1,22 @@ +--- +order: 7 +gridPos: + w: 24 + h: 8 + y: 7 +fieldConfig: + defaults: + custom: + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" + unit: "ms" + decimals: 2 +options: + legend: {calcs: ["min", "mean", "max"], displayMode: "table", placement: "right"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "sortByName(groupByNodes(removeEmptySeries(stats.timers.openstack.api.$environment.$zone.network.*.*.*.mean, 0.01), 'sum', 8, 7, 9), true)" +title: "API calls duration" +type: "timeseries" diff --git a/dashboards/grafana/network/bad_calls.yaml b/dashboards/grafana/network/bad_calls.yaml new file mode 100644 index 0000000..2d778f8 --- /dev/null +++ b/dashboards/grafana/network/bad_calls.yaml @@ -0,0 +1,23 @@ +--- +order: 9 +description: "API calls received error HTTP response code." 
+gridPos: + w: 24 + h: 8 + y: 9 +bars: true +fieldConfig: + defaults: + custom: + drawStyle: "bars" + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" +options: + legend: {calcs: ["count"], displayMode: "table", placement: "right"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "removeEmptySeries(aliasByMetric(consolidateBy(groupByNode(exclude(stats.timers.openstack.api.$environment.$zone.network.*.*.{4*,5*}.count, '404'), 9, 'sum'), 'sum')), 0.001)" +title: "BAD calls" +type: "timeseries" diff --git a/dashboards/grafana/network/dashboard.yaml b/dashboards/grafana/network/dashboard.yaml new file mode 100644 index 0000000..a42fe39 --- /dev/null +++ b/dashboards/grafana/network/dashboard.yaml @@ -0,0 +1,4 @@ +--- +title: "Network Service Statistics" +uid: "CloudMonNetwork" +description: "Data for the Network service" diff --git a/dashboards/grafana/network/domains_errors.yaml b/dashboards/grafana/network/domains_errors.yaml new file mode 100644 index 0000000..2d40984 --- /dev/null +++ b/dashboards/grafana/network/domains_errors.yaml @@ -0,0 +1,23 @@ +--- +order: 3 +description: "Curl like request errors (4*, 5*, failed) for the hosts in scenario30_domains.yaml\n" +gridPos: + w: 6 + h: 6 + x: 18 + y: 3 +fieldConfig: + defaults: + custom: + drawStyle: "points" + pointSize: 6 +options: + legend: {displayMode: "list", placement: "bottom"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "aliasByNode(stats.counters.apimon.metric.$environment.$zone.curl.*.{4*,5*}.attempted.count, 7, 8)" + - refId: "B" + target: "aliasByNode(stats.counters.apimon.metric.$environment.$zone.curl.*.failed.count, 7, 8)" +title: "Domains curl Errors" +type: "timeseries" diff --git a/dashboards/grafana/network/domains_latency.yaml b/dashboards/grafana/network/domains_latency.yaml new file mode 100644 index 0000000..015b85f --- /dev/null +++ b/dashboards/grafana/network/domains_latency.yaml @@ -0,0 +1,23 @@ +--- +order: 3 
+description: "Curl like request latencies to the hosts in scenario30_domains.yaml\n" +gridPos: + w: 18 + h: 6 + y: 3 +fieldConfig: + defaults: + custom: + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" + unit: "ms" + decimals: 2 +options: + legend: {calcs: ["lastNotNull"], displayMode: "table", placement: "right"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "aliasByNode(stats.timers.apimon.metric.$environment.$zone.curl.*.{2*,3*}.mean, 5, 7)" +title: "Domains curl Latency" +type: "timeseries" diff --git a/dashboards/grafana/network/internal_errors.yaml b/dashboards/grafana/network/internal_errors.yaml new file mode 100644 index 0000000..0b208dc --- /dev/null +++ b/dashboards/grafana/network/internal_errors.yaml @@ -0,0 +1,21 @@ +--- +order: 5 +description: "Ping errors for hosts inside VPC/VPC peering using internal IP addresses\n" +gridPos: + w: 6 + h: 8 + x: 18 + y: 5 +fieldConfig: + defaults: + custom: + drawStyle: "points" + pointSize: 6 +options: + legend: {displayMode: "list", placement: "bottom"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "groupByNodes(stats.counters.apimon.metric.$environment.$zone.ping.{peering,vpc}.*.*.failed, 'sum', 7)" +title: "Internal ping errors" +type: "timeseries" diff --git a/dashboards/grafana/network/internal_latency.yaml b/dashboards/grafana/network/internal_latency.yaml new file mode 100644 index 0000000..6747e0b --- /dev/null +++ b/dashboards/grafana/network/internal_latency.yaml @@ -0,0 +1,23 @@ +--- +order: 5 +description: "Ping latency for hosts inside VPC or through VPC Peering using internal IP addresses\n" +gridPos: + w: 18 + h: 8 + y: 5 +fieldConfig: + defaults: + custom: + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" + unit: "ms" + decimals: 2 +options: + legend: {calcs: ["lastNotNull"], displayMode: "table", placement: "right"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: 
"groupByNodes(stats.timers.apimon.metric.$environment.$zone.ping.{vpc,peering}.*.*.mean, 'avg', 9)" +title: "Internal VPC/Peering Latency" +type: "timeseries" diff --git a/dashboards/grafana/network/longest_api.yaml b/dashboards/grafana/network/longest_api.yaml new file mode 100644 index 0000000..8423592 --- /dev/null +++ b/dashboards/grafana/network/longest_api.yaml @@ -0,0 +1,23 @@ +--- +order: 8 +description: "API calls which have occurrences longer than 10s." +gridPos: + h: 8 + w: 24 + y: 8 +fieldConfig: + defaults: + custom: + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" + unit: "ms" + decimals: 2 +options: + legend: {"calcs": ["min", "mean", "max"], "displayMode": "table", "placement": "right"} + tooltip: {"mode": "single", "sort": "none"} +targets: + - refId: "A" + target: "maximumAbove(groupByNodes(removeEmptySeries(stats.timers.openstack.api.$environment.$zone.network.*.*.*.upper), 'sum', 8, 7, 9), 10000)" +title: "Highest API calls duration" +type: "timeseries" diff --git a/dashboards/grafana/network/ns_failures.yaml b/dashboards/grafana/network/ns_failures.yaml new file mode 100644 index 0000000..688124c --- /dev/null +++ b/dashboards/grafana/network/ns_failures.yaml @@ -0,0 +1,21 @@ +--- +order: 4 +description: "NS Lookup failures from specific NS servers\n" +gridPos: + w: 6 + h: 6 + x: 18 + y: 4 +fieldConfig: + defaults: + custom: + drawStyle: "points" + pointSize: 6 +options: + legend: {displayMode: "list", placement: "bottom"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "aliasByNode(stats.counters.apimon.metric.$environment.$zone.dns.*.*.failed.count, 7, 8)" +title: "NS Lookup Failures" +type: "timeseries" diff --git a/dashboards/grafana/network/ns_lookup.yaml b/dashboards/grafana/network/ns_lookup.yaml new file mode 100644 index 0000000..466e039 --- /dev/null +++ b/dashboards/grafana/network/ns_lookup.yaml @@ -0,0 +1,23 @@ +--- +order: 4 +description: "NS Lookup from specific NS servers\n" +gridPos: + w: 
18 + h: 6 + y: 4 +fieldConfig: + defaults: + custom: + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" + unit: "ms" + decimals: 2 +options: + legend: {calcs: ["mean", "last", "max"], displayMode: "table", placement: "right"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "groupByNodes(stats.timers.apimon.metric.$environment.$zone.dns.*.*.mean, 'avg', 7, 8)" +title: "NS Lookup" +type: "timeseries" diff --git a/dashboards/grafana/network/outside_errors.yaml b/dashboards/grafana/network/outside_errors.yaml new file mode 100644 index 0000000..1632b5d --- /dev/null +++ b/dashboards/grafana/network/outside_errors.yaml @@ -0,0 +1,21 @@ +--- +order: 6 +description: "Outside ping errors. Single errors are not a problem indicator (pinging hosts in different countries).\n" +gridPos: + w: 6 + h: 8 + x: 18 + y: 6 +fieldConfig: + defaults: + custom: + drawStyle: "points" + pointSize: 6 +options: + legend: {displayMode: "list", placement: "bottom"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "groupByNodes(stats.counters.apimon.metric.$environment.$zone.ping.{natgw,snat}.*.failed, 'sum', 7)" +title: "Outside ping errors" +type: "timeseries" diff --git a/dashboards/grafana/network/outside_latency.yaml b/dashboards/grafana/network/outside_latency.yaml new file mode 100644 index 0000000..30b8473 --- /dev/null +++ b/dashboards/grafana/network/outside_latency.yaml @@ -0,0 +1,23 @@ +--- +order: 6 +description: "Ping latency for outside world through NATGW and Shared SNAT\n" +gridPos: + w: 18 + h: 8 + y: 6 +fieldConfig: + defaults: + custom: + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" + unit: "ms" + decimals: 2 +options: + legend: {calcs: ["lastNotNull"], displayMode: "table", placement: "right"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "groupByNodes(stats.timers.apimon.metric.$environment.$zone.ping.{natgw,snat}.*.mean, 'avg', 7)" +title: "Outside NATGW/SNAT 
ping Latency" +type: "timeseries" diff --git a/dashboards/grafana/storage/apis.yaml b/dashboards/grafana/storage/apis.yaml new file mode 100644 index 0000000..9dc5390 --- /dev/null +++ b/dashboards/grafana/storage/apis.yaml @@ -0,0 +1,22 @@ +--- +order: 2 +gridPos: + w: 24 + h: 8 + y: 2 +fieldConfig: + defaults: + custom: + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" + unit: "ms" + decimals: 2 +options: + legend: {calcs: ["min", "mean", "max"], displayMode: "table", placement: "right"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "sortByName(groupByNodes(removeEmptySeries(stats.timers.openstack.api.$environment.$zone.{object,object-store}.*.*.*.mean, 0.01), 'sum', 8, 7, 9), true)" +title: "API calls duration" +type: "timeseries" diff --git a/dashboards/grafana/storage/bad_calls.yaml b/dashboards/grafana/storage/bad_calls.yaml new file mode 100644 index 0000000..8356519 --- /dev/null +++ b/dashboards/grafana/storage/bad_calls.yaml @@ -0,0 +1,23 @@ +--- +order: 4 +description: "API calls that received an error HTTP response code." 
+gridPos: + w: 24 + h: 8 + y: 4 +bars: true +fieldConfig: + defaults: + custom: + drawStyle: "bars" + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" +options: + legend: {calcs: ["count"], displayMode: "table", placement: "right"} + tooltip: {mode: "single", sort: "none"} +targets: + - refId: "A" + target: "removeEmptySeries(aliasByMetric(consolidateBy(groupByNode(exclude(stats.timers.openstack.api.$environment.$zone.{object,object-store}.*.*.{4*,5*}.count, '404'), 9, 'sum'), 'sum')), 0.001)" +title: "BAD calls" +type: "timeseries" diff --git a/dashboards/grafana/storage/dashboard.yaml b/dashboards/grafana/storage/dashboard.yaml new file mode 100644 index 0000000..b303781 --- /dev/null +++ b/dashboards/grafana/storage/dashboard.yaml @@ -0,0 +1,4 @@ +--- +title: "Storage Service Statistics" +uid: "CloudMonStorage" +description: "Data for the Storage service" diff --git a/dashboards/grafana/storage/longest_api.yaml b/dashboards/grafana/storage/longest_api.yaml new file mode 100644 index 0000000..460bfea --- /dev/null +++ b/dashboards/grafana/storage/longest_api.yaml @@ -0,0 +1,23 @@ +--- +order: 3 +description: "API calls which have occurrences longer than 10s." +gridPos: + h: 8 + w: 24 + y: 3 +fieldConfig: + defaults: + custom: + fillOpacity: 10 + spanNulls: 3600000 + showPoints: "never" + unit: "ms" + decimals: 2 +options: + legend: {"calcs": ["min", "mean", "max"], "displayMode": "table", "placement": "right"} + tooltip: {"mode": "single", "sort": "none"} +targets: + - refId: "A" + target: "maximumAbove(groupByNodes(removeEmptySeries(stats.timers.openstack.api.$environment.$zone.{object,object-store}.*.*.*.upper), 'sum', 8, 7, 9), 10000)" +title: "Highest API calls duration" +type: "timeseries"