diff --git a/src/Controllers/MetricsController.cs b/src/Controllers/MetricsController.cs
index 7e357f8..5f6c2ad 100644
--- a/src/Controllers/MetricsController.cs
+++ b/src/Controllers/MetricsController.cs
@@ -4,6 +4,7 @@
 using Prometheus;
 using SolarGateway_PrometheusProxy.Filters;
 using SolarGateway_PrometheusProxy.Models;
+using SolarGateway_PrometheusProxy.Services;
 
 namespace SolarGateway_PrometheusProxy.Controllers;
 
@@ -14,12 +15,14 @@ public class MetricsController(
     IEnumerable<IMetricsService> metricsService,
     CollectorRegistry collectorRegistry,
     IOptions<ResponseCacheConfiguration> responseCacheConfiguration,
-    IMemoryCache cache) : ControllerBase
+    IMemoryCache cache,
+    ILogger<MetricsController> logger) : ControllerBase
 {
     private readonly IEnumerable<IMetricsService> _metricsServices = metricsService;
     private readonly CollectorRegistry _collectorRegistry = collectorRegistry;
     private readonly ResponseCacheConfiguration _responseCacheConfiguration = responseCacheConfiguration.Value;
     private readonly IMemoryCache _cache = cache;
+    private readonly ILogger<MetricsController> _logger = logger;
 
     [HttpHead("/health")] // health check
     [HttpHead] // health check
@@ -27,11 +30,16 @@ public class MetricsController(
     [ResponseCache(CacheProfileName = "default")]
     public async Task GetMetrics()
     {
+        // Cap the request at 5 sec; the warning hangs off the timeout source only, so client aborts are not logged as timeouts
+        using var timeoutCts = new CancellationTokenSource(TimeSpan.FromSeconds(5));
+        using var _ = timeoutCts.Token.Register(() => _logger.LogWarning("Canceling metrics request due to 5 sec timeout"));
+        using var cts = CancellationTokenSource.CreateLinkedTokenSource(HttpContext.RequestAborted, timeoutCts.Token);
+
         // Ensure metrics are only collected once per cache duration
         await this._cache.GetOrCreateAsync("LastMetricsRequest",
             async e =>
             {
-                await Task.WhenAll(_metricsServices.Select(m => m.CollectMetricsAsync(_collectorRegistry, HttpContext.RequestAborted)));
+                await Task.WhenAll(_metricsServices.Select(m => m.CollectMetricsAsync(_collectorRegistry, cts.Token)));
                 e.AbsoluteExpirationRelativeToNow = TimeSpan.FromSeconds(_responseCacheConfiguration.ResponseCacheDurationSeconds);
                 return DateTimeOffset.UtcNow;
             });
@@ -42,7 +50,7 @@ await this._cache.GetOrCreateAsync("LastMetricsRequest",
 
             // Serialize metrics
             Response.ContentType = PrometheusConstants.ExporterOpenMetricsContentTypeValue.ToString();
-            await _collectorRegistry.CollectAndExportAsTextAsync(Response.Body, ExpositionFormat.OpenMetricsText, HttpContext.RequestAborted);
+            await _collectorRegistry.CollectAndExportAsTextAsync(Response.Body, ExpositionFormat.OpenMetricsText, cts.Token);
         }
     }
 }
\ No newline at end of file
diff --git a/src/Program.cs b/src/Program.cs
index 1639e9f..8562e6d 100644
--- a/src/Program.cs
+++ b/src/Program.cs
@@ -11,7 +11,6 @@
 using OpenTelemetry.Resources;
 using OpenTelemetry.Trace;
 using Prometheus;
-using SolarGateway_PrometheusProxy;
 using SolarGateway_PrometheusProxy.Models;
 using SolarGateway_PrometheusProxy.Services;
 using SolarGateway_PrometheusProxy.Support;
diff --git a/src/IMetricsService.cs b/src/Services/IMetricsService.cs
similarity index 87%
rename from src/IMetricsService.cs
rename to src/Services/IMetricsService.cs
index f011ef3..99741af 100644
--- a/src/IMetricsService.cs
+++ b/src/Services/IMetricsService.cs
@@ -1,6 +1,6 @@
 using Prometheus;
 
-namespace SolarGateway_PrometheusProxy;
+namespace SolarGateway_PrometheusProxy.Services;
 
 /// <summary>
 /// Implemented by services that collect metrics from a solar brand's device(s) and save them to a Prometheus <see cref="CollectorRegistry"/>.
diff --git a/src/Services/TeslaGatewayMetricsService.cs b/src/Services/TeslaGatewayMetricsService.cs
index 3fa95d4..7cd8c0c 100644
--- a/src/Services/TeslaGatewayMetricsService.cs
+++ b/src/Services/TeslaGatewayMetricsService.cs
@@ -6,6 +6,7 @@
 using Prometheus;
 using SolarGateway_PrometheusProxy.Exceptions;
 using SolarGateway_PrometheusProxy.Models;
+using SolarGateway_PrometheusProxy.Support;
 
 namespace SolarGateway_PrometheusProxy.Services;
 
@@ -32,30 +33,37 @@ public override async Task CollectMetricsAsync(CollectorRegistry collectorRegist
         var sw = Stopwatch.StartNew();
         bool loginCached = true;
 
+        // Check auth token cache
         if (string.IsNullOrWhiteSpace(this._cachedLoginResponse?.Token) ||
-            await this.PingTestAsync(cancellationToken) is HttpStatusCode.Unauthorized or HttpStatusCode.Forbidden)
+            (await this.PingTestAsync(cancellationToken)).IsAuthenticationFailure())
         {
             // Cache an auth token
             loginCached = false;
             this._cachedLoginResponse = await this.LoginAsync(cancellationToken);
         }
 
+        // Confirm auth token
         var pingStatusCode = await this.PingTestAsync(cancellationToken);
-        if ((int)pingStatusCode is not >= 200 and < 300)
+        if (!pingStatusCode.IsSuccessStatusCode())
         {
             throw new MetricRequestFailedException($"Failed to authenticate and ping the Tesla Gateway: {(int)pingStatusCode} ({pingStatusCode})");
         }
 
-        // Get rest of metrics in parallel
+        // Get metrics in parallel
         var results = await Task.WhenAll(
             this.PullMeterAggregatesAsync(collectorRegistry, cancellationToken),
             this.PullPowerwallPercentageAsync(collectorRegistry, cancellationToken),
             this.PullSiteInfoAsync(collectorRegistry, cancellationToken),
             this.PullStatusAsync(collectorRegistry, cancellationToken),
             this.PullOperationAsync(collectorRegistry, cancellationToken));
-        if (!results.All(r => r))
+        if (!results.All(r => r.IsSuccessStatusCode()))
         {
-            throw new MetricRequestFailedException($"Failed to pull {results.Count(r => !r)}/{results.Length} endpoints on Tesla gateway");
+            if (results.Any(r => r.IsAuthenticationFailure()))
+            {
+                this._cachedLoginResponse = null;
+            }
+            int numFailed = results.Count(r => !r.IsSuccessStatusCode()); // the message reports failures, so count the non-success results
+            throw new MetricRequestFailedException($"Failed to pull {numFailed}/{results.Length} endpoints on Tesla gateway");
         }
 
         base.SetRequestDurationMetric(collectorRegistry, loginCached, sw.Elapsed);
@@ -82,19 +90,20 @@ private async Task LoginAsync(CancellationToken cancellation
 
     private async Task<HttpStatusCode> PingTestAsync(CancellationToken cancellationToken)
     {
-        using var request = new HttpRequestMessage(HttpMethod.Get, "/customer");
+        // Arbitrarily picking /api/operation as a test endpoint
+        using var request = new HttpRequestMessage(HttpMethod.Get, "/api/operation");
         request.Headers.Authorization = this._cachedLoginResponse?.AuthenticationHeader;
         using var response = await this._client.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken);
         return response.StatusCode;
     }
 
-    private async Task<bool> PullMeterAggregatesAsync(CollectorRegistry registry, CancellationToken cancellationToken)
+    private async Task<HttpStatusCode> PullMeterAggregatesAsync(CollectorRegistry registry, CancellationToken cancellationToken)
     {
         (var metricsDocument, var statusCode) = await base.CallMetricEndpointAsync("/api/meters/aggregates", this._cachedLoginResponse?.AuthenticationHeader, cancellationToken);
         if (metricsDocument is null)
         {
             this._logger.LogError("API Meter aggregates document is null");
-            return false;
+            return statusCode;
         }
 
         using var _ = metricsDocument;
@@ -122,30 +131,30 @@ private async Task<bool> PullMeterAggregatesAsync(CollectorRegistry registry, Ca
             }
         }
 
-        return true;
+        return statusCode;
     }
 
-    private async Task<bool> PullPowerwallPercentageAsync(CollectorRegistry registry, CancellationToken cancellationToken)
+    private async Task<HttpStatusCode> PullPowerwallPercentageAsync(CollectorRegistry registry, CancellationToken cancellationToken)
     {
         (var metricsDocument, var statusCode) = await base.CallMetricEndpointAsync("/api/system_status/soe", this._cachedLoginResponse?.AuthenticationHeader, cancellationToken);
         if (metricsDocument is null)
         {
             this._logger.LogError("API Powerwall percentage document is null");
-            return false;
+            return statusCode;
         }
 
         using var _ = metricsDocument;
         base.CreateGauge(registry, "powerwall", "percentage").Set(metricsDocument.RootElement.GetProperty("percentage").GetDouble());
-        return true;
+        return statusCode;
     }
 
-    private async Task<bool> PullSiteInfoAsync(CollectorRegistry registry, CancellationToken cancellationToken)
+    private async Task<HttpStatusCode> PullSiteInfoAsync(CollectorRegistry registry, CancellationToken cancellationToken)
     {
         (var metricsDocument, var statusCode) = await base.CallMetricEndpointAsync("/api/site_info", this._cachedLoginResponse?.AuthenticationHeader, cancellationToken);
         if (metricsDocument is null)
         {
             this._logger.LogError("Site info document is null");
-            return false;
+            return statusCode;
         }
 
         using var _ = metricsDocument;
@@ -155,19 +164,19 @@ private async Task<bool> PullSiteInfoAsync(CollectorRegistry registry, Cancellat
             base.CreateGauge(registry, "siteinfo", metric.Name).Set(metric.Value.GetDouble());
         }
 
-        return true;
+        return statusCode;
     }
 
     [GeneratedRegex(@"^(?[0-9]*)h(?[0-9]*)m(?[0-9]*)(\.[0-9]*s)?$")]
     private static partial Regex UpTimeRegex();
 
-    private async Task<bool> PullStatusAsync(CollectorRegistry registry, CancellationToken cancellationToken)
+    private async Task<HttpStatusCode> PullStatusAsync(CollectorRegistry registry, CancellationToken cancellationToken)
     {
         (var metricsDocument, var statusCode) = await base.CallMetricEndpointAsync("/api/status", this._cachedLoginResponse?.AuthenticationHeader, cancellationToken);
         if (metricsDocument is null)
         {
             this._logger.LogError("API Status document is null");
-            return false;
+            return statusCode;
         }
 
         using var _ = metricsDocument;
@@ -186,16 +195,16 @@ private async Task<bool> PullStatusAsync(CollectorRegistry registry, Cancellatio
             base.CreateGauge(registry, "status", "up_time_seconds").Set(timeSpan.TotalSeconds);
         }
 
-        return true;
+        return statusCode;
     }
 
-    private async Task<bool> PullOperationAsync(CollectorRegistry registry, CancellationToken cancellationToken)
+    private async Task<HttpStatusCode> PullOperationAsync(CollectorRegistry registry, CancellationToken cancellationToken)
     {
         (var metricsDocument, var statusCode) = await base.CallMetricEndpointAsync("/api/operation", this._cachedLoginResponse?.AuthenticationHeader, cancellationToken);
         if (metricsDocument is null)
         {
             this._logger.LogError("API Operation document is null");
-            return false;
+            return statusCode;
         }
 
         using var _ = metricsDocument;
@@ -209,6 +218,6 @@ private async Task<bool> PullOperationAsync(CollectorRegistry registry, Cancella
         GetModeGauge(autonomous).Set(realMode == autonomous ? 1 : 0);
         GetModeGauge(backup).Set(realMode == backup ? 1 : 0);
 
-        return true;
+        return statusCode;
     }
-}
\ No newline at end of file
+}
diff --git a/src/Support/Extensions.cs b/src/Support/Extensions.cs
new file mode 100644
index 0000000..eec34bc
--- /dev/null
+++ b/src/Support/Extensions.cs
@@ -0,0 +1,14 @@
+using System.Net;
+
+namespace SolarGateway_PrometheusProxy.Support;
+
+internal static class Extensions
+{
+    /// <summary>Returns <see langword="true"/> when the numeric status code is in the range 200-299.</summary>
+    public static bool IsSuccessStatusCode(this HttpStatusCode statusCode)
+        => (int)statusCode is >= 200 and < 300;
+
+    /// <summary>Returns <see langword="true"/> when the status code is 401 Unauthorized or 403 Forbidden.</summary>
+    public static bool IsAuthenticationFailure(this HttpStatusCode statusCode)
+        => statusCode is HttpStatusCode.Unauthorized or HttpStatusCode.Forbidden;
+}