Skip to content

Commit

Permalink
Fix issue where auth token didn't rotate as expected (#5)
Browse files Browse the repository at this point in the history
  • Loading branch information
DrEsteban authored Dec 18, 2024
1 parent f142d9d commit e110c68
Show file tree
Hide file tree
Showing 5 changed files with 55 additions and 27 deletions.
14 changes: 11 additions & 3 deletions src/Controllers/MetricsController.cs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
using Prometheus;
using SolarGateway_PrometheusProxy.Filters;
using SolarGateway_PrometheusProxy.Models;
using SolarGateway_PrometheusProxy.Services;

namespace SolarGateway_PrometheusProxy.Controllers;

Expand All @@ -14,24 +15,31 @@ public class MetricsController(
IEnumerable<IMetricsService> metricsService,
CollectorRegistry collectorRegistry,
IOptions<ResponseCacheConfiguration> responseCacheConfiguration,
IMemoryCache cache) : ControllerBase
IMemoryCache cache,
ILogger<MetricsController> logger) : ControllerBase
{
private readonly IEnumerable<IMetricsService> _metricsServices = metricsService;
private readonly CollectorRegistry _collectorRegistry = collectorRegistry;
private readonly ResponseCacheConfiguration _responseCacheConfiguration = responseCacheConfiguration.Value;
private readonly IMemoryCache _cache = cache;
private readonly ILogger<MetricsController> _logger = logger;

[HttpHead("/health")] // health check
[HttpHead] // health check
[HttpGet]
[ResponseCache(CacheProfileName = "default")]
public async Task GetMetrics()
{
// Set max timeout for metrics request
using var cts = CancellationTokenSource.CreateLinkedTokenSource(HttpContext.RequestAborted);
cts.CancelAfter(TimeSpan.FromSeconds(5));
using var _ = cts.Token.Register(() => _logger.LogWarning("Canceling metrics request due to 5 sec timeout"));

// Ensure metrics are only collected once per cache duration
await this._cache.GetOrCreateAsync("LastMetricsRequest",
async e =>
{
await Task.WhenAll(_metricsServices.Select(m => m.CollectMetricsAsync(_collectorRegistry, HttpContext.RequestAborted)));
await Task.WhenAll(_metricsServices.Select(m => m.CollectMetricsAsync(_collectorRegistry, cts.Token)));
e.AbsoluteExpirationRelativeToNow = TimeSpan.FromSeconds(_responseCacheConfiguration.ResponseCacheDurationSeconds);
return DateTimeOffset.UtcNow;
});
Expand All @@ -42,7 +50,7 @@ await this._cache.GetOrCreateAsync("LastMetricsRequest",

// Serialize metrics
Response.ContentType = PrometheusConstants.ExporterOpenMetricsContentTypeValue.ToString();
await _collectorRegistry.CollectAndExportAsTextAsync(Response.Body, ExpositionFormat.OpenMetricsText, HttpContext.RequestAborted);
await _collectorRegistry.CollectAndExportAsTextAsync(Response.Body, ExpositionFormat.OpenMetricsText, cts.Token);
}
}
}
1 change: 0 additions & 1 deletion src/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,6 @@
using OpenTelemetry.Resources;
using OpenTelemetry.Trace;
using Prometheus;
using SolarGateway_PrometheusProxy;
using SolarGateway_PrometheusProxy.Models;
using SolarGateway_PrometheusProxy.Services;
using SolarGateway_PrometheusProxy.Support;
Expand Down
2 changes: 1 addition & 1 deletion src/IMetricsService.cs → src/Services/IMetricsService.cs
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
using Prometheus;

namespace SolarGateway_PrometheusProxy;
namespace SolarGateway_PrometheusProxy.Services;

/// <summary>
/// Implemented by services that collect metrics from a solar brand's device(s) and save them to a Prometheus <see cref="CollectorRegistry"/>.
Expand Down
53 changes: 31 additions & 22 deletions src/Services/TeslaGatewayMetricsService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
using Prometheus;
using SolarGateway_PrometheusProxy.Exceptions;
using SolarGateway_PrometheusProxy.Models;
using SolarGateway_PrometheusProxy.Support;

namespace SolarGateway_PrometheusProxy.Services;

Expand All @@ -32,30 +33,37 @@ public override async Task CollectMetricsAsync(CollectorRegistry collectorRegist
var sw = Stopwatch.StartNew();
bool loginCached = true;

// Check auth token cache
if (string.IsNullOrWhiteSpace(this._cachedLoginResponse?.Token) ||
await this.PingTestAsync(cancellationToken) is HttpStatusCode.Unauthorized or HttpStatusCode.Forbidden)
(await this.PingTestAsync(cancellationToken)).IsAuthenticationFailure())
{
// Cache an auth token
loginCached = false;
this._cachedLoginResponse = await this.LoginAsync(cancellationToken);
}

// Confirm auth token
var pingStatusCode = await this.PingTestAsync(cancellationToken);
if ((int)pingStatusCode is not >= 200 and < 300)
if (!pingStatusCode.IsSuccessStatusCode())
{
throw new MetricRequestFailedException($"Failed to authenticate and ping the Tesla Gateway: {(int)pingStatusCode} ({pingStatusCode})");
}

// Get rest of metrics in parallel
// Get metrics in parallel
var results = await Task.WhenAll(
this.PullMeterAggregatesAsync(collectorRegistry, cancellationToken),
this.PullPowerwallPercentageAsync(collectorRegistry, cancellationToken),
this.PullSiteInfoAsync(collectorRegistry, cancellationToken),
this.PullStatusAsync(collectorRegistry, cancellationToken),
this.PullOperationAsync(collectorRegistry, cancellationToken));
if (!results.All(r => r))
if (!results.All(r => r.IsSuccessStatusCode()))
{
throw new MetricRequestFailedException($"Failed to pull {results.Count(r => !r)}/{results.Length} endpoints on Tesla gateway");
// A 401/403 from any endpoint means the cached token is no longer accepted:
// drop it so the next collection logs in again instead of reusing it.
if (results.Any(r => r.IsAuthenticationFailure()))
{
    this._cachedLoginResponse = null;
}

// Report how many endpoints FAILED so the number matches the "Failed to pull" wording.
int numFailed = results.Count(r => !r.IsSuccessStatusCode());
throw new MetricRequestFailedException($"Failed to pull {numFailed}/{results.Length} endpoints on Tesla gateway");
}

base.SetRequestDurationMetric(collectorRegistry, loginCached, sw.Elapsed);
Expand All @@ -82,19 +90,20 @@ private async Task<TeslaLoginResponse> LoginAsync(CancellationToken cancellation

private async Task<HttpStatusCode> PingTestAsync(CancellationToken cancellationToken)
{
using var request = new HttpRequestMessage(HttpMethod.Get, "/customer");
// Arbitrarily picking /api/operation as a test endpoint
using var request = new HttpRequestMessage(HttpMethod.Get, "/api/operation");
request.Headers.Authorization = this._cachedLoginResponse?.AuthenticationHeader;
using var response = await this._client.SendAsync(request, HttpCompletionOption.ResponseHeadersRead, cancellationToken);
return response.StatusCode;
}

private async Task<bool> PullMeterAggregatesAsync(CollectorRegistry registry, CancellationToken cancellationToken)
private async Task<HttpStatusCode> PullMeterAggregatesAsync(CollectorRegistry registry, CancellationToken cancellationToken)
{
(var metricsDocument, var statusCode) = await base.CallMetricEndpointAsync("/api/meters/aggregates", this._cachedLoginResponse?.AuthenticationHeader, cancellationToken);
if (metricsDocument is null)
{
this._logger.LogError("API Meter aggregates document is null");
return false;
return statusCode;
}
using var _ = metricsDocument;

Expand Down Expand Up @@ -122,30 +131,30 @@ private async Task<bool> PullMeterAggregatesAsync(CollectorRegistry registry, Ca
}
}

return true;
return statusCode;
}

private async Task<bool> PullPowerwallPercentageAsync(CollectorRegistry registry, CancellationToken cancellationToken)
private async Task<HttpStatusCode> PullPowerwallPercentageAsync(CollectorRegistry registry, CancellationToken cancellationToken)
{
(var metricsDocument, var statusCode) = await base.CallMetricEndpointAsync("/api/system_status/soe", this._cachedLoginResponse?.AuthenticationHeader, cancellationToken);
if (metricsDocument is null)
{
this._logger.LogError("API Powerwall percentage document is null");
return false;
return statusCode;
}
using var _ = metricsDocument;

base.CreateGauge(registry, "powerwall", "percentage").Set(metricsDocument.RootElement.GetProperty("percentage").GetDouble());
return true;
return statusCode;
}

private async Task<bool> PullSiteInfoAsync(CollectorRegistry registry, CancellationToken cancellationToken)
private async Task<HttpStatusCode> PullSiteInfoAsync(CollectorRegistry registry, CancellationToken cancellationToken)
{
(var metricsDocument, var statusCode) = await base.CallMetricEndpointAsync("/api/site_info", this._cachedLoginResponse?.AuthenticationHeader, cancellationToken);
if (metricsDocument is null)
{
this._logger.LogError("Site info document is null");
return false;
return statusCode;
}
using var _ = metricsDocument;

Expand All @@ -155,19 +164,19 @@ private async Task<bool> PullSiteInfoAsync(CollectorRegistry registry, Cancellat
base.CreateGauge(registry, "siteinfo", metric.Name).Set(metric.Value.GetDouble());
}

return true;
return statusCode;
}

[GeneratedRegex(@"^(?<hours>[0-9]*)h(?<minutes>[0-9]*)m(?<seconds>[0-9]*)(\.[0-9]*s)?$")]
private static partial Regex UpTimeRegex();

private async Task<bool> PullStatusAsync(CollectorRegistry registry, CancellationToken cancellationToken)
private async Task<HttpStatusCode> PullStatusAsync(CollectorRegistry registry, CancellationToken cancellationToken)
{
(var metricsDocument, var statusCode) = await base.CallMetricEndpointAsync("/api/status", this._cachedLoginResponse?.AuthenticationHeader, cancellationToken);
if (metricsDocument is null)
{
this._logger.LogError("API Status document is null");
return false;
return statusCode;
}
using var _ = metricsDocument;

Expand All @@ -186,16 +195,16 @@ private async Task<bool> PullStatusAsync(CollectorRegistry registry, Cancellatio
base.CreateGauge(registry, "status", "up_time_seconds").Set(timeSpan.TotalSeconds);
}

return true;
return statusCode;
}

private async Task<bool> PullOperationAsync(CollectorRegistry registry, CancellationToken cancellationToken)
private async Task<HttpStatusCode> PullOperationAsync(CollectorRegistry registry, CancellationToken cancellationToken)
{
(var metricsDocument, var statusCode) = await base.CallMetricEndpointAsync("/api/operation", this._cachedLoginResponse?.AuthenticationHeader, cancellationToken);
if (metricsDocument is null)
{
this._logger.LogError("API Operation document is null");
return false;
return statusCode;
}
using var _ = metricsDocument;

Expand All @@ -209,6 +218,6 @@ private async Task<bool> PullOperationAsync(CollectorRegistry registry, Cancella
GetModeGauge(autonomous).Set(realMode == autonomous ? 1 : 0);
GetModeGauge(backup).Set(realMode == backup ? 1 : 0);

return true;
return statusCode;
}
}
}
12 changes: 12 additions & 0 deletions src/Support/Extensions.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
using System.Net;

namespace SolarGateway_PrometheusProxy.Support;

/// <summary>
/// Helper predicates for classifying <see cref="HttpStatusCode"/> values.
/// </summary>
internal static class Extensions
{
    /// <summary>
    /// Tells whether a status code lies in the 2xx range (200 through 299 inclusive).
    /// </summary>
    /// <param name="statusCode">The HTTP status code to classify.</param>
    /// <returns><c>true</c> for 200-299; otherwise <c>false</c>.</returns>
    public static bool IsSuccessStatusCode(this HttpStatusCode statusCode)
    {
        int numericCode = (int)statusCode;
        return numericCode >= 200 && numericCode <= 299;
    }

    /// <summary>
    /// Tells whether a status code is 401 (Unauthorized) or 403 (Forbidden).
    /// </summary>
    /// <param name="statusCode">The HTTP status code to classify.</param>
    /// <returns><c>true</c> for 401 or 403; otherwise <c>false</c>.</returns>
    public static bool IsAuthenticationFailure(this HttpStatusCode statusCode)
    {
        switch (statusCode)
        {
            case HttpStatusCode.Unauthorized:
            case HttpStatusCode.Forbidden:
                return true;
            default:
                return false;
        }
    }
}

0 comments on commit e110c68

Please sign in to comment.