Skip to content

Commit

Permalink
escalate plugin: handle origin not reachable
Browse files Browse the repository at this point in the history
Update the escalate plugin to handle dispatching to the failover server
if the original server is down.

This also adds a new autest for the escalate plugin.

Fixes: apache#11836
  • Loading branch information
bneradt committed Nov 6, 2024
1 parent 191ba94 commit 5072906
Show file tree
Hide file tree
Showing 4 changed files with 627 additions and 13 deletions.
116 changes: 103 additions & 13 deletions plugins/escalate/escalate.cc
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ const char PLUGIN_NAME[] = "escalate";
static DbgCtl dbg_ctl{PLUGIN_NAME};

static int EscalateResponse(TSCont, TSEvent, void *);
static int EscalateResponseServerError(TSCont, TSEvent, void *);

//////////////////////////////////////////////////////////////////////////////////////////
// Hold information about the escalation / retry states for a remap rule.
Expand All @@ -56,13 +57,19 @@ struct EscalationState {

// Create the two continuations used by this remap rule and attach this state to each.
EscalationState()
{
  cont              = TSContCreate(EscalateResponse, nullptr);
  cont_server_error = TSContCreate(EscalateResponseServerError, nullptr);

  // Both handlers fetch this object back with TSContDataGet(), so the state has to be
  // attached to each continuation; otherwise the handler would dereference a null pointer.
  TSContDataSet(cont, this);
  TSContDataSet(cont_server_error, this);
}

// Destroy both continuations created by the constructor.
~EscalationState()
{
  TSContDestroy(cont);
  TSContDestroy(cont_server_error);
}
TSCont cont;
TSCont cont_server_error;
StatusMapType status_map;
bool use_pristine = false;
};
Expand Down Expand Up @@ -101,12 +108,14 @@ EscalateResponse(TSCont cont, TSEvent event, void *edata)

// First, we need the server response ...
if (TS_SUCCESS != TSHttpTxnServerRespGet(txn, &mbuf, &hdrp)) {
goto no_action;
TSHttpTxnReenable(txn, TS_EVENT_HTTP_CONTINUE);
return TS_EVENT_NONE;
}

tries = TSHttpTxnRedirectRetries(txn);
if (0 != tries) { // ToDo: Future support for more than one retry-URL
goto no_action;
TSHttpTxnReenable(txn, TS_EVENT_HTTP_CONTINUE);
return TS_EVENT_NONE;
}
Dbg(dbg_ctl, "This is try %d, proceeding", tries);

Expand All @@ -117,24 +126,26 @@ EscalateResponse(TSCont cont, TSEvent event, void *edata)
// See if we have an escalation retry config for this response code
entry = es->status_map.find(static_cast<unsigned>(status));
if (entry == es->status_map.end()) {
goto no_action;
TSHttpTxnReenable(txn, TS_EVENT_HTTP_CONTINUE);
return TS_EVENT_NONE;
}
EscalationState::RetryInfo const &retry_info = entry->second;

Dbg(dbg_ctl, "Found an entry for HTTP status %u", static_cast<unsigned>(status));
if (EscalationState::RETRY_URL == entry->second.type) {
url_str = TSstrdup(entry->second.target.c_str());
url_len = entry->second.target.size();
if (EscalationState::RETRY_URL == retry_info.type) {
url_str = TSstrdup(retry_info.target.c_str());
url_len = retry_info.target.size();
Dbg(dbg_ctl, "Setting new URL to %.*s", url_len, url_str);
} else if (EscalationState::RETRY_HOST == entry->second.type) {
} else if (EscalationState::RETRY_HOST == retry_info.type) {
if (es->use_pristine) {
if (TS_SUCCESS == TSHttpTxnPristineUrlGet(txn, &mbuf, &url)) {
url_str = MakeEscalateUrl(mbuf, url, entry->second.target.c_str(), entry->second.target.size(), url_len);
url_str = MakeEscalateUrl(mbuf, url, retry_info.target.c_str(), retry_info.target.size(), url_len);
TSHandleMLocRelease(mbuf, TS_NULL_MLOC, url);
}
} else {
if (TS_SUCCESS == TSHttpTxnClientReqGet(txn, &mbuf, &hdrp)) {
if (TS_SUCCESS == TSHttpHdrUrlGet(mbuf, hdrp, &url)) {
url_str = MakeEscalateUrl(mbuf, url, entry->second.target.c_str(), entry->second.target.size(), url_len);
url_str = MakeEscalateUrl(mbuf, url, retry_info.target.c_str(), retry_info.target.size(), url_len);
}
// Release the request MLoc
TSHandleMLocRelease(mbuf, TS_NULL_MLOC, hdrp);
Expand All @@ -147,8 +158,86 @@ EscalateResponse(TSCont cont, TSEvent event, void *edata)
TSHttpTxnRedirectUrlSet(txn, url_str, url_len); // Transfers ownership
}

// Set the transaction free ...
no_action:
// Set the transaction free ...
TSHttpTxnReenable(txn, TS_EVENT_HTTP_CONTINUE);
return TS_EVENT_NONE;
}

//////////////////////////////////////////////////////////////////////////////////////////
// Handle origin connection issues for escalation.
//
// Continuation handler that asserts it is invoked with TS_EVENT_HTTP_SEND_RESPONSE_HDR.
// When no redirect has been attempted yet, the server state is not
// TS_SRVSTATE_CONNECTION_ALIVE, and the status of the client response has an entry in the
// status map, the transaction's redirect URL is set to the configured retry target.
//
// The transaction is re-enabled with TS_EVENT_HTTP_CONTINUE on every path and the handler
// always returns TS_EVENT_NONE.
//
static int
EscalateResponseServerError(TSCont cont, TSEvent event, void *edata)
{
  // ---------------------------------------------------------------------
  // TODO: maybe consolidate this with EscalateResponse() function above?
  // ---------------------------------------------------------------------

  TSHttpTxn        txn      = static_cast<TSHttpTxn>(edata);
  EscalationState *es       = static_cast<EscalationState *>(TSContDataGet(cont));
  TSMBuffer        resp_buf = nullptr;
  TSMLoc           resp_hdr = TS_NULL_MLOC;
  char            *url_str  = nullptr;
  int              url_len  = 0;

  TSAssert(event == TS_EVENT_HTTP_SEND_RESPONSE_HDR);

  // Without the escalation state there is no status map to consult.
  if (nullptr == es) {
    Dbg(dbg_ctl, "Server connection error: no escalation state attached to the continuation");
    TSHttpTxnReenable(txn, TS_EVENT_HTTP_CONTINUE);
    return TS_EVENT_NONE;
  }

  // No handle was acquired on failure, so there is nothing to release here.
  if (TS_SUCCESS != TSHttpTxnClientRespGet(txn, &resp_buf, &resp_hdr)) {
    TSHttpTxnReenable(txn, TS_EVENT_HTTP_CONTINUE);
    return TS_EVENT_NONE;
  }

  // From here on the client response header is held: every exit releases it exactly once
  // and then sets the transaction free.
  auto const finish = [&]() -> int {
    TSHandleMLocRelease(resp_buf, TS_NULL_MLOC, resp_hdr);
    TSHttpTxnReenable(txn, TS_EVENT_HTTP_CONTINUE);
    return TS_EVENT_NONE;
  };

  auto const tries = TSHttpTxnRedirectRetries(txn);
  if (0 != tries) {
    return finish();
  }
  Dbg(dbg_ctl, "Server connection error: this is try %d, proceeding", tries);

  // Check whether the connection is alive; only connection failures are handled here.
  TSServerState const state = TSHttpTxnServerStateGet(txn);
  if (state == TS_SRVSTATE_CONNECTION_ALIVE) {
    return finish();
  }

  // See if we have an escalation retry config for the status of the client response.
  TSHttpStatus const status = TSHttpHdrStatusGet(resp_buf, resp_hdr);
  auto               entry  = es->status_map.find(static_cast<unsigned>(status));
  if (entry == es->status_map.end()) {
    Dbg(dbg_ctl, "Server connection error: unregistered status response code %d", status);
    return finish();
  }
  Dbg(dbg_ctl, "Server connection error: found an entry for rcode:%d, state:%d", status, state);

  EscalationState::RetryInfo const &retry_info = entry->second;
  if (EscalationState::RETRY_URL == retry_info.type) {
    url_str = TSstrdup(retry_info.target.c_str());
    url_len = static_cast<int>(retry_info.target.size());
    Dbg(dbg_ctl, "Server connection error: setting new URL to %.*s", url_len, url_str);
  } else if (EscalationState::RETRY_HOST == retry_info.type) {
    // Use dedicated handles for the client request so the response handles held above are
    // neither overwritten (leaking the response header) nor released twice.
    TSMBuffer req_buf = nullptr;
    TSMLoc    req_hdr = TS_NULL_MLOC;
    if (TS_SUCCESS == TSHttpTxnClientReqGet(txn, &req_buf, &req_hdr)) {
      TSMLoc url = TS_NULL_MLOC;
      if (TS_SUCCESS == TSHttpHdrUrlGet(req_buf, req_hdr, &url)) {
        // Update the request URL with the new Host to try.
        // NOTE(review): this modifies the client request URL in place, whereas
        // EscalateResponse() builds its URL through MakeEscalateUrl() -- confirm intended.
        TSUrlHostSet(req_buf, url, retry_info.target.c_str(), retry_info.target.size());
        url_str = TSUrlStringGet(req_buf, url, &url_len);
        if (url_str) {
          Dbg(dbg_ctl, "Server connection error: setting new Host: to %.*s", url_len, url_str);
        }
        TSHandleMLocRelease(req_buf, req_hdr, url);
      }
      // Release the request MLoc
      TSHandleMLocRelease(req_buf, TS_NULL_MLOC, req_hdr);
    }
  }

  // Now update the Redirect URL, if set
  if (url_str) {
    TSHttpTxnRedirectUrlSet(txn, url_str, url_len); // Transfers ownership
  }

  // Set the transaction free ...
  return finish();
}
Expand Down Expand Up @@ -228,6 +317,7 @@ TSRemapDoRemap(void *instance, TSHttpTxn txn, TSRemapRequestInfo * /* rri */)
{
  EscalationState *es = static_cast<EscalationState *>(instance);

  // Responses read from the origin are examined by EscalateResponse().
  TSHttpTxnHookAdd(txn, TS_HTTP_READ_RESPONSE_HDR_HOOK, es->cont);
  // EscalateResponseServerError() asserts TS_EVENT_HTTP_SEND_RESPONSE_HDR, so its
  // continuation goes on the send-response hook; this is what lets the plugin react
  // when the origin could not be reached.
  TSHttpTxnHookAdd(txn, TS_HTTP_SEND_RESPONSE_HDR_HOOK, es->cont_server_error);
  return TSREMAP_NO_REMAP;
}
136 changes: 136 additions & 0 deletions tests/gold_tests/pluginTest/escalate/escalate.test.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,136 @@
'''
'''
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import os
from ports import get_port

# Short description of what this test covers.
Test.Summary = '''
Test the escalate plugin.
'''

# Only run this test when Condition.PluginExists reports that escalate.so is present.
Test.SkipUnless(Condition.PluginExists('escalate.so'))


class EscalateTest:
    """
    Test the escalate plugin.

    A single test run is configured with a DNS server, an origin verifier
    server, a failover verifier server, a Traffic Server process with two
    escalate remap rules, and a verifier client that replays the traffic.
    """

    _replay_original_file: str = 'escalate_original.replay.yaml'
    _replay_failover_file: str = 'escalate_failover.replay.yaml'

    def __init__(self):
        '''Configure the test run.'''
        tr = Test.AddTestRun('Test escalate plugin.')
        self._setup_dns(tr)
        self._setup_servers(tr)
        self._setup_ts(tr)
        self._setup_client(tr)

    def _setup_dns(self, tr: 'Process') -> None:
        '''Set up the DNS server.

        :param tr: The test run to add the DNS server to.
        '''
        self._dns = tr.MakeDNServer("dns", default='127.0.0.1')

    def _setup_servers(self, tr: 'Process') -> None:
        '''Set up the origin and failover servers.

        :param tr: The test run to add the servers to.
        '''
        tr.Setup.Copy(self._replay_original_file)
        tr.Setup.Copy(self._replay_failover_file)
        self._server_origin = tr.AddVerifierServerProcess("server_origin", self._replay_original_file)
        self._server_failover = tr.AddVerifierServerProcess("server_failover", self._replay_failover_file)

        self._server_origin.Streams.All += Testers.ContainsExpression(
            'uuid: GET', "Verify the origin server received the GET request.")
        self._server_origin.Streams.All += Testers.ContainsExpression(
            'uuid: POST', "Verify the origin server received the POST request.")
        self._server_origin.Streams.All += Testers.ContainsExpression(
            'uuid: POST_failed', "Verify the origin server received the POST request that it returns a 502 with.")
        self._server_origin.Streams.All += Testers.ContainsExpression(
            'uuid: CHUNKED_POST', "Verify the origin server received the chunked POST request.")
        self._server_origin.Streams.All += Testers.ContainsExpression(
            'uuid: CHUNKED_POST_failed', "Verify the origin server received the chunked POST request that it returns a 502 with.")

        self._server_failover.Streams.All += Testers.ContainsExpression(
            'uuid: POST_failed', "Verify the failover server received the failed POST request.")
        self._server_failover.Streams.All += Testers.ContainsExpression(
            'uuid: CHUNKED_POST_failed', "Verify the failover server received the failed chunked POST request.")
        self._server_failover.Streams.All += Testers.ContainsExpression(
            'uuid: POST_down_origin', "Verify the failover server received the POST request that the origin server is down.")

        # The trailing \b keeps these exclusions from also matching the
        # "POST_failed", "POST_down_origin" and "CHUNKED_POST_failed" uuids that
        # the failover server is required to receive (checked just above).
        self._server_failover.Streams.All += Testers.ExcludesExpression(
            r'uuid: GET\b', "Verify the failover server did not receive the GET request.")
        self._server_failover.Streams.All += Testers.ExcludesExpression(
            r'uuid: POST\b', "Verify the failover server did not receive the POST request.")
        self._server_failover.Streams.All += Testers.ExcludesExpression(
            r'uuid: CHUNKED_POST\b', "Verify the failover server did not receive the chunked POST request.")

    def _setup_ts(self, tr: 'Process') -> None:
        '''Set up Traffic Server.

        :param tr: The test run to add Traffic Server to.
        '''
        self._ts = tr.MakeATSProcess("ts", enable_cache=False)
        # Select a port that is guaranteed to not be used at the moment.
        dead_port = get_port(self._ts, "dead_port")
        self._ts.Disk.records_config.update(
            {
                'proxy.config.diags.debug.enabled': 1,
                'proxy.config.diags.debug.tags': 'http|escalate',
                'proxy.config.dns.nameservers': f'127.0.0.1:{self._dns.Variables.Port}',
                'proxy.config.dns.resolv_conf': 'NULL',
            })
        failover_target = f'failover.server.com:{self._server_failover.Variables.http_port}'
        self._ts.Disk.remap_config.AddLines(
            [
                # Each list element is one remap rule. Adjacent string literals
                # are concatenated, so the comma after each rule is what keeps
                # the two rules on separate remap.config lines.
                #
                # Rule for an origin that is up. The client connects to the
                # plain-HTTP proxy port and the target is the origin's
                # http_port, so both sides of the rule use the http scheme.
                f'map http://origin.server.com http://backend.origin.server.com:{self._server_origin.Variables.http_port} '
                f'@plugin=escalate.so @pparam=500,502:{failover_target}',

                # Rule whose origin points at dead_port, a port nothing is
                # listening on, to exercise the origin-not-reachable path.
                f'map http://down_origin.server.com http://backend.down_origin.server.com:{dead_port} '
                f'@plugin=escalate.so @pparam=500,502:{failover_target}',
            ])

    def _setup_client(self, tr: 'Process') -> None:
        '''Set up the client.

        :param tr: The test run to add the client to.
        '''
        client = tr.AddVerifierClientProcess("client", self._replay_original_file, http_ports=[self._ts.Variables.port])
        client.StartBefore(self._dns)
        client.StartBefore(self._server_origin)
        client.StartBefore(self._server_failover)
        client.StartBefore(self._ts)

        client.Streams.All += Testers.ExcludesExpression(r'\[ERROR\]', 'Verify there were no errors in the replay.')
        client.Streams.All += Testers.ExcludesExpression('400 Bad', 'Verify none of the 400 responses make it to the client.')
        client.Streams.All += Testers.ExcludesExpression('502 Bad', 'Verify none of the 502 responses make it to the client.')
        client.Streams.All += Testers.ExcludesExpression('500 Internal', 'Verify none of the 500 responses make it to the client.')
        client.Streams.All += Testers.ContainsExpression('X-Response: first', 'Verify that the first response was received.')
        client.Streams.All += Testers.ContainsExpression('X-Response: second', 'Verify that the second response was received.')
        client.Streams.All += Testers.ContainsExpression('X-Response: third', 'Verify that the third response was received.')
        client.Streams.All += Testers.ContainsExpression('X-Response: fourth', 'Verify that the fourth response was received.')
        client.Streams.All += Testers.ContainsExpression('X-Response: fifth', 'Verify that the fifth response was received.')
        client.Streams.All += Testers.ContainsExpression('X-Response: sixth', 'Verify that the sixth response was received.')


# Instantiating the class configures the test run: all setup happens in __init__.
EscalateTest()
Loading

0 comments on commit 5072906

Please sign in to comment.