From 37bf61294c8385e806ff007abbd49c9b79306f74 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Wed, 11 Dec 2024 17:25:32 -0700 Subject: [PATCH 01/50] Rename netcommon.h --- include/core/federated/network/{net_common.h => socket_common.h} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename include/core/federated/network/{net_common.h => socket_common.h} (100%) diff --git a/include/core/federated/network/net_common.h b/include/core/federated/network/socket_common.h similarity index 100% rename from include/core/federated/network/net_common.h rename to include/core/federated/network/socket_common.h From 7fb3c977039c00e4f2c3117578ecb2e00abf5121 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Wed, 11 Dec 2024 17:30:51 -0700 Subject: [PATCH 02/50] Rename --- include/core/federated/network/{net_common.h => copy} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename include/core/federated/network/{net_common.h => copy} (100%) diff --git a/include/core/federated/network/net_common.h b/include/core/federated/network/copy similarity index 100% rename from include/core/federated/network/net_common.h rename to include/core/federated/network/copy From 5ba5a0898ecef83b2b7e60090fca5e6762d4852b Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Wed, 11 Dec 2024 17:31:26 -0700 Subject: [PATCH 03/50] Rename --- include/core/federated/network/{copy => net_common.h} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename include/core/federated/network/{copy => net_common.h} (100%) diff --git a/include/core/federated/network/copy b/include/core/federated/network/net_common.h similarity index 100% rename from include/core/federated/network/copy rename to include/core/federated/network/net_common.h From 6418a39e90f9351b21bf2331927c43965692ae82 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Wed, 11 Dec 2024 17:42:38 -0700 Subject: [PATCH 04/50] Rename net_util.c --- core/federated/network/{net_util.c => socket_common.c} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename 
core/federated/network/{net_util.c => socket_common.c} (100%) diff --git a/core/federated/network/net_util.c b/core/federated/network/socket_common.c similarity index 100% rename from core/federated/network/net_util.c rename to core/federated/network/socket_common.c From d209fe3409b114752be4a77a5ebc6aa3fd798d25 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Wed, 11 Dec 2024 17:43:11 -0700 Subject: [PATCH 05/50] Temp copy --- core/federated/network/{net_util.c => copy} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename core/federated/network/{net_util.c => copy} (100%) diff --git a/core/federated/network/net_util.c b/core/federated/network/copy similarity index 100% rename from core/federated/network/net_util.c rename to core/federated/network/copy From 1afab206f2d879ee736e67a75e4aa2e31e7dbf15 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Wed, 11 Dec 2024 17:43:41 -0700 Subject: [PATCH 06/50] Rename to net_util.c --- core/federated/network/{copy => net_util.c} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename core/federated/network/{copy => net_util.c} (100%) diff --git a/core/federated/network/copy b/core/federated/network/net_util.c similarity index 100% rename from core/federated/network/copy rename to core/federated/network/net_util.c From a66ff845dc3f9ecec2d277b172890e63f3f83ec1 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Mon, 16 Dec 2024 11:29:32 -0700 Subject: [PATCH 07/50] Test --- .../core/federated/network/socket_common.h | 734 +++--------------- 1 file changed, 114 insertions(+), 620 deletions(-) diff --git a/include/core/federated/network/socket_common.h b/include/core/federated/network/socket_common.h index 79ce19550..255964b62 100644 --- a/include/core/federated/network/socket_common.h +++ b/include/core/federated/network/socket_common.h @@ -1,183 +1,9 @@ -/** - * @file - * @author Edward A. 
Lee (eal@berkeley.edu) - * @author Soroush Bateni (soroush@utdallas.edu) - * - * @section LICENSE -Copyright (c) 2020, The University of California at Berkeley. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. - -2. Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. - -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF -THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ -/** - * @section DESCRIPTION - * Header file for common message types and definitions for federated Lingua Franca programs. - * - * This file defines the message types for the federate to communicate with the RTI. - * Each message type has a unique one-byte ID. - * - * The startup sequence is as follows: - * - * Each federate attempts to connect with an RTI at the IP address - * put into its code by the code generator (i.e., it attempts to - * open a TCP connection). 
If an explicit port is given in the `at` clause - * on the `federated reactor` statement, it will use that port. Otherwise, it will - * use DEFAULT_PORT. - * - * When it has successfully opened a TCP connection, the first message it sends - * to the RTI is a MSG_TYPE_FED_IDS message, which contains the ID of this federate - * within the federation, contained in the global variable _lf_my_fed_id - * in the federate code - * (which is initialized by the code generator) and the unique ID of - * the federation, a GUID that is created at run time by the generated script - * that launches the federation. - * If you launch the federates and the RTI manually, rather than using the script, - * then the federation ID is a string that is optionally given to the federate - * on the command line when it is launched. The federate will connect - * successfully only to an RTI that is given the same federation ID on - * its command line. If no ID is given on the command line, then the - * default ID "Unidentified Federation" will be used. - * - * The RTI will respond with a MSG_TYPE_REJECT message if the federation IDs - * do not match and close the connection. At this point the federate - * will increment the port number and try again to find an RTI that matches. - * - * When the federation IDs match, the RTI will respond with an - * MSG_TYPE_ACK. - * - * The next message to the RTI will be a MSG_TYPE_NEIGHBOR_STRUCTURE message - * that informs the RTI about connections between this federate and other - * federates where messages are routed through the RTI. Currently, this only - * includes logical connections when the coordination is centralized. This - * information is needed for the RTI to perform the centralized coordination. - * The burden is on the federates to inform the RTI about relevant connections. 
- * - * The next message to the RTI will be a MSG_TYPE_UDP_PORT message, which has - * payload USHRT_MAX if clock synchronization is disabled altogether, 0 if - * only initial clock synchronization is enabled, and a port number for - * UDP communication if runtime clock synchronization is enabled. - * By default, if the federate host is identical to that of the RTI - * (either no "at" clause is given for either or they both have exactly - * the same string), then clock synchronization is disabled. - * Otherwise, the default is that initial clock synchronization is enabled. - * To turn turn off clock synchronization altogether, set the clock-sync - * property of the target to off. To turn on runtime clock synchronization, - * set it to on. The default value is initial. - * - * If initial clock sync is enabled, the next step is to perform the initial - * clock synchronization (using the TCP connection), which attempts - * to find an initial offset to the physical clock of the federate to make it - * better match the physical clock at the RTI. - * - * Clock synchronization is initiated by the RTI by sending a message - * of type MSG_TYPE_CLOCK_SYNC_T1, the payload of which is the - * current physical clock reading at the RTI. The federate records - * the physical time when it receives this message (T2) and sends - * a reply message of type MSG_TYPE_CLOCK_SYNC_T3 to the RTI. - * It records the time (T3) at which this message has gone out. - * The payload of the MSG_TYPE_CLOCK_SYNC_T3 message is the - * federate ID. The RTI responds to the T3 message with a message - * of type MSG_TYPE_CLOCK_SYNC_T4, which has as a payload - * the physical time at which that response was sent. This cycle will happen - * _LF_CLOCK_SYNC_EXCHANGES_PER_INTERVAL times at startup to account for network delay variations - * (see below). 
- * - * The times T1 and T4 are taken from the physical clock at the RTI, - * whereas the times T2 and T3 are taken from the physical clock at - * the federate. The round trip latency on the connection to the RTI - * is therefore measured as (T4 - T1) - (T3 - T2). Half this quantity - * is an estimate L of the one-way latency. The estimated clock error - * E is therefore L - (T2 - T1). Over several cycles, the average value of E - * becomes the initial offset for the - * clock at the federate. Henceforth, when lf_time_physical() is - * called, the offset will be added to whatever the physical clock says. - * - * If clock synchronization is enabled, then the federate will also - * start a thread to listen for incoming UDP messages from the RTI. - * With period given by the `-c on period ` command-line argument, the RTI - * will initiate a clock synchronization round by sending to the - * federate a MSG_TYPE_CLOCK_SYNC_T1 message. A similar - * protocol to that above is followed to estimate the average clock - * synchronization error E, with two exceptions. First, a fraction - * of E (given by _LF_CLOCK_SYNC_ATTENUATION) is used to adjust the - * offset up or down rather than just setting the offset equal to E. - * Second, after MSG_TYPE_CLOCK_SYNC_T4, the RTI immediately - * sends a following message of type MSG_TYPE_CLOCK_SYNC_CODED_PROBE. - * The federate measures the time difference between its receipt of - * T4 and this code probe and compares that time difference against - * the time difference at the RTI (the difference between the two - * payloads). If that difference is larger than CLOCK_SYNC_GUARD_BAND - * in magnitude, then the clock synchronization round is skipped - * and no adjustment is made. The round will also be skipped if - * any of the expected UDP messages fails to arrive. - * - * FIXME: Citation needed here. - * - * The next step depends on the coordination mode. 
If the coordination - * parameter of the target is "decentralized" and the federate has - * inbound connections from other federates, then it starts a socket - * server to listen for incoming connections from those federates. - * It then sends to the RTI an MSG_TYPE_ADDRESS_ADVERTISEMENT message - * with the port number as a payload. The federate then creates a thread - * to listen for incoming socket connections and messages. - * - * If the federate has outbound connections to other federates, then it - * establishes a socket connection to those federates. It does this by - * first sending to the RTI an MSG_TYPE_ADDRESS_QUERY message with the payload - * being the ID of the federate it wishes to connect to. If the RTI - * responds with a -1, then the RTI does not (yet) know the remote federate's - * port number and IP address, so the local federate will try again - * after waiting ADDRESS_QUERY_RETRY_INTERVAL. When it gets a valid port - * number and IP address in reply, it will establish a socket connection - * to that remote federate. - * - * Physical connections also use the above P2P sockets between - * federates even if the coordination is centralized. - * - * Afterward, the federates and the RTI decide on a common start time by having - * each federate report a reading of its physical clock to the RTI on a - * `MSG_TYPE_TIMESTAMP`. The RTI broadcasts the maximum of these readings plus - * `DELAY_START` to all federates as the start time, again on a `MSG_TYPE_TIMESTAMP`. - * - * The next step depends on the coordination type. - * - * Under centralized coordination, each federate will send a - * `MSG_TYPE_NEXT_EVENT_TAG` to the RTI with the start tag. That is to say that - * each federate has a valid event at the start tag (start time, 0) and it will - * inform the RTI of this event. 
- * Subsequently, at the conclusion of each tag, each federate will send a - * `MSG_TYPE_LATEST_TAG_CONFIRMED` followed by a `MSG_TYPE_NEXT_EVENT_TAG` (see - * the comment for each message for further explanation). Each federate would - * have to wait for a `MSG_TYPE_TAG_ADVANCE_GRANT` or a - * `MSG_TYPE_PROVISIONAL_TAG_ADVANCE_GRANT` before it can advance to a - * particular tag. - * - * Under decentralized coordination, the coordination is governed by STA and - * STAAs, as further explained in https://doi.org/10.48550/arXiv.2109.07771. - * - * FIXME: Expand this. Explain port absent reactions. - * - */ +#ifndef SOCKET_COMMON_H +#define SOCKET_COMMON_H -#ifndef NET_COMMON_H -#define NET_COMMON_H +#include "low_level_platform.h" +#define NUM_SOCKET_RETRIES 10 +#define DELAY_BETWEEN_SOCKET_RETRIES MSEC(100) /** * The timeout time in ns for TCP operations. @@ -191,13 +17,6 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define UDP_TIMEOUT_TIME SEC(1) -/** - * Size of the buffer used for messages sent between federates. - * This is used by both the federates and the rti, so message lengths - * should generally match. - */ -#define FED_COM_BUFFER_SIZE 256u - /** * Time between a federate's attempts to connect to the RTI. */ @@ -217,14 +36,6 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define MAX_NUM_PORT_ADDRESSES 16 -/** - * Time that a federate waits before asking - * the RTI again for the port and IP address of a federate - * (an MSG_TYPE_ADDRESS_QUERY message) after the RTI responds that it - * does not know. This allows time for federates to start separately. - */ -#define ADDRESS_QUERY_RETRY_INTERVAL MSEC(250) - /** * Time to wait before re-attempting to bind to a port. * When a process closes, the network stack typically waits between 30 and 120 @@ -247,456 +58,139 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
*/ #define DEFAULT_PORT 15045u -/** - * Delay the start of all federates by this amount. - * This helps ensure that the federates do not start at the same time. - * Each federate has provided its current physical time to the RTI, and - * the RTI has picked the largest of these. It will add this quantity - * and declare that to be the start time. - * FIXME: This could use the latency estimates that were - * acquired during initial clock synchronization. - */ -#define DELAY_START SEC(1) - -//////////////////////////////////////////// -//// Message types - -// These message types will be encoded in an unsigned char, -// so the magnitude must not exceed 255. Note that these are -// listed in increasing numerical order starting from 0 interleaved -// with decreasing numerical order starting from 255 (so that they -// can be listed in a logical order here even as the design evolves). - -/** - * Byte identifying a rejection of the previously received message. - * The reason for the rejection is included as an additional byte - * (uchar) (see below for encodings of rejection reasons). - */ -#define MSG_TYPE_REJECT 0 - -/** - * Byte identifying an acknowledgment of the previously received message. - * This message carries no payload. - */ -#define MSG_TYPE_ACK 255 - -/** - * Byte identifying an acknowledgment of the previously received MSG_TYPE_FED_IDS message - * sent by the RTI to the federate - * with a payload indicating the UDP port to use for clock synchronization. - * The next four bytes will be the port number for the UDP server, or - * 0 or USHRT_MAX if there is no UDP server. 0 means that initial clock synchronization - * is enabled, whereas USHRT_MAX mean that no synchronization should be performed at all. - */ -#define MSG_TYPE_UDP_PORT 254 - -/** Byte identifying a message from a federate to an RTI containing - * the federation ID and the federate ID. The message contains, in - * this order: - * * One byte equal to MSG_TYPE_FED_IDS. 
- * * Two bytes (ushort) giving the federate ID. - * * One byte (uchar) giving the length N of the federation ID. - * * N bytes containing the federation ID. - * Each federate needs to have a unique ID between 0 and - * NUMBER_OF_FEDERATES-1. - * Each federate, when starting up, should send this message - * to the RTI. This is its first message to the RTI. - * The RTI will respond with either MSG_TYPE_REJECT, MSG_TYPE_ACK, or MSG_TYPE_UDP_PORT. - * If the federate is a C target LF program, the generated federate - * code does this by calling lf_synchronize_with_other_federates(), - * passing to it its federate ID. - */ -#define MSG_TYPE_FED_IDS 1 - -/////////// Messages used for authenticated federation. /////////////// -/** - * Byte identifying a message from a federate to an RTI containing - * federate's 8-byte random nonce for HMAC-based authentication. The federate sends this - * message to an incoming RTI when TCP connection is established - * between the RTI and the federate. - * The message contains, in this order: - * * One byte equal to MSG_TYPE_FED_NONCE. - * * Two bytes (ushort) giving the federate ID. - * * Eight bytes for federate's nonce. - */ -#define MSG_TYPE_FED_NONCE 100 - -/** - * Byte identifying a message from RTI to federate as a response to the FED_NONCE - * message. The RTI sends this message to federate for HMAC-based authentication. - * The message contains, in this order: - * * One byte equal to MSG_TYPE_RTI_RESPONSE. - * * Eight bytes for RTI's nonce. - * * 32 bytes for HMAC tag based on SHA256. - * The HMAC tag is composed of the following order: - * * One byte equal to MSG_TYPE_RTI_RESPONSE. - * * Two bytes (ushort) giving the received federate ID. - * * Eight bytes for received federate's nonce. - */ -#define MSG_TYPE_RTI_RESPONSE 101 - -/** - * Byte identifying a message from federate to RTI as a response to the RTI_RESPONSE - * message. The federate sends this message to RTI for HMAC-based authentication. 
- * The message contains, in this order: - * * One byte equal to MSG_TYPE_FED_RESPONSE. - * * 32 bytes for HMAC tag based on SHA256. - * The HMAC tag is composed of the following order: - * * One byte equal to MSG_TYPE_FED_RESPONSE. - * * Eight bytes for received RTI's nonce. - */ -#define MSG_TYPE_FED_RESPONSE 102 - -/** - * The randomly created nonce size will be 8 bytes. - */ -#define NONCE_LENGTH 8 - -/** - * The HMAC tag uses the SHA256 hash algorithm, creating a 32 byte length hash tag. - */ -#define SHA256_HMAC_LENGTH 32 - -/** - * Byte identifying a timestamp message, which is 64 bits long. - * Each federate sends its starting physical time as a message of this - * type, and the RTI broadcasts to all the federates the starting logical - * time as a message of this type. - s*/ -#define MSG_TYPE_TIMESTAMP 2 -#define MSG_TYPE_TIMESTAMP_LENGTH (1 + sizeof(int64_t)) - -/** Byte identifying a message to forward to another federate. - * The next two bytes will be the ID of the destination port. - * The next two bytes are the destination federate ID. - * The four bytes after that will be the length of the message. - * The remaining bytes are the message. - * NOTE: This is currently not used. All messages are tagged, even - * on physical connections, because if "after" is used, the message - * may preserve the logical timestamp rather than using the physical time. - */ -#define MSG_TYPE_MESSAGE 3 - -/** - * Byte identifying that the federate or the RTI is ending its execution. - */ -#define MSG_TYPE_RESIGN 4 - -/** - * Byte identifying a timestamped message to forward to another federate. - * The next two bytes will be the ID of the destination reactor port. - * The next two bytes are the destination federate ID. - * The four bytes after that will be the length of the message (as an unsigned 32-bit int). - * The next eight bytes will be the timestamp of the message. - * The next four bytes will be the microstep of the message. 
- * The remaining bytes are the message. - * - * With centralized coordination, all such messages flow through the RTI. - * With decentralized coordination, tagged messages are sent peer-to-peer - * between federates and are marked with MSG_TYPE_P2P_TAGGED_MESSAGE. - */ -#define MSG_TYPE_TAGGED_MESSAGE 5 - -/** - * Byte identifying a next event tag (NET) message sent from a federate in - * centralized coordination. The next eight bytes will be the timestamp. The - * next four bytes will be the microstep. This message from a federate tells the - * RTI the tag of the earliest event on that federate's event queue. In other - * words, absent any further inputs from other federates, this will be the least - * tag of the next set of reactions on that federate. If the event queue is - * empty and a timeout time has been specified, then the timeout time will be - * sent. If there is no timeout time, then FOREVER will be sent. Note that if - * there are physical actions and the earliest event on the event queue has a - * tag that is ahead of physical time (or the queue is empty), the federate - * should try to regularly advance its tag (and thus send NET messages) to make - * sure downstream federates can make progress. - */ -#define MSG_TYPE_NEXT_EVENT_TAG 6 - -/** - * Byte identifying a time advance grant (TAG) sent by the RTI to a federate - * in centralized coordination. This message is a promise by the RTI to the federate - * that no later message sent to the federate will have a tag earlier than or - * equal to the tag carried by this TAG message. - * The next eight bytes will be the timestamp. - * The next four bytes will be the microstep. - */ -#define MSG_TYPE_TAG_ADVANCE_GRANT 7 - -/** - * Byte identifying a provisional time advance grant (PTAG) sent by the RTI to a federate - * in centralized coordination. 
This message is a promise by the RTI to the federate - * that no later message sent to the federate will have a tag earlier than the tag - * carried by this PTAG message. - * The next eight bytes will be the timestamp. - * The next four bytes will be the microstep. - */ -#define MSG_TYPE_PROVISIONAL_TAG_ADVANCE_GRANT 8 - -/** - * Byte identifying a latest tag confirmed (LTC) message sent by a federate - * to the RTI. - * The next eight bytes will be the timestep of the completed tag. - * The next four bytes will be the microsteps of the completed tag. - */ -#define MSG_TYPE_LATEST_TAG_CONFIRMED 9 - -/////////// Messages used in lf_request_stop() /////////////// -//// Overview of the algorithm: -//// When any federate calls lf_request_stop(), it will -//// send a MSG_TYPE_STOP_REQUEST message to the RTI, which will then -//// forward a MSG_TYPE_STOP_REQUEST message -//// to any federate that has not yet provided a stop time to the RTI. The federates will reply -//// with a MSG_TYPE_STOP_REQUEST_REPLY and a stop tag (which shall be the -//// maximum of their current logical tag -//// at the time they receive the MSG_TYPE_STOP_REQUEST and the tag of the stop -//// request). When the RTI has gathered all the stop tags -//// from federates (that are still connected), it will decide on a common stop tag -//// which is the maximum of the seen stop tag and answer with a MSG_TYPE_STOP_GRANTED. The federate -//// sending the MSG_TYPE_STOP_REQUEST and federates sending the MSG_TYPE_STOP_REQUEST_REPLY will freeze -//// the advancement of tag until they receive the MSG_TYPE_STOP_GRANTED message, in which -//// case they might continue their execution until the stop tag has been reached. - -/** - * Byte identifying a stop request. This message is first sent to the RTI by a federate - * that would like to stop execution at the specified tag. The RTI will forward - * the MSG_TYPE_STOP_REQUEST to all other federates. 
Those federates will either agree to - * the requested tag or propose a larger tag. The RTI will collect all proposed - * tags and broadcast the largest of those to all federates. All federates - * will then be expected to stop at the granted tag. - * - * The next 8 bytes will be the timestamp. - * The next 4 bytes will be the microstep. - * - * NOTE: The RTI may reply with a larger tag than the one specified in this message. - * It has to be that way because if any federate can send a MSG_TYPE_STOP_REQUEST message - * that specifies the stop time on all other federates, then every federate - * depends on every other federate and time cannot be advanced. - * Hence, the actual stop time may be nondeterministic. - * - * If, on the other hand, the federate requesting the stop is upstream of every - * other federate, then it should be possible to respect its requested stop tag. - */ -#define MSG_TYPE_STOP_REQUEST 10 -#define MSG_TYPE_STOP_REQUEST_LENGTH (1 + sizeof(instant_t) + sizeof(microstep_t)) -#define ENCODE_STOP_REQUEST(buffer, time, microstep) \ - do { \ - buffer[0] = MSG_TYPE_STOP_REQUEST; \ - encode_int64(time, &(buffer[1])); \ - encode_int32((int32_t)microstep, &(buffer[1 + sizeof(instant_t)])); \ - } while (0) - -/** - * Byte indicating a federate's reply to a MSG_TYPE_STOP_REQUEST that was sent - * by the RTI. The payload is a proposed stop tag that is at least as large - * as the one sent to the federate in a MSG_TYPE_STOP_REQUEST message. - * - * The next 8 bytes will be the timestamp. - * The next 4 bytes will be the microstep. 
- */ -#define MSG_TYPE_STOP_REQUEST_REPLY 11 -#define MSG_TYPE_STOP_REQUEST_REPLY_LENGTH (1 + sizeof(instant_t) + sizeof(microstep_t)) -#define ENCODE_STOP_REQUEST_REPLY(buffer, time, microstep) \ - do { \ - buffer[0] = MSG_TYPE_STOP_REQUEST_REPLY; \ - encode_int64(time, &(buffer[1])); \ - encode_int32((int32_t)microstep, &(buffer[1 + sizeof(instant_t)])); \ - } while (0) - -/** - * Byte sent by the RTI indicating that the stop request from some federate - * has been granted. The payload is the tag at which all federates have - * agreed that they can stop. - * The next 8 bytes will be the time at which the federates will stop. * - * The next 4 bytes will be the microstep at which the federates will stop.. - */ -#define MSG_TYPE_STOP_GRANTED 12 -#define MSG_TYPE_STOP_GRANTED_LENGTH (1 + sizeof(instant_t) + sizeof(microstep_t)) -#define ENCODE_STOP_GRANTED(buffer, time, microstep) \ - do { \ - buffer[0] = MSG_TYPE_STOP_GRANTED; \ - encode_int64(time, &(buffer[1])); \ - encode_int32((int32_t)microstep, &(buffer[1 + sizeof(instant_t)])); \ - } while (0) - -/////////// End of lf_request_stop() messages //////////////// - -/** - * Byte identifying a address query message, sent by a federate to RTI - * to ask for another federate's address and port number. - * The next two bytes are the other federate's ID. - */ -#define MSG_TYPE_ADDRESS_QUERY 13 +typedef enum socket_type_t { TCP, UDP } socket_type_t; /** - * Byte identifying a address query message reply, sent by a RTI to a federate - * to reply with a remote federate's address and port number. - * The reply from the RTI will be a port number (an int32_t), which is -1 - * if the RTI does not know yet (it has not received MSG_TYPE_ADDRESS_ADVERTISEMENT from - * the other federate), followed by the IP address of the other - * federate (an IPV4 address, which has length INET_ADDRSTRLEN). - * The next four bytes (or sizeof(int32_t)) will be the port number. 
- * The next four bytes (or sizeof(in_addr), which is uint32_t) will be the ip address. + * Mutex protecting socket close operations. */ -#define MSG_TYPE_ADDRESS_QUERY_REPLY 14 +extern lf_mutex_t socket_mutex; /** - * Byte identifying a message advertising the port for the TCP connection server - * of a federate. This is utilized in decentralized coordination as well as for physical - * connections in centralized coordination. - * The next four bytes (or sizeof(int32_t)) will be the port number. - * The sending federate will not wait for a response from the RTI and assumes its - * request will be processed eventually by the RTI. - */ -#define MSG_TYPE_ADDRESS_ADVERTISEMENT 15 - -/** - * Byte identifying a first message that is sent by a federate directly to another federate - * after establishing a socket connection to send messages directly to the federate. This - * first message contains two bytes identifying the sending federate (its ID), a byte - * giving the length of the federation ID, followed by the federation ID (a string). - * The response from the remote federate is expected to be MSG_TYPE_ACK, but if the remote - * federate does not expect this federate or federation to connect, it will respond - * instead with MSG_TYPE_REJECT. - */ -#define MSG_TYPE_P2P_SENDING_FED_ID 16 - -/** - * Byte identifying a message to send directly to another federate. + * @brief Create an IPv4 TCP socket with Nagle's algorithm disabled + * (TCP_NODELAY) and Delayed ACKs disabled (TCP_QUICKACK). Exits application + * on any error. * - * The next two bytes will be the ID of the destination port. - * The next two bytes are the destination federate ID. This is checked against - * the _lf_my_fed_id of the receiving federate to ensure the message was intended for - * The four bytes after will be the length of the message. - * The ramaining bytes are the message. + * @return The socket ID (a file descriptor). 
*/ -#define MSG_TYPE_P2P_MESSAGE 17 +int create_real_time_tcp_socket_errexit(); /** - * Byte identifying a timestamped message to send directly to another federate. - * This is a variant of @see MSG_TYPE_TAGGED_MESSAGE that is used in P2P connections between - * federates. Having a separate message type for P2P connections between federates - * will be useful in preventing crosstalk. - * - * The next two bytes will be the ID of the destination port. - * The next two bytes are the destination federate ID. This is checked against - * the _lf_my_fed_id of the receiving federate to ensure the message was intended for - * the correct federate. - * The four bytes after will be the length of the message. - * The next eight bytes will be the timestamp. - * The next four bytes will be the microstep of the sender. - * The ramaining bytes are the message. + * Read the specified number of bytes from the specified socket into the specified buffer. + * If an error occurs during this reading, return -1 and set errno to indicate + * the cause of the error. If the read succeeds in reading the specified number of bytes, + * return 0. If an EOF occurs before reading the specified number of bytes, return 1. + * This function repeats the read attempt until the specified number of bytes + * have been read, an EOF is read, or an error occurs. Specifically, errors EAGAIN, + * EWOULDBLOCK, and EINTR are not considered errors and instead trigger + * another attempt. A delay between attempts is given by DELAY_BETWEEN_SOCKET_RETRIES. + * @param socket The socket ID. + * @param num_bytes The number of bytes to read. + * @param buffer The buffer into which to put the bytes. + * @return 0 for success, 1 for EOF, and -1 for an error. */ -#define MSG_TYPE_P2P_TAGGED_MESSAGE 18 +int read_from_socket(int socket, size_t num_bytes, unsigned char* buffer); -//////////////////////////////////////////////// /** - * Physical clock synchronization messages according to PTP. 
- */ - -/* - * The next 8 bytes will be a timestamp sent according to - * PTP. - */ -#define MSG_TYPE_CLOCK_SYNC_T1 19 - -/* - * Prompts the master to send a T4. - * The next four bytes will be the sendin federate's id - */ -#define MSG_TYPE_CLOCK_SYNC_T3 20 - -/* - * The next 8 bytes will be a timestamp sent according to - * PTP. + * Read the specified number of bytes from the specified socket using read_from_socket + * and close the socket if an error occurs. If an error occurs, this will change the + * socket ID pointed to by the first argument to -1 and will return -1. + * @param socket Pointer to the socket ID. + * @param num_bytes The number of bytes to read. + * @param buffer The buffer into which to put the bytes. + * @return 0 for success, -1 for failure. */ -#define MSG_TYPE_CLOCK_SYNC_T4 21 +int read_from_socket_close_on_error(int* socket, size_t num_bytes, unsigned char* buffer); /** - * Coded probe message. - * This messages is sent by the server (master) - * right after MSG_TYPE_CLOCK_SYNC_T4(t1) with a new physical clock snapshot t2. - * At the receiver, the previous MSG_TYPE_CLOCK_SYNC_T4 message and this message - * are assigned a receive timestamp r1 and r2. If |(r2 - r1) - (t2 - t1)| < GUARD_BAND, - * then the current clock sync cycle is considered pure and can be processed. - * @see Geng, Yilong, et al. - * "Exploiting a natural network effect for scalable, fine-grained clock synchronization." + * Read the specified number of bytes from the specified socket into the + * specified buffer. If a disconnect or an EOF occurs during this + * reading, then if format is non-null, report an error and exit. + * If the mutex argument is non-NULL, release the mutex before exiting. + * If format is null, then report the error, but do not exit. + * This function takes a formatted string and additional optional arguments + * similar to printf(format, ...) that is appended to the error messages. + * @param socket Pointer to the socket ID.
+ * @param num_bytes The number of bytes to read. + * @param buffer The buffer into which to put the bytes. + * @param mutex If non-NULL, the mutex to unlock before exiting. + * @param format A printf-style format string, followed by arguments to + * fill the string, or NULL to not exit with an error message. + * This function does not return a value. */ -#define MSG_TYPE_CLOCK_SYNC_CODED_PROBE 22 +void read_from_socket_fail_on_error(int* socket, size_t num_bytes, unsigned char* buffer, lf_mutex_t* mutex, + char* format, ...); /** - * A port absent message, informing the receiver that a given port - * will not have event for the current logical time. - * - * The next 2 bytes is the port id. - * The next 2 bytes will be the federate id of the destination federate. - * This is needed for the centralized coordination so that the RTI knows where - * to forward the message. - * The next 8 bytes are the intended time of the absent message - * The next 4 bytes are the intended microstep of the absent message + * Without blocking, peek at the specified socket and, if there is + * anything on the queue, put its first byte at the specified address and return 1. + * If there is nothing on the queue, return 0, and if an error occurs, + * return -1. + * @param socket The socket ID. + * @param result Pointer to where to put the first byte available on the socket. */ -#define MSG_TYPE_PORT_ABSENT 23 +ssize_t peek_from_socket(int socket, unsigned char* result); /** - * A message that informs the RTI about connections between this federate and - * other federates where messages are routed through the RTI. Currently, this - * only includes logical connections when the coordination is centralized. This - * information is needed for the RTI to perform the centralized coordination. - * - * @note Only information about the immediate neighbors is required. The RTI can - * transitively obtain the structure of the federation based on each federate's - * immediate neighbor information. 
- * - * The next 4 bytes is the number of upstream federates. - * The next 4 bytes is the number of downstream federates. - * - * Depending on the first four bytes, the next bytes are pairs of (fed ID (2 - * bytes), delay (8 bytes)) for this federate's connection to upstream federates - * (by direct connection). The delay is the minimum "after" delay of all - * connections from the upstream federate. - * - * Depending on the second four bytes, the next bytes are fed IDs (2 - * bytes each), of this federate's downstream federates (by direct connection). - * - * @note The upstream and downstream connections are transmitted on the same - * message to prevent (at least to some degree) the scenario where the RTI has - * information about one, but not the other (which is a critical error). + * Write the specified number of bytes to the specified socket from the + * specified buffer. If an error occurs, return -1 and set errno to indicate + * the cause of the error. If the write succeeds, return 0. + * This function repeats the attempt until the specified number of bytes + * have been written or an error occurs. Specifically, errors EAGAIN, + * EWOULDBLOCK, and EINTR are not considered errors and instead trigger + * another attempt. A delay between attempts is given by + * DELAY_BETWEEN_SOCKET_RETRIES. + * @param socket The socket ID. + * @param num_bytes The number of bytes to write. + * @param buffer The buffer from which to get the bytes. + * @return 0 for success, -1 for failure. */ -#define MSG_TYPE_NEIGHBOR_STRUCTURE 24 -#define MSG_TYPE_NEIGHBOR_STRUCTURE_HEADER_SIZE 9 +int write_to_socket(int socket, size_t num_bytes, unsigned char* buffer); /** - * Byte identifying that the federate or the RTI has failed. + * Write the specified number of bytes to the specified socket using write_to_socket + * and close the socket if an error occurs. If an error occurs, this will change the + * socket ID pointed to by the first argument to -1 and will return -1. 
+ * @param socket Pointer to the socket ID. + * @param num_bytes The number of bytes to write. + * @param buffer The buffer from which to get the bytes. + * @return 0 for success, -1 for failure. */ -#define MSG_TYPE_FAILED 25 - -///////////////////////////////////////////// -//// Rejection codes +int write_to_socket_close_on_error(int* socket, size_t num_bytes, unsigned char* buffer); /** - * These codes are sent in a MSG_TYPE_REJECT message. - * They are limited to one byte (uchar). - */ - -/** Federation ID does not match. */ -#define FEDERATION_ID_DOES_NOT_MATCH 1 - -/** Federate with the specified ID has already joined. */ -#define FEDERATE_ID_IN_USE 2 - -/** Federate ID out of range. */ -#define FEDERATE_ID_OUT_OF_RANGE 3 - -/** Incoming message is not expected. */ -#define UNEXPECTED_MESSAGE 4 - -/** Connected to the wrong server. */ -#define WRONG_SERVER 5 - -/** HMAC authentication failed. */ -#define HMAC_DOES_NOT_MATCH 6 - -/** RTI not executed using -a or --auth option. */ -#define RTI_NOT_EXECUTED_WITH_AUTH 7 - -#endif /* NET_COMMON_H */ + * Write the specified number of bytes to the specified socket using + * write_to_socket_close_on_error and exit with an error code if an error occurs. + * If the mutex argument is non-NULL, release the mutex before exiting. If the + * format argument is non-null, then use it and any additional arguments to form + * the error message using printf conventions. Otherwise, print a generic error + * message. + * @param socket Pointer to the socket ID. + * @param num_bytes The number of bytes to write. + * @param buffer The buffer from which to get the bytes. + * @param mutex If non-NULL, the mutex to unlock before exiting. + * @param format A format string for error messages, followed by any number of + * fields that will be used to fill the format string as in printf, or NULL + * to print a generic error message. 
+ */ +void write_to_socket_fail_on_error(int* socket, size_t num_bytes, unsigned char* buffer, lf_mutex_t* mutex, + char* format, ...); + + +/** + * Create a server and enable listening for socket connections. + * If the specified port is non-zero, it will attempt to acquire that port. + * If it fails, it will repeatedly attempt up to PORT_BIND_RETRY_LIMIT times with + * a delay of PORT_BIND_RETRY_INTERVAL in between. If the specified port is + * zero, then it will attempt to acquire DEFAULT_PORT first. If this fails, then it + * will repeatedly attempt up to PORT_BIND_RETRY_LIMIT times, incrementing the port + * number between attempts, with no delay between attempts. Once it has incremented + * the port number MAX_NUM_PORT_ADDRESSES times, it will cycle around and begin again + * with DEFAULT_PORT. + * + * @param port The port number to use or 0 to start trying at DEFAULT_PORT. + * @param socket_type The type of the socket for the server (TCP or UDP). + * @return The socket descriptor on which to accept connections. + */ +int create_rti_server(uint16_t port, socket_type_t socket_type); + +#endif /* SOCKET_COMMON_H */ \ No newline at end of file From f6a4943db2e5687fbaf2b8e800b7e8e4b7dcb16a Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Mon, 16 Dec 2024 11:31:09 -0700 Subject: [PATCH 08/50] Remove from net_common.h --- include/core/federated/network/net_common.h | 38 --------------------- 1 file changed, 38 deletions(-) diff --git a/include/core/federated/network/net_common.h b/include/core/federated/network/net_common.h index 79ce19550..48976f1c5 100644 --- a/include/core/federated/network/net_common.h +++ b/include/core/federated/network/net_common.h @@ -179,18 +179,6 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #ifndef NET_COMMON_H #define NET_COMMON_H -/** - * The timeout time in ns for TCP operations. - * Default value is 10 secs. 
- */ -#define TCP_TIMEOUT_TIME SEC(10) - -/** - * The timeout time in ns for UDP operations. - * Default value is 1 sec. - */ -#define UDP_TIMEOUT_TIME SEC(1) - /** * Size of the buffer used for messages sent between federates. * This is used by both the federates and the rti, so message lengths @@ -198,18 +186,6 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define FED_COM_BUFFER_SIZE 256u -/** - * Time between a federate's attempts to connect to the RTI. - */ -#define CONNECT_RETRY_INTERVAL MSEC(500) - -/** - * Bound on the number of retries to connect to the RTI. - * A federate will retry every CONNECT_RETRY_INTERVAL seconds until - * CONNECTION_TIMEOUT expires. - */ -#define CONNECT_TIMEOUT MINUTES(1) - /** * Maximum number of port addresses that a federate will try to connect to the RTI on. * If you are using automatic ports begining at DEFAULT_PORT, this puts an upper bound @@ -225,20 +201,6 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define ADDRESS_QUERY_RETRY_INTERVAL MSEC(250) -/** - * Time to wait before re-attempting to bind to a port. - * When a process closes, the network stack typically waits between 30 and 120 - * seconds before releasing the port. This is to allow for delayed packets so - * that a new process does not receive packets from a previous process. - * Here, we limit the retries to 60 seconds. - */ -#define PORT_BIND_RETRY_INTERVAL SEC(1) - -/** - * Number of attempts to bind to a port before giving up. - */ -#define PORT_BIND_RETRY_LIMIT 60 - /** * Default port number for the RTI. 
* Unless a specific port has been specified by the LF program in the "at" From 5882917d59b1fd094a080592cae9f1949c418a8f Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Mon, 16 Dec 2024 11:35:40 -0700 Subject: [PATCH 09/50] Move read_write socket functions to socket_common.c --- include/core/federated/network/net_util.h | 119 +--------------------- 1 file changed, 1 insertion(+), 118 deletions(-) diff --git a/include/core/federated/network/net_util.h b/include/core/federated/network/net_util.h index 24b4782f9..1353200c7 100644 --- a/include/core/federated/network/net_util.h +++ b/include/core/federated/network/net_util.h @@ -51,9 +51,6 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "low_level_platform.h" #include "tag.h" -#define NUM_SOCKET_RETRIES 10 -#define DELAY_BETWEEN_SOCKET_RETRIES MSEC(100) - #define HOST_LITTLE_ENDIAN 1 #define HOST_BIG_ENDIAN 2 @@ -64,121 +61,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. int host_is_big_endian(void); #ifdef FEDERATED - -/** - * Mutex protecting socket close operations. - */ -extern lf_mutex_t socket_mutex; - -/** - * @brief Create an IPv4 TCP socket with Nagle's algorithm disabled - * (TCP_NODELAY) and Delayed ACKs disabled (TCP_QUICKACK). Exits application - * on any error. - * - * @return The socket ID (a file descriptor). - */ -int create_real_time_tcp_socket_errexit(); - -/** - * Read the specified number of bytes from the specified socket into the specified buffer. - * If an error occurs during this reading, return -1 and set errno to indicate - * the cause of the error. If the read succeeds in reading the specified number of bytes, - * return 0. If an EOF occurs before reading the specified number of bytes, return 1. - * This function repeats the read attempt until the specified number of bytes - * have been read, an EOF is read, or an error occurs. 
Specifically, errors EAGAIN, - * EWOULDBLOCK, and EINTR are not considered errors and instead trigger - * another attempt. A delay between attempts is given by DELAY_BETWEEN_SOCKET_RETRIES. - * @param socket The socket ID. - * @param num_bytes The number of bytes to read. - * @param buffer The buffer into which to put the bytes. - * @return 0 for success, 1 for EOF, and -1 for an error. - */ -int read_from_socket(int socket, size_t num_bytes, unsigned char* buffer); - -/** - * Read the specified number of bytes to the specified socket using read_from_socket - * and close the socket if an error occurs. If an error occurs, this will change the - * socket ID pointed to by the first argument to -1 and will return -1. - * @param socket Pointer to the socket ID. - * @param num_bytes The number of bytes to write. - * @param buffer The buffer from which to get the bytes. - * @return 0 for success, -1 for failure. - */ -int read_from_socket_close_on_error(int* socket, size_t num_bytes, unsigned char* buffer); - -/** - * Read the specified number of bytes from the specified socket into the - * specified buffer. If a disconnect or an EOF occurs during this - * reading, then if format is non-null, report an error and exit. - * If the mutex argument is non-NULL, release the mutex before exiting. - * If format is null, then report the error, but do not exit. - * This function takes a formatted string and additional optional arguments - * similar to printf(format, ...) that is appended to the error messages. - * @param socket The socket ID. - * @param num_bytes The number of bytes to read. - * @param buffer The buffer into which to put the bytes. - * @param format A printf-style format string, followed by arguments to - * fill the string, or NULL to not exit with an error message. - * @return The number of bytes read, or 0 if an EOF is received, or - * a negative number for an error. 
- */ -void read_from_socket_fail_on_error(int* socket, size_t num_bytes, unsigned char* buffer, lf_mutex_t* mutex, - char* format, ...); - -/** - * Without blocking, peek at the specified socket and, if there is - * anything on the queue, put its first byte at the specified address and return 1. - * If there is nothing on the queue, return 0, and if an error occurs, - * return -1. - * @param socket The socket ID. - * @param result Pointer to where to put the first byte available on the socket. - */ -ssize_t peek_from_socket(int socket, unsigned char* result); - -/** - * Write the specified number of bytes to the specified socket from the - * specified buffer. If an error occurs, return -1 and set errno to indicate - * the cause of the error. If the write succeeds, return 0. - * This function repeats the attempt until the specified number of bytes - * have been written or an error occurs. Specifically, errors EAGAIN, - * EWOULDBLOCK, and EINTR are not considered errors and instead trigger - * another attempt. A delay between attempts is given by - * DELAY_BETWEEN_SOCKET_RETRIES. - * @param socket The socket ID. - * @param num_bytes The number of bytes to write. - * @param buffer The buffer from which to get the bytes. - * @return 0 for success, -1 for failure. - */ -int write_to_socket(int socket, size_t num_bytes, unsigned char* buffer); - -/** - * Write the specified number of bytes to the specified socket using write_to_socket - * and close the socket if an error occurs. If an error occurs, this will change the - * socket ID pointed to by the first argument to -1 and will return -1. - * @param socket Pointer to the socket ID. - * @param num_bytes The number of bytes to write. - * @param buffer The buffer from which to get the bytes. - * @return 0 for success, -1 for failure. 
- */ -int write_to_socket_close_on_error(int* socket, size_t num_bytes, unsigned char* buffer); - -/** - * Write the specified number of bytes to the specified socket using - * write_to_socket_close_on_error and exit with an error code if an error occurs. - * If the mutex argument is non-NULL, release the mutex before exiting. If the - * format argument is non-null, then use it an any additional arguments to form - * the error message using printf conventions. Otherwise, print a generic error - * message. - * @param socket Pointer to the socket ID. - * @param num_bytes The number of bytes to write. - * @param buffer The buffer from which to get the bytes. - * @param mutex If non-NULL, the mutex to unlock before exiting. - * @param format A format string for error messages, followed by any number of - * fields that will be used to fill the format string as in printf, or NULL - * to print a generic error message. - */ -void write_to_socket_fail_on_error(int* socket, size_t num_bytes, unsigned char* buffer, lf_mutex_t* mutex, - char* format, ...); +#include "socket_common.h" #endif // FEDERATED From a4313a163e5ced43a6add802c69b029ae94ecf88 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Mon, 16 Dec 2024 11:36:53 -0700 Subject: [PATCH 10/50] Add socket_common.h header to clock-sync.c --- core/federated/clock-sync.c | 1 + 1 file changed, 1 insertion(+) diff --git a/core/federated/clock-sync.c b/core/federated/clock-sync.c index b18efb650..741182735 100644 --- a/core/federated/clock-sync.c +++ b/core/federated/clock-sync.c @@ -42,6 +42,7 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "clock-sync.h" #include "net_common.h" #include "net_util.h" +#include "socket_common.h" #include "util.h" /** Offset calculated by the clock synchronization algorithm. 
*/ From ed305145666e2d800f417dd3b217b6560059325e Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Mon, 16 Dec 2024 11:37:13 -0700 Subject: [PATCH 11/50] Add socket_common.c to CMakeLists.txt --- core/federated/network/CMakeLists.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/core/federated/network/CMakeLists.txt b/core/federated/network/CMakeLists.txt index 5306eae02..f61d69897 100644 --- a/core/federated/network/CMakeLists.txt +++ b/core/federated/network/CMakeLists.txt @@ -1,4 +1,4 @@ -set(LF_NETWORK_FILES net_util.c) +set(LF_NETWORK_FILES net_util.c socket_common.c) list(TRANSFORM LF_NETWORK_FILES PREPEND federated/network/) list(APPEND REACTORC_SOURCES ${LF_NETWORK_FILES}) From 5bed39e090a4ca040c3b1978d6a013c21230e280 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Mon, 16 Dec 2024 11:37:59 -0700 Subject: [PATCH 12/50] Move read_write socket functions to socket_common.c --- core/federated/network/net_util.c | 178 ------------------------------ 1 file changed, 178 deletions(-) diff --git a/core/federated/network/net_util.c b/core/federated/network/net_util.c index 61d4804bd..bcea05495 100644 --- a/core/federated/network/net_util.c +++ b/core/federated/network/net_util.c @@ -45,184 +45,6 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "net_util.h" #include "util.h" -// Define socket functions only for federated execution. -#ifdef FEDERATED -#include // Defines read(), write(), and close() - -#ifndef NUMBER_OF_FEDERATES -#define NUMBER_OF_FEDERATES 1 -#endif - -/** Number of nanoseconds to sleep before retrying a socket read. */ -#define SOCKET_READ_RETRY_INTERVAL 1000000 - -// Mutex lock held while performing socket close operations. -// A deadlock can occur if two threads simulataneously attempt to close the same socket. 
-lf_mutex_t socket_mutex; - -int create_real_time_tcp_socket_errexit() { - int sock = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP); - if (sock < 0) { - lf_print_error_system_failure("Could not open TCP socket."); - } - // Disable Nagle's algorithm which bundles together small TCP messages to - // reduce network traffic. - // TODO: Re-consider if we should do this, and whether disabling delayed ACKs - // is enough. - int flag = 1; - int result = setsockopt(sock, IPPROTO_TCP, TCP_NODELAY, &flag, sizeof(int)); - - if (result < 0) { - lf_print_error_system_failure("Failed to disable Nagle algorithm on socket server."); - } - -#if defined(PLATFORM_Linux) - // Disable delayed ACKs. Only possible on Linux - result = setsockopt(sock, IPPROTO_TCP, TCP_QUICKACK, &flag, sizeof(int)); - - if (result < 0) { - lf_print_error_system_failure("Failed to disable Nagle algorithm on socket server."); - } -#endif // Linux - - return sock; -} - -int read_from_socket(int socket, size_t num_bytes, unsigned char* buffer) { - if (socket < 0) { - // Socket is not open. - errno = EBADF; - return -1; - } - ssize_t bytes_read = 0; - while (bytes_read < (ssize_t)num_bytes) { - ssize_t more = read(socket, buffer + bytes_read, num_bytes - (size_t)bytes_read); - if (more < 0 && (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)) { - // Those error codes set by the socket indicates - // that we should try again (@see man errno). - LF_PRINT_DEBUG("Reading from socket %d failed with error: `%s`. Will try again.", socket, strerror(errno)); - lf_sleep(DELAY_BETWEEN_SOCKET_RETRIES); - continue; - } else if (more < 0) { - // A more serious error occurred. - lf_print_error("Reading from socket %d failed. With error: `%s`", socket, strerror(errno)); - return -1; - } else if (more == 0) { - // EOF received. 
- return 1; - } - bytes_read += more; - } - return 0; -} - -int read_from_socket_close_on_error(int* socket, size_t num_bytes, unsigned char* buffer) { - assert(socket); - int read_failed = read_from_socket(*socket, num_bytes, buffer); - if (read_failed) { - // Read failed. - // Socket has probably been closed from the other side. - // Shut down and close the socket from this side. - shutdown(*socket, SHUT_RDWR); - close(*socket); - // Mark the socket closed. - *socket = -1; - return -1; - } - return 0; -} - -void read_from_socket_fail_on_error(int* socket, size_t num_bytes, unsigned char* buffer, lf_mutex_t* mutex, - char* format, ...) { - va_list args; - assert(socket); - int read_failed = read_from_socket_close_on_error(socket, num_bytes, buffer); - if (read_failed) { - // Read failed. - if (mutex != NULL) { - LF_MUTEX_UNLOCK(mutex); - } - if (format != NULL) { - va_start(args, format); - lf_print_error_system_failure(format, args); - va_end(args); - } else { - lf_print_error_system_failure("Failed to read from socket."); - } - } -} - -ssize_t peek_from_socket(int socket, unsigned char* result) { - ssize_t bytes_read = recv(socket, result, 1, MSG_DONTWAIT | MSG_PEEK); - if (bytes_read < 0 && (errno == EAGAIN || errno == EWOULDBLOCK)) - return 0; - else - return bytes_read; -} - -int write_to_socket(int socket, size_t num_bytes, unsigned char* buffer) { - if (socket < 0) { - // Socket is not open. - errno = EBADF; - return -1; - } - ssize_t bytes_written = 0; - while (bytes_written < (ssize_t)num_bytes) { - ssize_t more = write(socket, buffer + bytes_written, num_bytes - (size_t)bytes_written); - if (more <= 0 && (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR)) { - // The error codes EAGAIN or EWOULDBLOCK indicate - // that we should try again (@see man errno). - // The error code EINTR means the system call was interrupted before completing. - LF_PRINT_DEBUG("Writing to socket %d was blocked. 
Will try again.", socket); - lf_sleep(DELAY_BETWEEN_SOCKET_RETRIES); - continue; - } else if (more < 0) { - // A more serious error occurred. - lf_print_error("Writing to socket %d failed. With error: `%s`", socket, strerror(errno)); - return -1; - } - bytes_written += more; - } - return 0; -} - -int write_to_socket_close_on_error(int* socket, size_t num_bytes, unsigned char* buffer) { - assert(socket); - int result = write_to_socket(*socket, num_bytes, buffer); - if (result) { - // Write failed. - // Socket has probably been closed from the other side. - // Shut down and close the socket from this side. - shutdown(*socket, SHUT_RDWR); - close(*socket); - // Mark the socket closed. - *socket = -1; - } - return result; -} - -void write_to_socket_fail_on_error(int* socket, size_t num_bytes, unsigned char* buffer, lf_mutex_t* mutex, - char* format, ...) { - va_list args; - assert(socket); - int result = write_to_socket_close_on_error(socket, num_bytes, buffer); - if (result) { - // Write failed. - if (mutex != NULL) { - LF_MUTEX_UNLOCK(mutex); - } - if (format != NULL) { - va_start(args, format); - lf_print_error_system_failure(format, args); - va_end(args); - } else { - lf_print_error("Failed to write to socket. Closing it."); - } - } -} - -#endif // FEDERATED - // Below are more generally useful functions. void encode_int64(int64_t data, unsigned char* buffer) { From ba6c35bfe7d7c2affb22f696d84b9deec6636c4e Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Mon, 16 Dec 2024 11:38:28 -0700 Subject: [PATCH 13/50] Move socket_type to socket_common.h --- core/federated/RTI/rti_remote.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/federated/RTI/rti_remote.h b/core/federated/RTI/rti_remote.h index 8d3012b95..173c4522c 100644 --- a/core/federated/RTI/rti_remote.h +++ b/core/federated/RTI/rti_remote.h @@ -22,6 +22,7 @@ #include // Defines read(), write(), and close() #include // Defines bzero(). 
+#include "net_util.h" #include "rti_common.h" #ifdef __RTI_AUTH__ @@ -38,7 +39,7 @@ ///////////////////////////////////////////// //// Data structures -typedef enum socket_type_t { TCP, UDP } socket_type_t; + /** * Information about a federate known to the RTI, including its runtime state, From e4363f24d0d839239dad24b536e51bdda4f54d70 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Mon, 16 Dec 2024 11:39:06 -0700 Subject: [PATCH 14/50] Move create_rti_server to socket_common.c --- core/federated/RTI/rti_remote.c | 126 ------ core/federated/network/socket_common.c | 517 +++++++------------------ 2 files changed, 134 insertions(+), 509 deletions(-) diff --git a/core/federated/RTI/rti_remote.c b/core/federated/RTI/rti_remote.c index 3daaeff7e..b6ab2476c 100644 --- a/core/federated/RTI/rti_remote.c +++ b/core/federated/RTI/rti_remote.c @@ -52,132 +52,6 @@ extern int lf_critical_section_enter(environment_t* env) { return lf_mutex_lock( extern int lf_critical_section_exit(environment_t* env) { return lf_mutex_unlock(&rti_mutex); } -/** - * Create a server and enable listening for socket connections. - * If the specified port if it is non-zero, it will attempt to acquire that port. - * If it fails, it will repeatedly attempt up to PORT_BIND_RETRY_LIMIT times with - * a delay of PORT_BIND_RETRY_INTERVAL in between. If the specified port is - * zero, then it will attempt to acquire DEFAULT_PORT first. If this fails, then it - * will repeatedly attempt up to PORT_BIND_RETRY_LIMIT times, incrementing the port - * number between attempts, with no delay between attempts. Once it has incremented - * the port number MAX_NUM_PORT_ADDRESSES times, it will cycle around and begin again - * with DEFAULT_PORT. - * - * @param port The port number to use or 0 to start trying at DEFAULT_PORT. - * @param socket_type The type of the socket for the server (TCP or UDP). - * @return The socket descriptor on which to accept connections. 
- */ -static int create_rti_server(uint16_t port, socket_type_t socket_type) { - // Timeout time for the communications of the server - struct timeval timeout_time = {.tv_sec = TCP_TIMEOUT_TIME / BILLION, .tv_usec = (TCP_TIMEOUT_TIME % BILLION) / 1000}; - // Create an IPv4 socket for TCP (not UDP) communication over IP (0). - int socket_descriptor = -1; - if (socket_type == TCP) { - socket_descriptor = create_real_time_tcp_socket_errexit(); - } else if (socket_type == UDP) { - socket_descriptor = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); - // Set the appropriate timeout time - timeout_time = - (struct timeval){.tv_sec = UDP_TIMEOUT_TIME / BILLION, .tv_usec = (UDP_TIMEOUT_TIME % BILLION) / 1000}; - } - if (socket_descriptor < 0) { - lf_print_error_system_failure("Failed to create RTI socket."); - } - - // Set the option for this socket to reuse the same address - int true_variable = 1; // setsockopt() requires a reference to the value assigned to an option - if (setsockopt(socket_descriptor, SOL_SOCKET, SO_REUSEADDR, &true_variable, sizeof(int32_t)) < 0) { - lf_print_error("RTI failed to set SO_REUSEADDR option on the socket: %s.", strerror(errno)); - } - // Set the timeout on the socket so that read and write operations don't block for too long - if (setsockopt(socket_descriptor, SOL_SOCKET, SO_RCVTIMEO, (const char*)&timeout_time, sizeof(timeout_time)) < 0) { - lf_print_error("RTI failed to set SO_RCVTIMEO option on the socket: %s.", strerror(errno)); - } - if (setsockopt(socket_descriptor, SOL_SOCKET, SO_SNDTIMEO, (const char*)&timeout_time, sizeof(timeout_time)) < 0) { - lf_print_error("RTI failed to set SO_SNDTIMEO option on the socket: %s.", strerror(errno)); - } - - /* - * The following used to permit reuse of a port that an RTI has previously - * used that has not been released. We no longer do this, and instead retry - * some number of times after waiting. 
- - // SO_REUSEPORT (since Linux 3.9) - // Permits multiple AF_INET or AF_INET6 sockets to be bound to an - // identical socket address. This option must be set on each - // socket (including the first socket) prior to calling bind(2) - // on the socket. To prevent port hijacking, all of the - // processes binding to the same address must have the same - // effective UID. This option can be employed with both TCP and - // UDP sockets. - - int reuse = 1; - #ifdef SO_REUSEPORT - if (setsockopt(socket_descriptor, SOL_SOCKET, SO_REUSEPORT, - (const char*)&reuse, sizeof(reuse)) < 0) { - perror("setsockopt(SO_REUSEPORT) failed"); - } - #endif - */ - - // Server file descriptor. - struct sockaddr_in server_fd; - // Zero out the server address structure. - bzero((char*)&server_fd, sizeof(server_fd)); - - uint16_t specified_port = port; - if (specified_port == 0) - port = DEFAULT_PORT; - - server_fd.sin_family = AF_INET; // IPv4 - server_fd.sin_addr.s_addr = INADDR_ANY; // All interfaces, 0.0.0.0. - // Convert the port number from host byte order to network byte order. - server_fd.sin_port = htons(port); - - int result = bind(socket_descriptor, (struct sockaddr*)&server_fd, sizeof(server_fd)); - - // Try repeatedly to bind to a port. If no specific port is specified, then - // increment the port number each time. - - int count = 1; - while (result != 0 && count++ < PORT_BIND_RETRY_LIMIT) { - if (specified_port == 0) { - lf_print_warning("RTI failed to get port %d.", port); - port++; - if (port >= DEFAULT_PORT + MAX_NUM_PORT_ADDRESSES) - port = DEFAULT_PORT; - lf_print_warning("RTI will try again with port %d.", port); - server_fd.sin_port = htons(port); - // Do not sleep. - } else { - lf_print("RTI failed to get port %d. Will try again.", port); - lf_sleep(PORT_BIND_RETRY_INTERVAL); - } - result = bind(socket_descriptor, (struct sockaddr*)&server_fd, sizeof(server_fd)); - } - if (result != 0) { - lf_print_error_and_exit("Failed to bind the RTI socket. 
Port %d is not available. ", port); - } - char* type = "TCP"; - if (socket_type == UDP) { - type = "UDP"; - } - lf_print("RTI using %s port %d for federation %s.", type, port, rti_remote->federation_id); - - if (socket_type == TCP) { - rti_remote->final_port_TCP = port; - // Enable listening for socket connections. - // The second argument is the maximum number of queued socket requests, - // which according to the Mac man page is limited to 128. - listen(socket_descriptor, 128); - } else if (socket_type == UDP) { - rti_remote->final_port_UDP = port; - // No need to listen on the UDP socket - } - - return socket_descriptor; -} - void notify_tag_advance_grant(scheduling_node_t* e, tag_t tag) { if (e->state == NOT_CONNECTED || lf_tag_compare(tag, e->last_granted) <= 0 || lf_tag_compare(tag, e->last_provisionally_granted) < 0) { diff --git a/core/federated/network/socket_common.c b/core/federated/network/socket_common.c index 61d4804bd..3fb01cfe1 100644 --- a/core/federated/network/socket_common.c +++ b/core/federated/network/socket_common.c @@ -1,53 +1,29 @@ -/** - * @file - * @author Edward A. Lee (eal@berkeley.edu) - * @author Soroush Bateni (soroush@utdallas.edu) - * - * @section LICENSE -Copyright (c) 2020, The University of California at Berkeley. - -Redistribution and use in source and binary forms, with or without modification, -are permitted provided that the following conditions are met: - -1. Redistributions of source code must retain the above copyright notice, - this list of conditions and the following disclaimer. +// #include /* htons */ +// #include +// #include // IPPROTO_TCP, IPPROTO_UDP +// #include // TCP_NODELAY +// #include +// #include +// #include +// #include +// #include +// #include +// #include +// #include + +// #include "util.h" +// #include "net_common.h" +// #include "net_util.h" +// #include "socket_common.h" -2. 
Redistributions in binary form must reproduce the above copyright notice, - this list of conditions and the following disclaimer in the documentation - and/or other materials provided with the distribution. -THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY -EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF -MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL -THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, -SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS -INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, -STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF -THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - * @section DESCRIPTION - * Utility functions for a federate in a federated execution. - */ - -#include -#include -#include -#include // For sqrtl() and powl -#include // Defines va_list -#include -#include -#include // Defines memcpy() -#include // Defines nanosleep() +#include // Defines read(), write(), and close() #include // IPPROTO_TCP, IPPROTO_UDP #include // TCP_NODELAY - -#include "net_util.h" -#include "util.h" - -// Define socket functions only for federated execution. -#ifdef FEDERATED -#include // Defines read(), write(), and close() +#include +#include +#include //va_list +#include "socket_common.h" #ifndef NUMBER_OF_FEDERATES #define NUMBER_OF_FEDERATES 1 @@ -88,6 +64,118 @@ int create_real_time_tcp_socket_errexit() { return sock; } +/** + * Create a server and enable listening for socket connections. + * If the specified port if it is non-zero, it will attempt to acquire that port. 
+ * If it fails, it will repeatedly attempt up to PORT_BIND_RETRY_LIMIT times with + * a delay of PORT_BIND_RETRY_INTERVAL in between. If the specified port is + * zero, then it will attempt to acquire DEFAULT_PORT first. If this fails, then it + * will repeatedly attempt up to PORT_BIND_RETRY_LIMIT times, incrementing the port + * number between attempts, with no delay between attempts. Once it has incremented + * the port number MAX_NUM_PORT_ADDRESSES times, it will cycle around and begin again + * with DEFAULT_PORT. + * + * @param port The port number to use or 0 to start trying at DEFAULT_PORT. + * @param socket_type The type of the socket for the server (TCP or UDP). + * @return The socket descriptor on which to accept connections. + */ +int create_rti_server(uint16_t port, socket_type_t socket_type) { + + // Create an IPv4 socket for TCP (not UDP) communication over IP (0). + int socket_descriptor = -1; + if (socket_type == TCP) { + socket_descriptor = create_real_time_tcp_socket_errexit(); + } else if (socket_type == UDP) { + socket_descriptor = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + } + if (socket_descriptor < 0) { + lf_print_error_system_failure("Failed to create RTI socket."); + } + set_socket_timeout_option(socket_descriptor, socket_type); + int final_port = set_socket_bind_option(socket_descriptor, port); + + + char* type = (socket_type == TCP) ? "TCP" : "UDP"; + lf_print("RTI using %s port %d for federation %s.", type, port, rti_remote->federation_id); + + if (socket_type == TCP) { + rti_remote->final_port_TCP = port; + // Enable listening for socket connections. + // The second argument is the maximum number of queued socket requests, + // which according to the Mac man page is limited to 128. 
+ listen(socket_descriptor, 128); + } else if (socket_type == UDP) { + rti_remote->final_port_UDP = port; + // No need to listen on the UDP socket + } + + return socket_descriptor; +} + +static void set_socket_timeout_option(int socket_descriptor, socket_type_t socket_type) { + // Timeout time for the communications of the server + if (socket_type == TCP) { + struct timeval timeout_time = {.tv_sec = TCP_TIMEOUT_TIME / BILLION, .tv_usec = (TCP_TIMEOUT_TIME % BILLION) / 1000}; + } else if (socket_type == UDP) { + // Set the appropriate timeout time + timeout_time = + (struct timeval){.tv_sec = UDP_TIMEOUT_TIME / BILLION, .tv_usec = (UDP_TIMEOUT_TIME % BILLION) / 1000}; + } + // Set the option for this socket to reuse the same address + int true_variable = 1; // setsockopt() requires a reference to the value assigned to an option + if (setsockopt(socket_descriptor, SOL_SOCKET, SO_REUSEADDR, &true_variable, sizeof(int32_t)) < 0) { + lf_print_error("RTI failed to set SO_REUSEADDR option on the socket: %s.", strerror(errno)); + } + // Set the timeout on the socket so that read and write operations don't block for too long + if (setsockopt(socket_descriptor, SOL_SOCKET, SO_RCVTIMEO, (const char*)&timeout_time, sizeof(timeout_time)) < 0) { + lf_print_error("RTI failed to set SO_RCVTIMEO option on the socket: %s.", strerror(errno)); + } + if (setsockopt(socket_descriptor, SOL_SOCKET, SO_SNDTIMEO, (const char*)&timeout_time, sizeof(timeout_time)) < 0) { + lf_print_error("RTI failed to set SO_SNDTIMEO option on the socket: %s.", strerror(errno)); + } +} + +static int set_socket_bind_option(int socket_descriptor, int port, socket_type_t socket_type) { + // Server file descriptor. + struct sockaddr_in server_fd; + // Zero out the server address structure. 
+ bzero((char*)&server_fd, sizeof(server_fd)); + + uint16_t specified_port = port; + if (specified_port == 0){ + port = DEFAULT_PORT; + } + server_fd.sin_family = AF_INET; // IPv4 + server_fd.sin_addr.s_addr = INADDR_ANY; // All interfaces, 0.0.0.0. + // Convert the port number from host byte order to network byte order. + server_fd.sin_port = htons(port); + + int result = bind(socket_descriptor, (struct sockaddr*)&server_fd, sizeof(server_fd)); + + // Try repeatedly to bind to a port. If no specific port is specified, then + // increment the port number each time. + + int count = 1; + while (result != 0 && count++ < PORT_BIND_RETRY_LIMIT) { + if (specified_port == 0) { + lf_print_warning("RTI failed to get port %d.", port); + port++; + if (port >= DEFAULT_PORT + MAX_NUM_PORT_ADDRESSES) + port = DEFAULT_PORT; + lf_print_warning("RTI will try again with port %d.", port); + server_fd.sin_port = htons(port); + // Do not sleep. + } else { + lf_print("RTI failed to get port %d. Will try again.", port); + lf_sleep(PORT_BIND_RETRY_INTERVAL); + } + result = bind(socket_descriptor, (struct sockaddr*)&server_fd, sizeof(server_fd)); + } + if (result != 0) { + lf_print_error_and_exit("Failed to bind the RTI socket. Port %d is not available. ", port); + } +} + int read_from_socket(int socket, size_t num_bytes, unsigned char* buffer) { if (socket < 0) { // Socket is not open. @@ -220,340 +308,3 @@ void write_to_socket_fail_on_error(int* socket, size_t num_bytes, unsigned char* } } } - -#endif // FEDERATED - -// Below are more generally useful functions. - -void encode_int64(int64_t data, unsigned char* buffer) { - // This strategy is fairly brute force, but it avoids potential - // alignment problems. 
- int shift = 0; - for (size_t i = 0; i < sizeof(int64_t); i++) { - buffer[i] = (unsigned char)((data & (0xffLL << shift)) >> shift); - shift += 8; - } -} - -void encode_int32(int32_t data, unsigned char* buffer) { - // This strategy is fairly brute force, but it avoids potential - // alignment problems. Note that this assumes an int32_t is four bytes. - buffer[0] = (unsigned char)(data & 0xff); - buffer[1] = (unsigned char)((data & 0xff00) >> 8); - buffer[2] = (unsigned char)((data & 0xff0000) >> 16); - buffer[3] = (unsigned char)((data & (int32_t)0xff000000) >> 24); -} - -void encode_uint32(uint32_t data, unsigned char* buffer) { - // This strategy is fairly brute force, but it avoids potential - // alignment problems. Note that this assumes a uint32_t is four bytes. - buffer[0] = (unsigned char)(data & 0xff); - buffer[1] = (unsigned char)((data & 0xff00) >> 8); - buffer[2] = (unsigned char)((data & 0xff0000) >> 16); - buffer[3] = (unsigned char)((data & (uint32_t)0xff000000) >> 24); -} - -void encode_uint16(uint16_t data, unsigned char* buffer) { - // This strategy is fairly brute force, but it avoids potential - // alignment problems. Note that this assumes a short is two bytes. - buffer[0] = (unsigned char)(data & 0xff); - buffer[1] = (unsigned char)((data & 0xff00) >> 8); -} - -int host_is_big_endian() { - static int host = 0; - union { - uint32_t uint; - unsigned char c[sizeof(uint32_t)]; - } x; - if (host == 0) { - // Determine the endianness of the host by setting the low-order bit. - x.uint = 0x01; - host = (x.c[3] == 0x01) ? HOST_BIG_ENDIAN : HOST_LITTLE_ENDIAN; - } - return (host == HOST_BIG_ENDIAN); -} - -int32_t swap_bytes_if_big_endian_int32(int32_t src) { - union { - int32_t uint; - unsigned char c[sizeof(int32_t)]; - } x; - if (!host_is_big_endian()) - return src; - // printf("DEBUG: Host is little endian.\n"); - x.uint = src; - // printf("DEBUG: Before swapping bytes: %lld.\n", x.ull); - unsigned char c; - // Swap bytes. 
- c = x.c[0]; - x.c[0] = x.c[3]; - x.c[3] = c; - c = x.c[1]; - x.c[1] = x.c[2]; - x.c[2] = c; - // printf("DEBUG: After swapping bytes: %lld.\n", x.ull); - return x.uint; -} - -uint32_t swap_bytes_if_big_endian_uint32(uint32_t src) { - union { - uint32_t uint; - unsigned char c[sizeof(uint32_t)]; - } x; - if (!host_is_big_endian()) - return src; - // printf("DEBUG: Host is little endian.\n"); - x.uint = src; - // printf("DEBUG: Before swapping bytes: %lld.\n", x.ull); - unsigned char c; - // Swap bytes. - c = x.c[0]; - x.c[0] = x.c[3]; - x.c[3] = c; - c = x.c[1]; - x.c[1] = x.c[2]; - x.c[2] = c; - // printf("DEBUG: After swapping bytes: %lld.\n", x.ull); - return x.uint; -} - -int64_t swap_bytes_if_big_endian_int64(int64_t src) { - union { - int64_t ull; - unsigned char c[sizeof(int64_t)]; - } x; - if (!host_is_big_endian()) - return src; - // printf("DEBUG: Host is little endian.\n"); - x.ull = src; - // printf("DEBUG: Before swapping bytes: %lld.\n", x.ull); - unsigned char c; - // Swap bytes. - c = x.c[0]; - x.c[0] = x.c[7]; - x.c[7] = c; - c = x.c[1]; - x.c[1] = x.c[6]; - x.c[6] = c; - c = x.c[2]; - x.c[2] = x.c[5]; - x.c[5] = c; - c = x.c[3]; - x.c[3] = x.c[4]; - x.c[4] = c; - // printf("DEBUG: After swapping bytes: %lld.\n", x.ull); - return x.ull; -} - -uint16_t swap_bytes_if_big_endian_uint16(uint16_t src) { - union { - uint16_t uint; - unsigned char c[sizeof(uint16_t)]; - } x; - if (!host_is_big_endian()) - return src; - // printf("DEBUG: Host is little endian.\n"); - x.uint = src; - // printf("DEBUG: Before swapping bytes: %lld.\n", x.ull); - unsigned char c; - // Swap bytes. - c = x.c[0]; - x.c[0] = x.c[1]; - x.c[1] = c; - // printf("DEBUG: After swapping bytes: %lld.\n", x.ull); - return x.uint; -} - -int32_t extract_int32(unsigned char* bytes) { - // Use memcpy to prevent possible alignment problems on some processors. 
- union { - int32_t uint; - unsigned char c[sizeof(int32_t)]; - } result; - memcpy(&result.c, bytes, sizeof(int32_t)); - return swap_bytes_if_big_endian_int32(result.uint); -} - -uint32_t extract_uint32(unsigned char* bytes) { - // Use memcpy to prevent possible alignment problems on some processors. - union { - uint32_t uint; - unsigned char c[sizeof(uint32_t)]; - } result; - memcpy(&result.c, bytes, sizeof(uint32_t)); - return swap_bytes_if_big_endian_uint32(result.uint); -} - -int64_t extract_int64(unsigned char* bytes) { - // Use memcpy to prevent possible alignment problems on some processors. - union { - int64_t ull; - unsigned char c[sizeof(int64_t)]; - } result; - memcpy(&result.c, bytes, sizeof(int64_t)); - return swap_bytes_if_big_endian_int64(result.ull); -} - -uint16_t extract_uint16(unsigned char* bytes) { - // Use memcpy to prevent possible alignment problems on some processors. - union { - uint16_t ushort; - unsigned char c[sizeof(uint16_t)]; - } result; - memcpy(&result.c, bytes, sizeof(uint16_t)); - return swap_bytes_if_big_endian_uint16(result.ushort); -} - -#ifdef FEDERATED - -void extract_header(unsigned char* buffer, uint16_t* port_id, uint16_t* federate_id, size_t* length) { - // The first two bytes are the ID of the destination reactor. - *port_id = extract_uint16(buffer); - - // The next two bytes are the ID of the destination federate. - *federate_id = extract_uint16(&(buffer[sizeof(uint16_t)])); - - // printf("DEBUG: Message for port %d of federate %d.\n", *port_id, *federate_id); - - // The next four bytes are the message length. 
- uint32_t local_length_signed = extract_uint32(&(buffer[sizeof(uint16_t) + sizeof(uint16_t)])); - *length = (size_t)local_length_signed; - - // printf("DEBUG: Federate receiving message to port %d to federate %d of length %d.\n", port_id, federate_id, - // length); -} - -void extract_timed_header(unsigned char* buffer, uint16_t* port_id, uint16_t* federate_id, size_t* length, tag_t* tag) { - extract_header(buffer, port_id, federate_id, length); - - tag_t temporary_tag = extract_tag(&(buffer[sizeof(uint16_t) + sizeof(uint16_t) + sizeof(int32_t)])); - tag->time = temporary_tag.time; - tag->microstep = temporary_tag.microstep; -} - -tag_t extract_tag(unsigned char* buffer) { - tag_t tag; - tag.time = extract_int64(buffer); - tag.microstep = extract_uint32(&(buffer[sizeof(int64_t)])); - - return tag; -} - -void encode_tag(unsigned char* buffer, tag_t tag) { - encode_int64(tag.time, buffer); - encode_uint32(tag.microstep, &(buffer[sizeof(int64_t)])); -} - -bool match_regex(const char* str, char* regex) { - regex_t regex_compiled; - regmatch_t group; - bool valid = false; - - if (regcomp(®ex_compiled, regex, REG_EXTENDED)) { - lf_print_error("Could not compile regex to parse RTI address"); - return valid; - } - - // regexec returns 0 when a match is found. 
- if (regexec(®ex_compiled, str, 1, &group, 0) == 0) { - valid = true; - } - regfree(®ex_compiled); - return valid; -} - -bool validate_port(char* port) { - // magic number 6 since port range is [0, 65535] - int port_len = strnlen(port, 6); - if (port_len < 1 || port_len > 5) { - return false; - } - - for (int i = 0; i < port_len; i++) { - if (!isdigit(port[i])) { - return false; - } - } - int port_number = atoi(port); - return port_number >= 0 && port_number <= 65535; -} - -bool validate_host(const char* host) { - // regex taken from LFValidator.xtend - char* ipv4_regex = "((25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])\\.){3,3}(25[0-5]|(2[0-4]|1{0,1}[0-9]){0,1}[0-9])"; - char* host_or_FQN_regex = "^([a-z0-9]+(-[a-z0-9]+)*)|(([a-z0-9]+(-[a-z0-9]+)*\\.)+[a-z]{2,})$"; - return match_regex(host, ipv4_regex) || match_regex(host, host_or_FQN_regex); -} - -bool validate_user(const char* user) { - // regex taken from LFValidator.xtend - char* username_regex = "^[a-z_]([a-z0-9_-]{0,31}|[a-z0-9_-]{0,30}\\$)$"; - return match_regex(user, username_regex); -} - -bool extract_match_group(const char* rti_addr, char* dest, regmatch_t group, size_t max_len, size_t min_len, - const char* err_msg) { - size_t size = group.rm_eo - group.rm_so; - if (size > max_len || size < min_len) { - lf_print_error("%s", err_msg); - return false; - } - strncpy(dest, &rti_addr[group.rm_so], size); - dest[size] = '\0'; - return true; -} - -bool extract_match_groups(const char* rti_addr, char** rti_addr_strs, bool** rti_addr_flags, regmatch_t* group_array, - int* gids, size_t* max_lens, size_t* min_lens, const char** err_msgs) { - for (int i = 0; i < 3; i++) { - if (group_array[gids[i]].rm_so != -1) { - if (!extract_match_group(rti_addr, rti_addr_strs[i], group_array[gids[i]], max_lens[i], min_lens[i], - err_msgs[i])) { - return false; - } else { - *rti_addr_flags[i] = true; - } - } - } - return true; -} - -void extract_rti_addr_info(const char* rti_addr, rti_addr_info_t* rti_addr_info) { - const char* 
regex_str = "(([a-zA-Z0-9_-]{1,254})@)?([a-zA-Z0-9._-]{1,255})(:([0-9]{1,5}))?"; - size_t max_groups = 6; - // The group indices of each field of interest in the regex. - int user_gid = 2, host_gid = 3, port_gid = 5; - int gids[3] = {user_gid, host_gid, port_gid}; - char* rti_addr_strs[3] = {rti_addr_info->rti_user_str, rti_addr_info->rti_host_str, rti_addr_info->rti_port_str}; - bool* rti_addr_flags[3] = {&rti_addr_info->has_user, &rti_addr_info->has_host, &rti_addr_info->has_port}; - size_t max_lens[3] = {255, 255, 5}; - size_t min_lens[3] = {1, 1, 1}; - const char* err_msgs[3] = {"User name must be between 1 to 255 characters long.", - "Host must be between 1 to 255 characters long.", - "Port must be between 1 to 5 characters long."}; - - regex_t regex_compiled; - regmatch_t group_array[max_groups]; - - if (regcomp(®ex_compiled, regex_str, REG_EXTENDED)) { - lf_print_error("Could not compile regex to parse RTI address"); - return; - } - - if (regexec(®ex_compiled, rti_addr, max_groups, group_array, 0) == 0) { - // Check for matched username. group_array[0] is the entire matched string. - for (size_t i = 1; i < max_groups; i++) { - // Annoyingly, the rm_so and rm_eo fields are long long on some platforms and int on others. 
- // To suppress warnings, cast to long long - LF_PRINT_DEBUG("runtime rti_addr regex: so: %lld eo: %lld\n", (long long)group_array[i].rm_so, - (long long)group_array[i].rm_eo); - } - if (!extract_match_groups(rti_addr, rti_addr_strs, rti_addr_flags, group_array, gids, max_lens, min_lens, - err_msgs)) { - memset(rti_addr_info, 0, sizeof(rti_addr_info_t)); - } - } - regfree(®ex_compiled); -} -#endif From f1aaa85c0d633fca471055f8f1b0d18931af2573 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Mon, 16 Dec 2024 12:33:38 -0700 Subject: [PATCH 15/50] Fix create_rti_server --- core/federated/RTI/CMakeLists.txt | 1 + core/federated/RTI/rti_remote.c | 5 +- core/federated/network/socket_common.c | 95 ++++++++----------- include/core/federated/network/net_util.h | 7 +- .../core/federated/network/socket_common.h | 6 +- 5 files changed, 50 insertions(+), 64 deletions(-) diff --git a/core/federated/RTI/CMakeLists.txt b/core/federated/RTI/CMakeLists.txt index 5bfbf0196..d9a93c246 100644 --- a/core/federated/RTI/CMakeLists.txt +++ b/core/federated/RTI/CMakeLists.txt @@ -17,6 +17,7 @@ add_library(${RTI_LIB} STATIC ${CoreLib}/tag.c ${CoreLib}/clock.c ${CoreLib}/federated/network/net_util.c + ${CoreLib}/federated/network/socket_common.c ${CoreLib}/utils/pqueue_base.c ${CoreLib}/utils/pqueue_tag.c ${CoreLib}/utils/pqueue.c diff --git a/core/federated/RTI/rti_remote.c b/core/federated/RTI/rti_remote.c index b6ab2476c..35dc5634b 100644 --- a/core/federated/RTI/rti_remote.c +++ b/core/federated/RTI/rti_remote.c @@ -1529,12 +1529,13 @@ void initialize_federate(federate_info_t* fed, uint16_t id) { int32_t start_rti_server(uint16_t port) { _lf_initialize_clock(); // Create the TCP socket server - rti_remote->socket_descriptor_TCP = create_rti_server(port, TCP); + create_rti_server(port, TCP, &rti_remote->socket_descriptor_TCP, &rti_remote->final_port_TCP); lf_print("RTI: Listening for federates."); // Create the UDP socket server // Try to get the rti_remote->final_port_TCP + 1 port if 
(rti_remote->clock_sync_global_status >= clock_sync_on) { - rti_remote->socket_descriptor_UDP = create_rti_server(rti_remote->final_port_TCP + 1, UDP); + create_rti_server(rti_remote->final_port_TCP + 1, UDP, &rti_remote->socket_descriptor_UDP, + &rti_remote->final_port_UDP); } return rti_remote->socket_descriptor_TCP; } diff --git a/core/federated/network/socket_common.c b/core/federated/network/socket_common.c index 3fb01cfe1..6e4773230 100644 --- a/core/federated/network/socket_common.c +++ b/core/federated/network/socket_common.c @@ -16,13 +16,15 @@ // #include "net_util.h" // #include "socket_common.h" - -#include // Defines read(), write(), and close() +#include // Defines read(), write(), and close() #include // IPPROTO_TCP, IPPROTO_UDP #include // TCP_NODELAY #include #include #include //va_list +#include // strerror + +#include "util.h" #include "socket_common.h" #ifndef NUMBER_OF_FEDERATES @@ -64,64 +66,18 @@ int create_real_time_tcp_socket_errexit() { return sock; } -/** - * Create a server and enable listening for socket connections. - * If the specified port if it is non-zero, it will attempt to acquire that port. - * If it fails, it will repeatedly attempt up to PORT_BIND_RETRY_LIMIT times with - * a delay of PORT_BIND_RETRY_INTERVAL in between. If the specified port is - * zero, then it will attempt to acquire DEFAULT_PORT first. If this fails, then it - * will repeatedly attempt up to PORT_BIND_RETRY_LIMIT times, incrementing the port - * number between attempts, with no delay between attempts. Once it has incremented - * the port number MAX_NUM_PORT_ADDRESSES times, it will cycle around and begin again - * with DEFAULT_PORT. - * - * @param port The port number to use or 0 to start trying at DEFAULT_PORT. - * @param socket_type The type of the socket for the server (TCP or UDP). - * @return The socket descriptor on which to accept connections. 
- */ -int create_rti_server(uint16_t port, socket_type_t socket_type) { - - // Create an IPv4 socket for TCP (not UDP) communication over IP (0). - int socket_descriptor = -1; - if (socket_type == TCP) { - socket_descriptor = create_real_time_tcp_socket_errexit(); - } else if (socket_type == UDP) { - socket_descriptor = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); - } - if (socket_descriptor < 0) { - lf_print_error_system_failure("Failed to create RTI socket."); - } - set_socket_timeout_option(socket_descriptor, socket_type); - int final_port = set_socket_bind_option(socket_descriptor, port); - - - char* type = (socket_type == TCP) ? "TCP" : "UDP"; - lf_print("RTI using %s port %d for federation %s.", type, port, rti_remote->federation_id); - - if (socket_type == TCP) { - rti_remote->final_port_TCP = port; - // Enable listening for socket connections. - // The second argument is the maximum number of queued socket requests, - // which according to the Mac man page is limited to 128. - listen(socket_descriptor, 128); - } else if (socket_type == UDP) { - rti_remote->final_port_UDP = port; - // No need to listen on the UDP socket - } - - return socket_descriptor; -} - static void set_socket_timeout_option(int socket_descriptor, socket_type_t socket_type) { // Timeout time for the communications of the server + struct timeval timeout_time; if (socket_type == TCP) { - struct timeval timeout_time = {.tv_sec = TCP_TIMEOUT_TIME / BILLION, .tv_usec = (TCP_TIMEOUT_TIME % BILLION) / 1000}; + timeout_time = + (struct timeval){.tv_sec = TCP_TIMEOUT_TIME / BILLION, .tv_usec = (TCP_TIMEOUT_TIME % BILLION) / 1000}; } else if (socket_type == UDP) { // Set the appropriate timeout time timeout_time = (struct timeval){.tv_sec = UDP_TIMEOUT_TIME / BILLION, .tv_usec = (UDP_TIMEOUT_TIME % BILLION) / 1000}; } - // Set the option for this socket to reuse the same address + // Set the option for this socket to reuse the same address int true_variable = 1; // setsockopt() requires a reference 
to the value assigned to an option if (setsockopt(socket_descriptor, SOL_SOCKET, SO_REUSEADDR, &true_variable, sizeof(int32_t)) < 0) { lf_print_error("RTI failed to set SO_REUSEADDR option on the socket: %s.", strerror(errno)); @@ -135,14 +91,14 @@ static void set_socket_timeout_option(int socket_descriptor, socket_type_t socke } } -static int set_socket_bind_option(int socket_descriptor, int port, socket_type_t socket_type) { +static int set_socket_bind_option(int socket_descriptor, int port) { // Server file descriptor. struct sockaddr_in server_fd; // Zero out the server address structure. bzero((char*)&server_fd, sizeof(server_fd)); uint16_t specified_port = port; - if (specified_port == 0){ + if (specified_port == 0) { port = DEFAULT_PORT; } server_fd.sin_family = AF_INET; // IPv4 @@ -176,6 +132,37 @@ static int set_socket_bind_option(int socket_descriptor, int port, socket_type_t } } +int create_rti_server(uint16_t port, socket_type_t socket_type, int* final_socket, uint16_t* final_port) { + + // Create an IPv4 socket for TCP (not UDP) communication over IP (0). + int socket_descriptor = -1; + if (socket_type == TCP) { + socket_descriptor = create_real_time_tcp_socket_errexit(); + } else if (socket_type == UDP) { + socket_descriptor = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + } + if (socket_descriptor < 0) { + lf_print_error_system_failure("Failed to create RTI socket."); + } + set_socket_timeout_option(socket_descriptor, socket_type); + int out_port = set_socket_bind_option(socket_descriptor, port); + + char* type = (socket_type == TCP) ? "TCP" : "UDP"; + lf_print("RTI using %s port %d.", type, port); + + *final_socket = socket_descriptor; + *final_port = out_port; + + if (socket_type == TCP) { + // Enable listening for socket connections. + // The second argument is the maximum number of queued socket requests, + // which according to the Mac man page is limited to 128. 
+ listen(socket_descriptor, 128); + } else if (socket_type == UDP) { + // No need to listen on the UDP socket + } +} + int read_from_socket(int socket, size_t num_bytes, unsigned char* buffer) { if (socket < 0) { // Socket is not open. diff --git a/include/core/federated/network/net_util.h b/include/core/federated/network/net_util.h index 1353200c7..14746b74f 100644 --- a/include/core/federated/network/net_util.h +++ b/include/core/federated/network/net_util.h @@ -60,11 +60,6 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ int host_is_big_endian(void); -#ifdef FEDERATED -#include "socket_common.h" - -#endif // FEDERATED - /** * Write the specified data as a sequence of bytes starting * at the specified address. This encodes the data in little-endian @@ -161,6 +156,8 @@ uint16_t extract_uint16(unsigned char* bytes); #ifdef FEDERATED +#include "socket_common.h" + /** * Extract the core header information that all messages between * federates share. The core header information is two bytes with diff --git a/include/core/federated/network/socket_common.h b/include/core/federated/network/socket_common.h index 255964b62..ee29f290e 100644 --- a/include/core/federated/network/socket_common.h +++ b/include/core/federated/network/socket_common.h @@ -175,7 +175,6 @@ int write_to_socket_close_on_error(int* socket, size_t num_bytes, unsigned char* void write_to_socket_fail_on_error(int* socket, size_t num_bytes, unsigned char* buffer, lf_mutex_t* mutex, char* format, ...); - /** * Create a server and enable listening for socket connections. * If the specified port if it is non-zero, it will attempt to acquire that port. @@ -189,8 +188,9 @@ void write_to_socket_fail_on_error(int* socket, size_t num_bytes, unsigned char* * * @param port The port number to use or 0 to start trying at DEFAULT_PORT. * @param socket_type The type of the socket for the server (TCP or UDP). - * @return The socket descriptor on which to accept connections. 
+ * @param final_socket The socket descriptor on which to accept connections. + * @param final_port The final port of the TCP or UDP socket. */ -int create_rti_server(uint16_t port, socket_type_t socket_type); +int create_rti_server(uint16_t port, socket_type_t socket_type, int* final_socket, uint16_t* final_port); #endif /* SOCKET_COMMON_H */ \ No newline at end of file From c6f313f64173c12e858c8b56e68e9a8e4235c427 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Mon, 16 Dec 2024 13:04:47 -0700 Subject: [PATCH 16/50] Add accept_socket --- core/federated/RTI/rti_remote.c | 21 +---------- core/federated/network/socket_common.c | 20 ++++++++++ .../core/federated/network/socket_common.h | 37 ++++++++++--------- 3 files changed, 41 insertions(+), 37 deletions(-) diff --git a/core/federated/RTI/rti_remote.c b/core/federated/RTI/rti_remote.c index 35dc5634b..81da116b4 100644 --- a/core/federated/RTI/rti_remote.c +++ b/core/federated/RTI/rti_remote.c @@ -1412,25 +1412,8 @@ static bool authenticate_federate(int* socket) { void lf_connect_to_federates(int socket_descriptor) { for (int i = 0; i < rti_remote->base.number_of_scheduling_nodes; i++) { - // Wait for an incoming connection request. - struct sockaddr client_fd; - uint32_t client_length = sizeof(client_fd); - // The following blocks until a federate connects. - int socket_id = -1; - while (1) { - socket_id = accept(rti_remote->socket_descriptor_TCP, &client_fd, &client_length); - if (socket_id >= 0) { - // Got a socket - break; - } else if (socket_id < 0 && (errno != EAGAIN || errno != EWOULDBLOCK)) { - lf_print_error_system_failure("RTI failed to accept the socket."); - } else { - // Try again - lf_print_warning("RTI failed to accept the socket. %s. Trying again.", strerror(errno)); - continue; - } - } - + struct sockaddr* client_fd; + int socket_id = accept_socket(rti_remote->socket_descriptor_TCP, client_fd); // Wait for the first message from the federate when RTI -a option is on. 
#ifdef __RTI_AUTH__ if (rti_remote->authentication_enabled) { diff --git a/core/federated/network/socket_common.c b/core/federated/network/socket_common.c index 6e4773230..cdc440540 100644 --- a/core/federated/network/socket_common.c +++ b/core/federated/network/socket_common.c @@ -163,6 +163,26 @@ int create_rti_server(uint16_t port, socket_type_t socket_type, int* final_socke } } +int accept_socket(int socket, struct sockaddr* client_fd) { + // Wait for an incoming connection request. + uint32_t client_length = sizeof(*client_fd); + // The following blocks until a federate connects. + int socket_id = -1; + while (1) { + socket_id = accept(socket, client_fd, &client_length); + if (socket_id >= 0) { + // Got a socket + break; + } else if (socket_id < 0 && (errno != EAGAIN || errno != EWOULDBLOCK)) { + lf_print_error_system_failure("RTI failed to accept the socket."); + } else { + // Try again + lf_print_warning("RTI failed to accept the socket. %s. Trying again.", strerror(errno)); + continue; + } + } +} + int read_from_socket(int socket, size_t num_bytes, unsigned char* buffer) { if (socket < 0) { // Socket is not open. diff --git a/include/core/federated/network/socket_common.h b/include/core/federated/network/socket_common.h index ee29f290e..3921379ef 100644 --- a/include/core/federated/network/socket_common.h +++ b/include/core/federated/network/socket_common.h @@ -74,6 +74,25 @@ extern lf_mutex_t socket_mutex; */ int create_real_time_tcp_socket_errexit(); +/** + * Create a server and enable listening for socket connections. + * If the specified port if it is non-zero, it will attempt to acquire that port. + * If it fails, it will repeatedly attempt up to PORT_BIND_RETRY_LIMIT times with + * a delay of PORT_BIND_RETRY_INTERVAL in between. If the specified port is + * zero, then it will attempt to acquire DEFAULT_PORT first. 
If this fails, then it + * will repeatedly attempt up to PORT_BIND_RETRY_LIMIT times, incrementing the port + * number between attempts, with no delay between attempts. Once it has incremented + * the port number MAX_NUM_PORT_ADDRESSES times, it will cycle around and begin again + * with DEFAULT_PORT. + * + * @param port The port number to use or 0 to start trying at DEFAULT_PORT. + * @param socket_type The type of the socket for the server (TCP or UDP). + * @param final_socket The socket descriptor on which to accept connections. + * @param final_port The final port of the TCP or UDP socket. + */ +int create_rti_server(uint16_t port, socket_type_t socket_type, int* final_socket, uint16_t* final_port); + +int accept_socket(int socket, struct sockaddr* client_fd); /** * Read the specified number of bytes from the specified socket into the specified buffer. * If an error occurs during this reading, return -1 and set errno to indicate @@ -175,22 +194,4 @@ int write_to_socket_close_on_error(int* socket, size_t num_bytes, unsigned char* void write_to_socket_fail_on_error(int* socket, size_t num_bytes, unsigned char* buffer, lf_mutex_t* mutex, char* format, ...); -/** - * Create a server and enable listening for socket connections. - * If the specified port if it is non-zero, it will attempt to acquire that port. - * If it fails, it will repeatedly attempt up to PORT_BIND_RETRY_LIMIT times with - * a delay of PORT_BIND_RETRY_INTERVAL in between. If the specified port is - * zero, then it will attempt to acquire DEFAULT_PORT first. If this fails, then it - * will repeatedly attempt up to PORT_BIND_RETRY_LIMIT times, incrementing the port - * number between attempts, with no delay between attempts. Once it has incremented - * the port number MAX_NUM_PORT_ADDRESSES times, it will cycle around and begin again - * with DEFAULT_PORT. - * - * @param port The port number to use or 0 to start trying at DEFAULT_PORT. 
- * @param socket_type The type of the socket for the server (TCP or UDP). - * @param final_socket The socket descriptor on which to accept connections. - * @param final_port The final port of the TCP or UDP socket. - */ -int create_rti_server(uint16_t port, socket_type_t socket_type, int* final_socket, uint16_t* final_port); - #endif /* SOCKET_COMMON_H */ \ No newline at end of file From 671e29975a51eca927b4bdd3957476dcd66ee700 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Mon, 16 Dec 2024 13:54:32 -0700 Subject: [PATCH 17/50] Fix minor errors --- core/federated/RTI/rti_remote.c | 4 ++-- include/core/federated/network/net_common.h | 15 --------------- include/core/federated/network/socket_common.h | 4 ++-- 3 files changed, 4 insertions(+), 19 deletions(-) diff --git a/core/federated/RTI/rti_remote.c b/core/federated/RTI/rti_remote.c index 81da116b4..71deb6644 100644 --- a/core/federated/RTI/rti_remote.c +++ b/core/federated/RTI/rti_remote.c @@ -1412,8 +1412,8 @@ static bool authenticate_federate(int* socket) { void lf_connect_to_federates(int socket_descriptor) { for (int i = 0; i < rti_remote->base.number_of_scheduling_nodes; i++) { - struct sockaddr* client_fd; - int socket_id = accept_socket(rti_remote->socket_descriptor_TCP, client_fd); + struct sockaddr client_fd; + int socket_id = accept_socket(rti_remote->socket_descriptor_TCP, &client_fd); // Wait for the first message from the federate when RTI -a option is on. #ifdef __RTI_AUTH__ if (rti_remote->authentication_enabled) { diff --git a/include/core/federated/network/net_common.h b/include/core/federated/network/net_common.h index 48976f1c5..47826be3e 100644 --- a/include/core/federated/network/net_common.h +++ b/include/core/federated/network/net_common.h @@ -186,13 +186,6 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define FED_COM_BUFFER_SIZE 256u -/** - * Maximum number of port addresses that a federate will try to connect to the RTI on. 
- * If you are using automatic ports begining at DEFAULT_PORT, this puts an upper bound - * on the number of RTIs that can be running on the same host. - */ -#define MAX_NUM_PORT_ADDRESSES 16 - /** * Time that a federate waits before asking * the RTI again for the port and IP address of a federate @@ -201,14 +194,6 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. */ #define ADDRESS_QUERY_RETRY_INTERVAL MSEC(250) -/** - * Default port number for the RTI. - * Unless a specific port has been specified by the LF program in the "at" - * for the RTI or on the command line, when the RTI starts up, it will attempt - * to open a socket server on this port. - */ -#define DEFAULT_PORT 15045u - /** * Delay the start of all federates by this amount. * This helps ensure that the federates do not start at the same time. diff --git a/include/core/federated/network/socket_common.h b/include/core/federated/network/socket_common.h index 3921379ef..8c2678451 100644 --- a/include/core/federated/network/socket_common.h +++ b/include/core/federated/network/socket_common.h @@ -34,7 +34,7 @@ * If you are using automatic ports begining at DEFAULT_PORT, this puts an upper bound * on the number of RTIs that can be running on the same host. */ -#define MAX_NUM_PORT_ADDRESSES 16 +#define MAX_NUM_PORT_ADDRESSES 16u /** * Time to wait before re-attempting to bind to a port. @@ -90,7 +90,7 @@ int create_real_time_tcp_socket_errexit(); * @param final_socket The socket descriptor on which to accept connections. * @param final_port The final port of the TCP or UDP socket. 
*/ -int create_rti_server(uint16_t port, socket_type_t socket_type, int* final_socket, uint16_t* final_port); +void create_rti_server(uint16_t port, socket_type_t socket_type, int* final_socket, uint16_t* final_port); int accept_socket(int socket, struct sockaddr* client_fd); /** From b82c3f02bd3035a2d0876334fd89d94558247535 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Mon, 16 Dec 2024 14:04:57 -0700 Subject: [PATCH 18/50] Minor fix --- core/federated/network/socket_common.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/core/federated/network/socket_common.c b/core/federated/network/socket_common.c index cdc440540..667730df6 100644 --- a/core/federated/network/socket_common.c +++ b/core/federated/network/socket_common.c @@ -91,7 +91,7 @@ static void set_socket_timeout_option(int socket_descriptor, socket_type_t socke } } -static int set_socket_bind_option(int socket_descriptor, int port) { +static int set_socket_bind_option(int socket_descriptor, uint16_t port) { // Server file descriptor. struct sockaddr_in server_fd; // Zero out the server address structure. @@ -130,9 +130,10 @@ static int set_socket_bind_option(int socket_descriptor, int port) { if (result != 0) { lf_print_error_and_exit("Failed to bind the RTI socket. Port %d is not available. ", port); } + return port; } -int create_rti_server(uint16_t port, socket_type_t socket_type, int* final_socket, uint16_t* final_port) { +void create_rti_server(uint16_t port, socket_type_t socket_type, int* final_socket, uint16_t* final_port) { // Create an IPv4 socket for TCP (not UDP) communication over IP (0). 
int socket_descriptor = -1; @@ -181,6 +182,7 @@ int accept_socket(int socket, struct sockaddr* client_fd) { continue; } } + return socket_id; } int read_from_socket(int socket, size_t num_bytes, unsigned char* buffer) { From a3c81deda1919cbacfcce2fac3e0a6bbc4d5c754 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Mon, 16 Dec 2024 19:06:26 -0700 Subject: [PATCH 19/50] Add connect_to socket --- core/federated/federate.c | 79 +------------------ core/federated/network/socket_common.c | 55 +++++++++++++ .../core/federated/network/socket_common.h | 2 + 3 files changed, 60 insertions(+), 76 deletions(-) diff --git a/core/federated/federate.c b/core/federated/federate.c index 2bfc3656a..f285899fb 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -936,42 +936,6 @@ static int perform_hmac_authentication() { } #endif -static void close_rti_socket() { - shutdown(_fed.socket_TCP_RTI, SHUT_RDWR); - close(_fed.socket_TCP_RTI); - _fed.socket_TCP_RTI = -1; -} - -/** - * Return in the result a struct with the address info for the specified hostname and port. - * The memory for the result is dynamically allocated and must be freed using freeaddrinfo. - * @param hostname The host name. - * @param port The port number. - * @param result The struct into which to write. - */ -static void rti_address(const char* hostname, uint16_t port, struct addrinfo** result) { - struct addrinfo hints; - - memset(&hints, 0, sizeof(hints)); - hints.ai_family = AF_INET; /* Allow IPv4 */ - hints.ai_socktype = SOCK_STREAM; /* Stream socket */ - hints.ai_protocol = IPPROTO_TCP; /* TCP protocol */ - hints.ai_addr = NULL; - hints.ai_next = NULL; - hints.ai_flags = AI_NUMERICSERV; /* Allow only numeric port numbers */ - - // Convert port number to string. - char str[6]; - sprintf(str, "%u", port); - - // Get address structure matching hostname and hints criteria, and - // set port to the port number provided in str. 
There should only - // ever be one matching address structure, and we connect to that. - if (getaddrinfo(hostname, (const char*)&str, &hints, result)) { - lf_print_error_and_exit("No host for RTI matching given hostname: %s", hostname); - } -} - /** * Send the specified timestamp to the RTI and wait for a response. * The specified timestamp should be current physical time of the @@ -1914,42 +1878,12 @@ void lf_connect_to_rti(const char* hostname, int port) { // Create a socket _fed.socket_TCP_RTI = create_real_time_tcp_socket_errexit(); - - int result = -1; - struct addrinfo* res = NULL; + if (connect_to_socket(_fed.socket_TCP_RTI, hostname, uport, port) < 0) { + lf_print_error_and_exit("Failed to connect() to RTI."); + } instant_t start_connect = lf_time_physical(); while (!CHECK_TIMEOUT(start_connect, CONNECT_TIMEOUT) && !_lf_termination_executed) { - if (res != NULL) { - // This is a repeated attempt. - if (_fed.socket_TCP_RTI >= 0) - close_rti_socket(); - - lf_sleep(CONNECT_RETRY_INTERVAL); - - // Create a new socket. - _fed.socket_TCP_RTI = create_real_time_tcp_socket_errexit(); - - if (port == 0) { - // Free previously allocated address info. - freeaddrinfo(res); - // Increment the port number. - uport++; - if (uport >= DEFAULT_PORT + MAX_NUM_PORT_ADDRESSES) - uport = DEFAULT_PORT; - - // Reconstruct the address info. - rti_address(hostname, uport, &res); - } - lf_print("Trying RTI again on port %d.", uport); - } else { - // This is the first attempt. - rti_address(hostname, uport, &res); - } - - result = connect(_fed.socket_TCP_RTI, res->ai_addr, res->ai_addrlen); - if (result < 0) - continue; // Connect failed. // Have connected to an RTI, but not sure it's the right RTI. // Send a MSG_TYPE_FED_IDS message and wait for a reply. @@ -1962,7 +1896,6 @@ void lf_connect_to_rti(const char* hostname, int port) { continue; // Try again with a new port. } else { // No point in trying again because it will be the same port. 
- close_rti_socket(); lf_print_error_and_exit("Authentication failed."); } } @@ -2031,11 +1964,7 @@ void lf_connect_to_rti(const char* hostname, int port) { continue; } } - if (result < 0) { - lf_print_error_and_exit("Failed to connect to RTI with timeout: " PRINTF_TIME, CONNECT_TIMEOUT); - } - freeaddrinfo(res); /* No longer needed */ // Call a generated (external) function that sends information // about connections between this federate and other federates @@ -2051,8 +1980,6 @@ void lf_connect_to_rti(const char* hostname, int port) { encode_uint16(udp_port, &(UDP_port_number[1])); write_to_socket_fail_on_error(&_fed.socket_TCP_RTI, 1 + sizeof(uint16_t), UDP_port_number, NULL, "Failed to send the UDP port number to the RTI."); - - lf_print("Connected to RTI at %s:%d.", hostname, uport); } void lf_create_server(int specified_port) { diff --git a/core/federated/network/socket_common.c b/core/federated/network/socket_common.c index 667730df6..c7c717ad0 100644 --- a/core/federated/network/socket_common.c +++ b/core/federated/network/socket_common.c @@ -20,7 +20,11 @@ #include // IPPROTO_TCP, IPPROTO_UDP #include // TCP_NODELAY #include +#include #include +#include +#include +#include #include //va_list #include // strerror @@ -317,3 +321,54 @@ void write_to_socket_fail_on_error(int* socket, size_t num_bytes, unsigned char* } } } + +int connect_to_socket(int sock, const char* hostname, int port, uint16_t user_specified_port) { + struct addrinfo hints; + struct addrinfo* result; + int ret = -1; + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_INET; /* Allow IPv4 */ + hints.ai_socktype = SOCK_STREAM; /* Stream socket */ + hints.ai_protocol = IPPROTO_TCP; /* TCP protocol */ + hints.ai_addr = NULL; + hints.ai_next = NULL; + hints.ai_flags = AI_NUMERICSERV; /* Allow only numeric port numbers */ + + int used_port = (user_specified_port == 0) ? 
port : user_specified_port; + + instant_t start_connect = lf_time_physical(); + // while (!_lf_termination_executed) { // Not working... + while (1) { + if (CHECK_TIMEOUT(start_connect, CONNECT_TIMEOUT)) { + lf_print_error("Failed to connect with timeout: " PRINTF_TIME ". Giving up.", CONNECT_TIMEOUT); + break; + } + // Convert port number to string. + char str[6]; + sprintf(str, "%u", used_port); + + // Get address structure matching hostname and hints criteria, and + // set port to the port number provided in str. There should only + // ever be one matching address structure, and we connect to that. + if (getaddrinfo(hostname, (const char*)&str, &hints, &result)) { + lf_print_error("No host matching given hostname: %s", hostname); + break; + } + ret = connect(sock, result->ai_addr, result->ai_addrlen); + if (ret < 0) { + lf_sleep(CONNECT_RETRY_INTERVAL); + if (user_specified_port == 0) { + used_port++; + } + lf_print_warning("Could not connect. Will try again every " PRINTF_TIME " nanoseconds.\n", + CONNECT_RETRY_INTERVAL); + continue; + } else { + break; + } + freeaddrinfo(result); + } + lf_print("Connected to RTI at %s:%d.", hostname, used_port); + return ret; +} diff --git a/include/core/federated/network/socket_common.h b/include/core/federated/network/socket_common.h index 8c2678451..8130eb7d9 100644 --- a/include/core/federated/network/socket_common.h +++ b/include/core/federated/network/socket_common.h @@ -194,4 +194,6 @@ int write_to_socket_close_on_error(int* socket, size_t num_bytes, unsigned char* void write_to_socket_fail_on_error(int* socket, size_t num_bytes, unsigned char* buffer, lf_mutex_t* mutex, char* format, ...); +int connect_to_socket(int sock, const char* hostname, int port, uint16_t user_specified_port); + #endif /* SOCKET_COMMON_H */ \ No newline at end of file From eddc99058a274193ea0abb111fe0dddf9aedf2f4 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Mon, 16 Dec 2024 21:00:11 -0700 Subject: [PATCH 20/50] Finished common 
create_TCP_server for RTI and federates. --- core/federated/RTI/rti_remote.c | 6 +- core/federated/federate.c | 47 +------ core/federated/network/socket_common.c | 127 ++++++++++-------- .../core/federated/network/socket_common.h | 5 +- 4 files changed, 79 insertions(+), 106 deletions(-) diff --git a/core/federated/RTI/rti_remote.c b/core/federated/RTI/rti_remote.c index 71deb6644..2858f3fc1 100644 --- a/core/federated/RTI/rti_remote.c +++ b/core/federated/RTI/rti_remote.c @@ -1512,12 +1512,12 @@ void initialize_federate(federate_info_t* fed, uint16_t id) { int32_t start_rti_server(uint16_t port) { _lf_initialize_clock(); // Create the TCP socket server - create_rti_server(port, TCP, &rti_remote->socket_descriptor_TCP, &rti_remote->final_port_TCP); + create_TCP_server(port, &rti_remote->socket_descriptor_TCP, &rti_remote->final_port_TCP); lf_print("RTI: Listening for federates."); // Create the UDP socket server // Try to get the rti_remote->final_port_TCP + 1 port if (rti_remote->clock_sync_global_status >= clock_sync_on) { - create_rti_server(rti_remote->final_port_TCP + 1, UDP, &rti_remote->socket_descriptor_UDP, + create_UDP_server(rti_remote->final_port_TCP + 1, &rti_remote->socket_descriptor_UDP, &rti_remote->final_port_UDP); } return rti_remote->socket_descriptor_TCP; @@ -1586,7 +1586,7 @@ void initialize_RTI(rti_remote_t* rti) { rti_remote->num_feds_proposed_start = 0; rti_remote->all_federates_exited = false; rti_remote->federation_id = "Unidentified Federation"; - rti_remote->user_specified_port = 0; + rti_remote->user_specified_port = 1; rti_remote->final_port_TCP = 0; rti_remote->socket_descriptor_TCP = -1; rti_remote->final_port_UDP = UINT16_MAX; diff --git a/core/federated/federate.c b/core/federated/federate.c index f285899fb..07fd358f4 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -1965,7 +1965,6 @@ void lf_connect_to_rti(const char* hostname, int port) { } } - // Call a generated (external) function that sends 
information // about connections between this federate and other federates // where messages are routed through the RTI. @@ -1984,48 +1983,7 @@ void lf_connect_to_rti(const char* hostname, int port) { void lf_create_server(int specified_port) { assert(specified_port <= UINT16_MAX && specified_port >= 0); - uint16_t port = (uint16_t)specified_port; - LF_PRINT_LOG("Creating a socket server on port %d.", port); - // Create an IPv4 socket for TCP (not UDP) communication over IP (0). - int socket_descriptor = create_real_time_tcp_socket_errexit(); - - // Server file descriptor. - struct sockaddr_in server_fd; - // Zero out the server address structure. - bzero((char*)&server_fd, sizeof(server_fd)); - - server_fd.sin_family = AF_INET; // IPv4 - server_fd.sin_addr.s_addr = INADDR_ANY; // All interfaces, 0.0.0.0. - // Convert the port number from host byte order to network byte order. - server_fd.sin_port = htons(port); - - int result = bind(socket_descriptor, (struct sockaddr*)&server_fd, sizeof(server_fd)); - int count = 0; - while (result < 0 && count++ < PORT_BIND_RETRY_LIMIT) { - lf_sleep(PORT_BIND_RETRY_INTERVAL); - result = bind(socket_descriptor, (struct sockaddr*)&server_fd, sizeof(server_fd)); - } - if (result < 0) { - lf_print_error_and_exit("Failed to bind socket on port %d.", port); - } - - // Set the global server port. - if (specified_port == 0) { - // Need to retrieve the port number assigned by the OS. - struct sockaddr_in assigned; - socklen_t addr_len = sizeof(assigned); - if (getsockname(socket_descriptor, (struct sockaddr*)&assigned, &addr_len) < 0) { - lf_print_error_and_exit("Failed to retrieve assigned port number."); - } - _fed.server_port = ntohs(assigned.sin_port); - } else { - _fed.server_port = port; - } - - // Enable listening for socket connections. - // The second argument is the maximum number of queued socket requests, - // which according to the Mac man page is limited to 128. 
- listen(socket_descriptor, 128); + create_TCP_server(specified_port, &_fed.server_socket, (uint16_t*)&_fed.server_port); LF_PRINT_LOG("Server for communicating with other federates started using port %d.", _fed.server_port); @@ -2043,9 +2001,6 @@ void lf_create_server(int specified_port) { "Failed to send address advertisement."); LF_PRINT_DEBUG("Sent port %d to the RTI.", _fed.server_port); - - // Set the global server socket - _fed.server_socket = socket_descriptor; } void lf_enqueue_port_absent_reactions(environment_t* env) { diff --git a/core/federated/network/socket_common.c b/core/federated/network/socket_common.c index c7c717ad0..0b78db414 100644 --- a/core/federated/network/socket_common.c +++ b/core/federated/network/socket_common.c @@ -70,45 +70,47 @@ int create_real_time_tcp_socket_errexit() { return sock; } -static void set_socket_timeout_option(int socket_descriptor, socket_type_t socket_type) { - // Timeout time for the communications of the server - struct timeval timeout_time; - if (socket_type == TCP) { - timeout_time = - (struct timeval){.tv_sec = TCP_TIMEOUT_TIME / BILLION, .tv_usec = (TCP_TIMEOUT_TIME % BILLION) / 1000}; - } else if (socket_type == UDP) { - // Set the appropriate timeout time - timeout_time = - (struct timeval){.tv_sec = UDP_TIMEOUT_TIME / BILLION, .tv_usec = (UDP_TIMEOUT_TIME % BILLION) / 1000}; - } +/** + * Set the socket timeout options. 
+ * @param socket_descriptor + * @param timeout_time + */ +static void set_socket_timeout_option(int socket_descriptor, struct timeval* timeout_time) { // Set the option for this socket to reuse the same address int true_variable = 1; // setsockopt() requires a reference to the value assigned to an option if (setsockopt(socket_descriptor, SOL_SOCKET, SO_REUSEADDR, &true_variable, sizeof(int32_t)) < 0) { lf_print_error("RTI failed to set SO_REUSEADDR option on the socket: %s.", strerror(errno)); } // Set the timeout on the socket so that read and write operations don't block for too long - if (setsockopt(socket_descriptor, SOL_SOCKET, SO_RCVTIMEO, (const char*)&timeout_time, sizeof(timeout_time)) < 0) { + if (setsockopt(socket_descriptor, SOL_SOCKET, SO_RCVTIMEO, (const char*)timeout_time, sizeof(*timeout_time)) < 0) { lf_print_error("RTI failed to set SO_RCVTIMEO option on the socket: %s.", strerror(errno)); } - if (setsockopt(socket_descriptor, SOL_SOCKET, SO_SNDTIMEO, (const char*)&timeout_time, sizeof(timeout_time)) < 0) { + if (setsockopt(socket_descriptor, SOL_SOCKET, SO_SNDTIMEO, (const char*)timeout_time, sizeof(*timeout_time)) < 0) { lf_print_error("RTI failed to set SO_SNDTIMEO option on the socket: %s.", strerror(errno)); } } -static int set_socket_bind_option(int socket_descriptor, uint16_t port) { +/** + * Set the socket bind options. If the specified port is 0, it means this is a federate socket server. If the specified + * port is 1, it is creating a RTI server. RTI servers use the port increment when the default port is not available. + * Returns the actually used port. + * + * @param socket_descriptor + * @param specified_port + * @return The final port number used. + */ +static int set_socket_bind_option(int socket_descriptor, uint16_t specified_port) { // Server file descriptor. struct sockaddr_in server_fd; // Zero out the server address structure. 
bzero((char*)&server_fd, sizeof(server_fd)); - - uint16_t specified_port = port; - if (specified_port == 0) { - port = DEFAULT_PORT; + uint16_t used_port = specified_port; + if (specified_port == 1) { // RTI is set to 1 if no specified port. + used_port = DEFAULT_PORT; } server_fd.sin_family = AF_INET; // IPv4 server_fd.sin_addr.s_addr = INADDR_ANY; // All interfaces, 0.0.0.0. - // Convert the port number from host byte order to network byte order. - server_fd.sin_port = htons(port); + server_fd.sin_port = htons(used_port); // Convert the port number from host byte order to network byte order. int result = bind(socket_descriptor, (struct sockaddr*)&server_fd, sizeof(server_fd)); @@ -117,55 +119,70 @@ static int set_socket_bind_option(int socket_descriptor, uint16_t port) { int count = 1; while (result != 0 && count++ < PORT_BIND_RETRY_LIMIT) { - if (specified_port == 0) { - lf_print_warning("RTI failed to get port %d.", port); - port++; - if (port >= DEFAULT_PORT + MAX_NUM_PORT_ADDRESSES) - port = DEFAULT_PORT; - lf_print_warning("RTI will try again with port %d.", port); - server_fd.sin_port = htons(port); + if (specified_port == 1) { // RTI is set to 1 if no specified port. + lf_print_warning("RTI failed to get port %d.", used_port); + used_port++; + if (used_port >= DEFAULT_PORT + MAX_NUM_PORT_ADDRESSES) + used_port = DEFAULT_PORT; + lf_print_warning("RTI will try again with port %d.", used_port); + server_fd.sin_port = htons(used_port); // Do not sleep. } else { - lf_print("RTI failed to get port %d. Will try again.", port); + lf_print("Failed to bind socket on port %d. Will try again.", used_port); lf_sleep(PORT_BIND_RETRY_INTERVAL); } result = bind(socket_descriptor, (struct sockaddr*)&server_fd, sizeof(server_fd)); } + + // Set the global server port. + if (specified_port == 0) { // Federates are set to 0 if no specified port. + // Need to retrieve the port number assigned by the OS. 
+ struct sockaddr_in assigned; + socklen_t addr_len = sizeof(assigned); + if (getsockname(socket_descriptor, (struct sockaddr*)&assigned, &addr_len) < 0) { + lf_print_error_and_exit("Failed to retrieve assigned port number."); + } + used_port = ntohs(assigned.sin_port); + } if (result != 0) { - lf_print_error_and_exit("Failed to bind the RTI socket. Port %d is not available. ", port); + lf_print_error_and_exit("Failed to bind the RTI socket. Port %d is not available. ", used_port); } - return port; + return used_port; } -void create_rti_server(uint16_t port, socket_type_t socket_type, int* final_socket, uint16_t* final_port) { - - // Create an IPv4 socket for TCP (not UDP) communication over IP (0). - int socket_descriptor = -1; - if (socket_type == TCP) { - socket_descriptor = create_real_time_tcp_socket_errexit(); - } else if (socket_type == UDP) { - socket_descriptor = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); - } +void create_TCP_server(uint16_t port, int* final_socket, uint16_t* final_port) { + // Create an IPv4 socket for TCP. + int socket_descriptor = create_real_time_tcp_socket_errexit(); if (socket_descriptor < 0) { - lf_print_error_system_failure("Failed to create RTI socket."); + lf_print_error_system_failure("Failed to create TCP socket."); } - set_socket_timeout_option(socket_descriptor, socket_type); - int out_port = set_socket_bind_option(socket_descriptor, port); - - char* type = (socket_type == TCP) ? "TCP" : "UDP"; - lf_print("RTI using %s port %d.", type, port); - + // Set the timeout time for the communications of the server + struct timeval timeout_time = + (struct timeval){.tv_sec = TCP_TIMEOUT_TIME / BILLION, .tv_usec = (TCP_TIMEOUT_TIME % BILLION) / 1000}; + set_socket_timeout_option(socket_descriptor, &timeout_time); + int used_port = set_socket_bind_option(socket_descriptor, port); + + // Enable listening for socket connections. 
+ // The second argument is the maximum number of queued socket requests, + // which according to the Mac man page is limited to 128. + listen(socket_descriptor, 128); *final_socket = socket_descriptor; - *final_port = out_port; - - if (socket_type == TCP) { - // Enable listening for socket connections. - // The second argument is the maximum number of queued socket requests, - // which according to the Mac man page is limited to 128. - listen(socket_descriptor, 128); - } else if (socket_type == UDP) { - // No need to listen on the UDP socket + *final_port = used_port; +} + +void create_UDP_server(uint16_t port, int* final_socket, uint16_t* final_port) { + // Create a UDP socket. + int socket_descriptor = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if (socket_descriptor < 0) { + lf_print_error_system_failure("Failed to create UDP socket."); } + // Set the timeout time for the communications of the server + struct timeval timeout_time = + (struct timeval){.tv_sec = UDP_TIMEOUT_TIME / BILLION, .tv_usec = (UDP_TIMEOUT_TIME % BILLION) / 1000}; + set_socket_timeout_option(socket_descriptor, &timeout_time); + int used_port = set_socket_bind_option(socket_descriptor, port); + *final_socket = socket_descriptor; + *final_port = used_port; } int accept_socket(int socket, struct sockaddr* client_fd) { diff --git a/include/core/federated/network/socket_common.h b/include/core/federated/network/socket_common.h index 8130eb7d9..7d2c3d085 100644 --- a/include/core/federated/network/socket_common.h +++ b/include/core/federated/network/socket_common.h @@ -75,7 +75,7 @@ extern lf_mutex_t socket_mutex; int create_real_time_tcp_socket_errexit(); /** - * Create a server and enable listening for socket connections. + * Create a TCP or UDP server and enable listening for socket connections. * If the specified port if it is non-zero, it will attempt to acquire that port. 
* If it fails, it will repeatedly attempt up to PORT_BIND_RETRY_LIMIT times with * a delay of PORT_BIND_RETRY_INTERVAL in between. If the specified port is @@ -90,7 +90,8 @@ int create_real_time_tcp_socket_errexit(); * @param final_socket The socket descriptor on which to accept connections. * @param final_port The final port of the TCP or UDP socket. */ -void create_rti_server(uint16_t port, socket_type_t socket_type, int* final_socket, uint16_t* final_port); +void create_TCP_server(uint16_t port, int* final_socket, uint16_t* final_port); +void create_UDP_server(uint16_t port, int* final_socket, uint16_t* final_port); int accept_socket(int socket, struct sockaddr* client_fd); /** From ec84cb5723b7101466ace9b0e484ddd206bdcdb0 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Mon, 16 Dec 2024 21:53:39 -0700 Subject: [PATCH 21/50] Fix connect_to_socket --- core/federated/federate.c | 154 +++++++----------- core/federated/network/socket_common.c | 15 +- .../core/federated/network/socket_common.h | 2 +- 3 files changed, 67 insertions(+), 104 deletions(-) diff --git a/core/federated/federate.c b/core/federated/federate.c index 07fd358f4..259573db3 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -1757,96 +1757,71 @@ void lf_connect_to_federate(uint16_t remote_federate_id) { assert(port > 0); uint16_t uport = (uint16_t)port; -#if LOG_LEVEL > 3 - // Print the received IP address in a human readable format - // Create the human readable format of the received address. - // This is avoided unless LOG_LEVEL is high enough to - // subdue the overhead caused by inet_ntop(). 
char hostname[INET_ADDRSTRLEN]; inet_ntop(AF_INET, &host_ip_addr, hostname, INET_ADDRSTRLEN); - LF_PRINT_LOG("Received address %s port %d for federate %d from RTI.", hostname, uport, remote_federate_id); -#endif - + int socket_id = create_real_time_tcp_socket_errexit(); + if (connect_to_socket(socket_id, (const char*)hostname, uport) < 0) { + lf_print_error_and_exit("Failed to connect() to RTI."); + } // Iterate until we either successfully connect or we exceed the CONNECT_TIMEOUT start_connect = lf_time_physical(); - int socket_id = -1; while (result < 0 && !_lf_termination_executed) { - // Create an IPv4 socket for TCP (not UDP) communication over IP (0). - socket_id = create_real_time_tcp_socket_errexit(); - - // Server file descriptor. - struct sockaddr_in server_fd; - // Zero out the server_fd struct. - bzero((char*)&server_fd, sizeof(server_fd)); - - // Set up the server_fd fields. - server_fd.sin_family = AF_INET; // IPv4 - server_fd.sin_addr = host_ip_addr; // Received from the RTI - - // Convert the port number from host byte order to network byte order. - server_fd.sin_port = htons(uport); - result = connect(socket_id, (struct sockaddr*)&server_fd, sizeof(server_fd)); - - if (result != 0) { - lf_print_error("Failed to connect to federate %d on port %d.", remote_federate_id, uport); + // Try again after some time if the connection failed. + // Note that this should not really happen since the remote federate should be + // accepting socket connections. But possibly it will be busy (in process of accepting + // another socket connection?). Hence, we retry. + if (CHECK_TIMEOUT(start_connect, CONNECT_TIMEOUT)) { + // If the remote federate is not accepting the connection after CONNECT_TIMEOUT + // treat it as a soft error condition and return. + lf_print_error("Failed to connect to federate %d with timeout: " PRINTF_TIME ". Giving up.", remote_federate_id, + CONNECT_TIMEOUT); + return; + } - // Try again after some time if the connection failed. 
- // Note that this should not really happen since the remote federate should be - // accepting socket connections. But possibly it will be busy (in process of accepting - // another socket connection?). Hence, we retry. - if (CHECK_TIMEOUT(start_connect, CONNECT_TIMEOUT)) { - // If the remote federate is not accepting the connection after CONNECT_TIMEOUT - // treat it as a soft error condition and return. - lf_print_error("Failed to connect to federate %d with timeout: " PRINTF_TIME ". Giving up.", remote_federate_id, - CONNECT_TIMEOUT); - return; - } - lf_print_warning("Could not connect to federate %d. Will try again every" PRINTF_TIME "nanoseconds.\n", - remote_federate_id, ADDRESS_QUERY_RETRY_INTERVAL); + // Check whether the RTI is still there. + if (rti_failed()) { + break; + } + // Connect was successful. + size_t buffer_length = 1 + sizeof(uint16_t) + 1; + unsigned char buffer[buffer_length]; + buffer[0] = MSG_TYPE_P2P_SENDING_FED_ID; + if (_lf_my_fed_id > UINT16_MAX) { + // This error is very unlikely to occur. + lf_print_error_and_exit("Too many federates! More than %d.", UINT16_MAX); + } + encode_uint16((uint16_t)_lf_my_fed_id, (unsigned char*)&(buffer[1])); + unsigned char federation_id_length = (unsigned char)strnlen(federation_metadata.federation_id, 255); + buffer[sizeof(uint16_t) + 1] = federation_id_length; + // Trace the event when tracing is enabled + tracepoint_federate_to_federate(send_FED_ID, _lf_my_fed_id, remote_federate_id, NULL); - // Check whether the RTI is still there. - if (rti_failed()) - break; + // No need for a mutex because we have the only handle on the socket. 
+ write_to_socket_fail_on_error(&socket_id, buffer_length, buffer, NULL, "Failed to send fed_id to federate %d.", + remote_federate_id); + write_to_socket_fail_on_error(&socket_id, federation_id_length, (unsigned char*)federation_metadata.federation_id, + NULL, "Failed to send federation id to federate %d.", remote_federate_id); + read_from_socket_fail_on_error(&socket_id, 1, (unsigned char*)buffer, NULL, + "Failed to read MSG_TYPE_ACK from federate %d in response to sending fed_id.", + remote_federate_id); + if (buffer[0] != MSG_TYPE_ACK) { + // Get the error code. + read_from_socket_fail_on_error(&socket_id, 1, (unsigned char*)buffer, NULL, + "Failed to read error code from federate %d in response to sending fed_id.", + remote_federate_id); + lf_print_error("Received MSG_TYPE_REJECT message from remote federate (%d).", buffer[0]); + result = -1; // Wait ADDRESS_QUERY_RETRY_INTERVAL nanoseconds. lf_sleep(ADDRESS_QUERY_RETRY_INTERVAL); + lf_print_warning("Could not connect to federate %d. Will try again every" PRINTF_TIME "nanoseconds.\n", + remote_federate_id, ADDRESS_QUERY_RETRY_INTERVAL); + continue; } else { - // Connect was successful. - size_t buffer_length = 1 + sizeof(uint16_t) + 1; - unsigned char buffer[buffer_length]; - buffer[0] = MSG_TYPE_P2P_SENDING_FED_ID; - if (_lf_my_fed_id > UINT16_MAX) { - // This error is very unlikely to occur. - lf_print_error_and_exit("Too many federates! More than %d.", UINT16_MAX); - } - encode_uint16((uint16_t)_lf_my_fed_id, (unsigned char*)&(buffer[1])); - unsigned char federation_id_length = (unsigned char)strnlen(federation_metadata.federation_id, 255); - buffer[sizeof(uint16_t) + 1] = federation_id_length; + lf_print("Connected to federate %d, port %d.", remote_federate_id, port); // Trace the event when tracing is enabled - tracepoint_federate_to_federate(send_FED_ID, _lf_my_fed_id, remote_federate_id, NULL); - - // No need for a mutex because we have the only handle on the socket. 
- write_to_socket_fail_on_error(&socket_id, buffer_length, buffer, NULL, "Failed to send fed_id to federate %d.", - remote_federate_id); - write_to_socket_fail_on_error(&socket_id, federation_id_length, (unsigned char*)federation_metadata.federation_id, - NULL, "Failed to send federation id to federate %d.", remote_federate_id); - - read_from_socket_fail_on_error(&socket_id, 1, (unsigned char*)buffer, NULL, - "Failed to read MSG_TYPE_ACK from federate %d in response to sending fed_id.", - remote_federate_id); - if (buffer[0] != MSG_TYPE_ACK) { - // Get the error code. - read_from_socket_fail_on_error(&socket_id, 1, (unsigned char*)buffer, NULL, - "Failed to read error code from federate %d in response to sending fed_id.", - remote_federate_id); - lf_print_error("Received MSG_TYPE_REJECT message from remote federate (%d).", buffer[0]); - result = -1; - continue; - } else { - lf_print("Connected to federate %d, port %d.", remote_federate_id, port); - // Trace the event when tracing is enabled - tracepoint_federate_to_federate(receive_ACK, _lf_my_fed_id, remote_federate_id, NULL); - } + tracepoint_federate_to_federate(receive_ACK, _lf_my_fed_id, remote_federate_id, NULL); + break; } } // Once we set this variable, then all future calls to close() on this @@ -1861,24 +1836,9 @@ void lf_connect_to_rti(const char* hostname, int port) { hostname = federation_metadata.rti_host ? federation_metadata.rti_host : hostname; port = federation_metadata.rti_port >= 0 ? federation_metadata.rti_port : port; - // Adjust the port. - uint16_t uport = 0; - if (port < 0 || port > INT16_MAX) { - lf_print_error("lf_connect_to_rti(): Specified port (%d) is out of range," - " using the default port %d instead.", - port, DEFAULT_PORT); - uport = DEFAULT_PORT; - port = 0; // Mark so that increments occur between tries. 
- } else { - uport = (uint16_t)port; - } - if (uport == 0) { - uport = DEFAULT_PORT; - } - // Create a socket _fed.socket_TCP_RTI = create_real_time_tcp_socket_errexit(); - if (connect_to_socket(_fed.socket_TCP_RTI, hostname, uport, port) < 0) { + if (connect_to_socket(_fed.socket_TCP_RTI, hostname, port) < 0) { lf_print_error_and_exit("Failed to connect() to RTI."); } @@ -1948,7 +1908,7 @@ void lf_connect_to_rti(const char* hostname, int port) { read_from_socket_fail_on_error(&_fed.socket_TCP_RTI, 1, &cause, NULL, "Failed to read the cause of rejection by the RTI."); if (cause == FEDERATION_ID_DOES_NOT_MATCH || cause == WRONG_SERVER) { - lf_print_warning("Connected to the wrong RTI on port %d. Will try again", uport); + lf_print_warning("Connected to the wrong RTI. Will try again"); continue; } } else if (response == MSG_TYPE_ACK) { @@ -1957,10 +1917,10 @@ void lf_connect_to_rti(const char* hostname, int port) { LF_PRINT_LOG("Received acknowledgment from the RTI."); break; } else if (response == MSG_TYPE_RESIGN) { - lf_print_warning("RTI on port %d resigned. Will try again", uport); + lf_print_warning("RTI resigned. Will try again"); continue; } else { - lf_print_warning("RTI on port %d gave unexpect response %u. Will try again", uport, response); + lf_print_warning("RTI gave unexpect response %u. Will try again", response); continue; } } diff --git a/core/federated/network/socket_common.c b/core/federated/network/socket_common.c index 0b78db414..82c4dd7c0 100644 --- a/core/federated/network/socket_common.c +++ b/core/federated/network/socket_common.c @@ -72,8 +72,8 @@ int create_real_time_tcp_socket_errexit() { /** * Set the socket timeout options. 
- * @param socket_descriptor - * @param timeout_time + * @param socket_descriptor + * @param timeout_time */ static void set_socket_timeout_option(int socket_descriptor, struct timeval* timeout_time) { // Set the option for this socket to reuse the same address @@ -339,7 +339,7 @@ void write_to_socket_fail_on_error(int* socket, size_t num_bytes, unsigned char* } } -int connect_to_socket(int sock, const char* hostname, int port, uint16_t user_specified_port) { +int connect_to_socket(int sock, const char* hostname, int port) { struct addrinfo hints; struct addrinfo* result; int ret = -1; @@ -352,7 +352,7 @@ int connect_to_socket(int sock, const char* hostname, int port, uint16_t user_sp hints.ai_next = NULL; hints.ai_flags = AI_NUMERICSERV; /* Allow only numeric port numbers */ - int used_port = (user_specified_port == 0) ? port : user_specified_port; + uint16_t used_port = (port == 0) ? DEFAULT_PORT : (uint16_t) port; instant_t start_connect = lf_time_physical(); // while (!_lf_termination_executed) { // Not working... @@ -375,8 +375,11 @@ int connect_to_socket(int sock, const char* hostname, int port, uint16_t user_sp ret = connect(sock, result->ai_addr, result->ai_addrlen); if (ret < 0) { lf_sleep(CONNECT_RETRY_INTERVAL); - if (user_specified_port == 0) { + if (port == 0) { used_port++; + if (used_port >= DEFAULT_PORT + MAX_NUM_PORT_ADDRESSES) { + used_port = DEFAULT_PORT; + } } lf_print_warning("Could not connect. 
Will try again every " PRINTF_TIME " nanoseconds.\n", CONNECT_RETRY_INTERVAL); @@ -386,6 +389,6 @@ int connect_to_socket(int sock, const char* hostname, int port, uint16_t user_sp } freeaddrinfo(result); } - lf_print("Connected to RTI at %s:%d.", hostname, used_port); + lf_print("Connected to %s:%d.", hostname, used_port); return ret; } diff --git a/include/core/federated/network/socket_common.h b/include/core/federated/network/socket_common.h index 7d2c3d085..201f78081 100644 --- a/include/core/federated/network/socket_common.h +++ b/include/core/federated/network/socket_common.h @@ -195,6 +195,6 @@ int write_to_socket_close_on_error(int* socket, size_t num_bytes, unsigned char* void write_to_socket_fail_on_error(int* socket, size_t num_bytes, unsigned char* buffer, lf_mutex_t* mutex, char* format, ...); -int connect_to_socket(int sock, const char* hostname, int port, uint16_t user_specified_port); +int connect_to_socket(int sock, const char* hostname, int port); #endif /* SOCKET_COMMON_H */ \ No newline at end of file From 1fc5e827ae32aefa33230ca5121745b06672530d Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Tue, 17 Dec 2024 10:20:58 -0700 Subject: [PATCH 22/50] Fix formatting --- core/federated/RTI/rti_remote.c | 3 +-- core/federated/RTI/rti_remote.h | 2 -- core/federated/network/socket_common.c | 2 +- 3 files changed, 2 insertions(+), 5 deletions(-) diff --git a/core/federated/RTI/rti_remote.c b/core/federated/RTI/rti_remote.c index 2858f3fc1..42a42da85 100644 --- a/core/federated/RTI/rti_remote.c +++ b/core/federated/RTI/rti_remote.c @@ -1517,8 +1517,7 @@ int32_t start_rti_server(uint16_t port) { // Create the UDP socket server // Try to get the rti_remote->final_port_TCP + 1 port if (rti_remote->clock_sync_global_status >= clock_sync_on) { - create_UDP_server(rti_remote->final_port_TCP + 1, &rti_remote->socket_descriptor_UDP, - &rti_remote->final_port_UDP); + create_UDP_server(rti_remote->final_port_TCP + 1, &rti_remote->socket_descriptor_UDP, 
&rti_remote->final_port_UDP); } return rti_remote->socket_descriptor_TCP; } diff --git a/core/federated/RTI/rti_remote.h b/core/federated/RTI/rti_remote.h index 173c4522c..adcacf4db 100644 --- a/core/federated/RTI/rti_remote.h +++ b/core/federated/RTI/rti_remote.h @@ -39,8 +39,6 @@ ///////////////////////////////////////////// //// Data structures - - /** * Information about a federate known to the RTI, including its runtime state, * mode of execution, and connectivity with other federates. diff --git a/core/federated/network/socket_common.c b/core/federated/network/socket_common.c index 82c4dd7c0..6d21f4fe2 100644 --- a/core/federated/network/socket_common.c +++ b/core/federated/network/socket_common.c @@ -352,7 +352,7 @@ int connect_to_socket(int sock, const char* hostname, int port) { hints.ai_next = NULL; hints.ai_flags = AI_NUMERICSERV; /* Allow only numeric port numbers */ - uint16_t used_port = (port == 0) ? DEFAULT_PORT : (uint16_t) port; + uint16_t used_port = (port == 0) ? DEFAULT_PORT : (uint16_t)port; instant_t start_connect = lf_time_physical(); // while (!_lf_termination_executed) { // Not working... From 287ee17b556cf3de79af2c0bbc8ccdd093778fbc Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Tue, 17 Dec 2024 10:38:29 -0700 Subject: [PATCH 23/50] Add comments. 
--- core/federated/network/socket_common.c | 137 ++++++++---------- .../core/federated/network/socket_common.h | 29 +++- 2 files changed, 88 insertions(+), 78 deletions(-) diff --git a/core/federated/network/socket_common.c b/core/federated/network/socket_common.c index 6d21f4fe2..26a5ad8e9 100644 --- a/core/federated/network/socket_common.c +++ b/core/federated/network/socket_common.c @@ -1,21 +1,3 @@ -// #include /* htons */ -// #include -// #include // IPPROTO_TCP, IPPROTO_UDP -// #include // TCP_NODELAY -// #include -// #include -// #include -// #include -// #include -// #include -// #include -// #include - -// #include "util.h" -// #include "net_common.h" -// #include "net_util.h" -// #include "socket_common.h" - #include // Defines read(), write(), and close() #include // IPPROTO_TCP, IPPROTO_UDP #include // TCP_NODELAY @@ -72,8 +54,9 @@ int create_real_time_tcp_socket_errexit() { /** * Set the socket timeout options. - * @param socket_descriptor - * @param timeout_time + * @param socket_descriptor The file descriptor of the socket on which to set options. + * @param timeout_time A pointer to a `struct timeval` that specifies the timeout duration + * for socket operations (receive and send). */ static void set_socket_timeout_option(int socket_descriptor, struct timeval* timeout_time) { // Set the option for this socket to reuse the same address @@ -95,8 +78,10 @@ static void set_socket_timeout_option(int socket_descriptor, struct timeval* tim * port is 1, it is creating a RTI server. RTI servers use the port increment when the default port is not available. * Returns the actually used port. * - * @param socket_descriptor - * @param specified_port + * @param socket_descriptor The file descriptor of the socket to be bound to an address and port. + * @param specified_port The port number to bind the socket to. If set to 0, the OS assigns a port. 
+ * If set to 1, the function starts binding at the `DEFAULT_PORT` and increments + * until an available port is found. * @return The final port number used. */ static int set_socket_bind_option(int socket_descriptor, uint16_t specified_port) { @@ -206,6 +191,60 @@ int accept_socket(int socket, struct sockaddr* client_fd) { return socket_id; } +int connect_to_socket(int sock, const char* hostname, int port) { + struct addrinfo hints; + struct addrinfo* result; + int ret = -1; + + memset(&hints, 0, sizeof(hints)); + hints.ai_family = AF_INET; /* Allow IPv4 */ + hints.ai_socktype = SOCK_STREAM; /* Stream socket */ + hints.ai_protocol = IPPROTO_TCP; /* TCP protocol */ + hints.ai_addr = NULL; + hints.ai_next = NULL; + hints.ai_flags = AI_NUMERICSERV; /* Allow only numeric port numbers */ + + uint16_t used_port = (port == 0) ? DEFAULT_PORT : (uint16_t)port; + + instant_t start_connect = lf_time_physical(); + // while (!_lf_termination_executed) { // Not working... + while (1) { + if (CHECK_TIMEOUT(start_connect, CONNECT_TIMEOUT)) { + lf_print_error("Failed to connect with timeout: " PRINTF_TIME ". Giving up.", CONNECT_TIMEOUT); + break; + } + // Convert port number to string. + char str[6]; + sprintf(str, "%u", used_port); + + // Get address structure matching hostname and hints criteria, and + // set port to the port number provided in str. There should only + // ever be one matching address structure, and we connect to that. + if (getaddrinfo(hostname, (const char*)&str, &hints, &result)) { + lf_print_error("No host matching given hostname: %s", hostname); + break; + } + ret = connect(sock, result->ai_addr, result->ai_addrlen); + if (ret < 0) { + lf_sleep(CONNECT_RETRY_INTERVAL); + if (port == 0) { + used_port++; + if (used_port >= DEFAULT_PORT + MAX_NUM_PORT_ADDRESSES) { + used_port = DEFAULT_PORT; + } + } + lf_print_warning("Could not connect. 
Will try again every " PRINTF_TIME " nanoseconds.\n", + CONNECT_RETRY_INTERVAL); + continue; + } else { + break; + } + freeaddrinfo(result); + } + lf_print("Connected to %s:%d.", hostname, used_port); + return ret; +} + int read_from_socket(int socket, size_t num_bytes, unsigned char* buffer) { if (socket < 0) { // Socket is not open. @@ -338,57 +377,3 @@ void write_to_socket_fail_on_error(int* socket, size_t num_bytes, unsigned char* } } } - -int connect_to_socket(int sock, const char* hostname, int port) { - struct addrinfo hints; - struct addrinfo* result; - int ret = -1; - - memset(&hints, 0, sizeof(hints)); - hints.ai_family = AF_INET; /* Allow IPv4 */ - hints.ai_socktype = SOCK_STREAM; /* Stream socket */ - hints.ai_protocol = IPPROTO_TCP; /* TCP protocol */ - hints.ai_addr = NULL; - hints.ai_next = NULL; - hints.ai_flags = AI_NUMERICSERV; /* Allow only numeric port numbers */ - - uint16_t used_port = (port == 0) ? DEFAULT_PORT : (uint16_t)port; - - instant_t start_connect = lf_time_physical(); - // while (!_lf_termination_executed) { // Not working... - while (1) { - if (CHECK_TIMEOUT(start_connect, CONNECT_TIMEOUT)) { - lf_print_error("Failed to connect with timeout: " PRINTF_TIME ". Giving up.", CONNECT_TIMEOUT); - break; - } - // Convert port number to string. - char str[6]; - sprintf(str, "%u", used_port); - - // Get address structure matching hostname and hints criteria, and - // set port to the port number provided in str. There should only - // ever be one matching address structure, and we connect to that. - if (getaddrinfo(hostname, (const char*)&str, &hints, &result)) { - lf_print_error("No host matching given hostname: %s", hostname); - break; - } - ret = connect(sock, result->ai_addr, result->ai_addrlen); - if (ret < 0) { - lf_sleep(CONNECT_RETRY_INTERVAL); - if (port == 0) { - used_port++; - if (used_port >= DEFAULT_PORT + MAX_NUM_PORT_ADDRESSES) { - used_port = DEFAULT_PORT; - } - } - lf_print_warning("Could not connect. 
Will try again every " PRINTF_TIME " nanoseconds.\n", - CONNECT_RETRY_INTERVAL); - continue; - } else { - break; - } - freeaddrinfo(result); - } - lf_print("Connected to %s:%d.", hostname, used_port); - return ret; -} diff --git a/include/core/federated/network/socket_common.h b/include/core/federated/network/socket_common.h index 201f78081..8a7b88718 100644 --- a/include/core/federated/network/socket_common.h +++ b/include/core/federated/network/socket_common.h @@ -93,7 +93,34 @@ int create_real_time_tcp_socket_errexit(); void create_TCP_server(uint16_t port, int* final_socket, uint16_t* final_port); void create_UDP_server(uint16_t port, int* final_socket, uint16_t* final_port); +/** + * This function waits for an incoming connection request on the specified server socket. + * It blocks until a connection is successfully accepted. If an error occurs that is not + * temporary (e.g., `EAGAIN` or `EWOULDBLOCK`), it reports the error and exits. Temporary + * errors cause the function to retry accepting the connection. + * + * @param socket The server socket file descriptor that is listening for incoming connections. + * @param client_fd A pointer to a `struct sockaddr` that will hold the client's address information. + * @return int The file descriptor for the newly accepted socket on success, or -1 on failure + * (with an appropriate error message printed). + */ int accept_socket(int socket, struct sockaddr* client_fd); + +/** + * + * This function attempts to establish a TCP connection to the specified hostname + * and port. It uses `getaddrinfo` to resolve the hostname and retries the connection + * periodically if it fails. If the specified port is 0, it iterates through a range + * of default ports starting from `DEFAULT_PORT`. The function will stop retrying + * if the `CONNECT_TIMEOUT` is reached. + * + * @param sock The socket file descriptor that has already been created (using `socket()`). 
+ * @param hostname The hostname or IP address of the server to connect to. + * @param port The port number to connect to. If 0 is specified, a default port range will be used. + * @return 0 on success, -1 on failure, and `errno` is set to indicate the specific error. + */ +int connect_to_socket(int sock, const char* hostname, int port); + /** * Read the specified number of bytes from the specified socket into the specified buffer. * If an error occurs during this reading, return -1 and set errno to indicate @@ -195,6 +222,4 @@ int write_to_socket_close_on_error(int* socket, size_t num_bytes, unsigned char* void write_to_socket_fail_on_error(int* socket, size_t num_bytes, unsigned char* buffer, lf_mutex_t* mutex, char* format, ...); -int connect_to_socket(int sock, const char* hostname, int port); - #endif /* SOCKET_COMMON_H */ \ No newline at end of file From 6e5dc7bbe065d34cd428ae3dc29b0fad8dae1c40 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Tue, 17 Dec 2024 13:05:52 -0700 Subject: [PATCH 24/50] Fix receive_and_check_fed_id_message not to get client_fd --- core/federated/RTI/rti_remote.c | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/core/federated/RTI/rti_remote.c b/core/federated/RTI/rti_remote.c index 42a42da85..3314981c5 100644 --- a/core/federated/RTI/rti_remote.c +++ b/core/federated/RTI/rti_remote.c @@ -1045,7 +1045,7 @@ void send_reject(int* socket_id, unsigned char error_code) { * @param client_fd The socket address. * @return The federate ID for success or -1 for failure. */ -static int32_t receive_and_check_fed_id_message(int* socket_id, struct sockaddr_in* client_fd) { +static int32_t receive_and_check_fed_id_message(int* socket_id) { // Buffer for message ID, federate ID, and federation ID length. size_t length = 1 + sizeof(uint16_t) + 1; // Message ID, federate ID, length of fedration ID. 
unsigned char buffer[length]; @@ -1139,10 +1139,16 @@ static int32_t receive_and_check_fed_id_message(int* socket_id, struct sockaddr_ // The IP address is stored here as an in_addr struct (in .server_ip_addr) that can be useful // to create sockets and can be efficiently sent over the network. // First, convert the sockaddr structure into a sockaddr_in that contains an internet address. - struct sockaddr_in* pV4_addr = client_fd; + // struct sockaddr_in* pV4_addr = client_fd; // Then extract the internet address (which is in IPv4 format) and assign it as the federate's socket server - fed->server_ip_addr = pV4_addr->sin_addr; - + // fed->server_ip_addr = pV4_addr->sin_addr; + struct sockaddr_in peer_addr; + socklen_t addr_len = sizeof(peer_addr); + if (getpeername(*socket_id, (struct sockaddr*)&peer_addr, &addr_len) != 0) { + lf_print_error("RTI failed to get peer address."); + } + fed->server_ip_addr = peer_addr.sin_addr; + #if LOG_LEVEL >= LOG_LEVEL_DEBUG // Create the human readable format and copy that into // the .server_hostname field of the federate. @@ -1412,8 +1418,7 @@ static bool authenticate_federate(int* socket) { void lf_connect_to_federates(int socket_descriptor) { for (int i = 0; i < rti_remote->base.number_of_scheduling_nodes; i++) { - struct sockaddr client_fd; - int socket_id = accept_socket(rti_remote->socket_descriptor_TCP, &client_fd); + int socket_id = accept_socket(rti_remote->socket_descriptor_TCP); // Wait for the first message from the federate when RTI -a option is on. #ifdef __RTI_AUTH__ if (rti_remote->authentication_enabled) { @@ -1431,7 +1436,7 @@ void lf_connect_to_federates(int socket_descriptor) { #endif // The first message from the federate should contain its ID and the federation ID. 
- int32_t fed_id = receive_and_check_fed_id_message(&socket_id, (struct sockaddr_in*)&client_fd); + int32_t fed_id = receive_and_check_fed_id_message(&socket_id); if (fed_id >= 0 && socket_id >= 0 && receive_connection_information(&socket_id, (uint16_t)fed_id) && receive_udp_message_and_set_up_clock_sync(&socket_id, (uint16_t)fed_id)) { From 8ead8840e86a0feed75ed3f0457f75db8f865e62 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Tue, 17 Dec 2024 13:06:22 -0700 Subject: [PATCH 25/50] Remove sockaddr from accpet_socket as input --- core/federated/network/socket_common.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/core/federated/network/socket_common.c b/core/federated/network/socket_common.c index 26a5ad8e9..713361ece 100644 --- a/core/federated/network/socket_common.c +++ b/core/federated/network/socket_common.c @@ -170,13 +170,14 @@ void create_UDP_server(uint16_t port, int* final_socket, uint16_t* final_port) { *final_port = used_port; } -int accept_socket(int socket, struct sockaddr* client_fd) { +int accept_socket(int socket) { + struct sockaddr client_fd; // Wait for an incoming connection request. - uint32_t client_length = sizeof(*client_fd); + uint32_t client_length = sizeof(client_fd); // The following blocks until a federate connects. int socket_id = -1; while (1) { - socket_id = accept(socket, client_fd, &client_length); + socket_id = accept(socket, &client_fd, &client_length); if (socket_id >= 0) { // Got a socket break; From ef44cddaad73d25addd1228e5303b82deb7bbc79 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Tue, 17 Dec 2024 14:05:57 -0700 Subject: [PATCH 26/50] Fix accept_socket. Now federate.c's lf_handle_p2p_connections_from_federates(), and rti_remote.c's lf_connect_to_federates() and respond_to_erroneous_connections() uses a single function to accept_socket. 
--- core/federated/RTI/rti_remote.c | 6 ++-- core/federated/federate.c | 17 ++-------- core/federated/network/socket_common.c | 34 ++++++++++++++++--- .../core/federated/network/socket_common.h | 9 +++-- 4 files changed, 41 insertions(+), 25 deletions(-) diff --git a/core/federated/RTI/rti_remote.c b/core/federated/RTI/rti_remote.c index 3314981c5..c0af6341a 100644 --- a/core/federated/RTI/rti_remote.c +++ b/core/federated/RTI/rti_remote.c @@ -1418,7 +1418,7 @@ static bool authenticate_federate(int* socket) { void lf_connect_to_federates(int socket_descriptor) { for (int i = 0; i < rti_remote->base.number_of_scheduling_nodes; i++) { - int socket_id = accept_socket(rti_remote->socket_descriptor_TCP); + int socket_id = accept_socket(rti_remote->socket_descriptor_TCP, -1); // Wait for the first message from the federate when RTI -a option is on. #ifdef __RTI_AUTH__ if (rti_remote->authentication_enabled) { @@ -1476,11 +1476,9 @@ void* respond_to_erroneous_connections(void* nothing) { initialize_lf_thread_id(); while (true) { // Wait for an incoming connection request. - struct sockaddr client_fd; - uint32_t client_length = sizeof(client_fd); // The following will block until either a federate attempts to connect // or close(rti->socket_descriptor_TCP) is called. - int socket_id = accept(rti_remote->socket_descriptor_TCP, &client_fd, &client_length); + int socket_id = accept_socket(rti_remote->socket_descriptor_TCP, -1); if (socket_id < 0) return NULL; diff --git a/core/federated/federate.c b/core/federated/federate.c index 259573db3..3006adea3 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -1994,21 +1994,10 @@ void* lf_handle_p2p_connections_from_federates(void* env_arg) { _fed.inbound_socket_listeners = (lf_thread_t*)calloc(_fed.number_of_inbound_p2p_connections, sizeof(lf_thread_t)); while (received_federates < _fed.number_of_inbound_p2p_connections && !_lf_termination_executed) { // Wait for an incoming connection request. 
- struct sockaddr client_fd; - uint32_t client_length = sizeof(client_fd); - int socket_id = accept(_fed.server_socket, &client_fd, &client_length); - + int socket_id = accept_socket(_fed.server_socket, _fed.socket_TCP_RTI); if (socket_id < 0) { - if (errno == EAGAIN || errno == EWOULDBLOCK || errno == EINTR) { - if (rti_failed()) - break; - else - continue; // Try again. - } else if (errno == EPERM) { - lf_print_error_system_failure("Firewall permissions prohibit connection."); - } else { - lf_print_error_system_failure("A fatal error occurred while accepting a new socket."); - } + lf_print_warning("Federate failed to accept the socket."); + return NULL; } LF_PRINT_LOG("Accepted new connection from remote federate."); diff --git a/core/federated/network/socket_common.c b/core/federated/network/socket_common.c index 713361ece..e3401522c 100644 --- a/core/federated/network/socket_common.c +++ b/core/federated/network/socket_common.c @@ -170,22 +170,46 @@ void create_UDP_server(uint16_t port, int* final_socket, uint16_t* final_port) { *final_port = used_port; } -int accept_socket(int socket) { +/** + * Return true if either the socket to the RTI is broken or the socket is + * alive and the first unread byte on the socket's queue is MSG_TYPE_FAILED. + */ +bool check_socket_closed(int socket) { + unsigned char first_byte; + ssize_t bytes = peek_from_socket(socket, &first_byte); + if (bytes < 0 || (bytes == 1 && first_byte == MSG_TYPE_FAILED)) { + return true; + } else { + return false; + } +} + +int accept_socket(int socket, int rti_socket) { struct sockaddr client_fd; // Wait for an incoming connection request. uint32_t client_length = sizeof(client_fd); // The following blocks until a federate connects. int socket_id = -1; - while (1) { + while (true) { + // When close(socket) is called, the accept() will return -1. 
socket_id = accept(socket, &client_fd, &client_length); if (socket_id >= 0) { // Got a socket break; - } else if (socket_id < 0 && (errno != EAGAIN || errno != EWOULDBLOCK)) { - lf_print_error_system_failure("RTI failed to accept the socket."); + } else if (socket_id < 0 && (errno != EAGAIN || errno != EWOULDBLOCK || errno != EINTR)) { + lf_print_warning("Failed to accept the socket. %s.", strerror(errno)); + break; + } else if (errno == EPERM) { + lf_print_error_system_failure("Firewall permissions prohibit connection."); } else { + // For the federates, it should check if the rti_socket is still open, before retrying accept(). + if (rti_socket == -1) { + if (check_socket_closed(rti_socket)) { + break; + } + } // Try again - lf_print_warning("RTI failed to accept the socket. %s. Trying again.", strerror(errno)); + lf_print_warning("Failed to accept the socket. %s. Trying again.", strerror(errno)); continue; } } diff --git a/include/core/federated/network/socket_common.h b/include/core/federated/network/socket_common.h index 8a7b88718..761cb0555 100644 --- a/include/core/federated/network/socket_common.h +++ b/include/core/federated/network/socket_common.h @@ -58,6 +58,11 @@ */ #define DEFAULT_PORT 15045u +/** + * Byte identifying that the federate or the RTI has failed. + */ +#define MSG_TYPE_FAILED 25 + typedef enum socket_type_t { TCP, UDP } socket_type_t; /** @@ -100,11 +105,11 @@ void create_UDP_server(uint16_t port, int* final_socket, uint16_t* final_port); * errors cause the function to retry accepting the connection. * * @param socket The server socket file descriptor that is listening for incoming connections. - * @param client_fd A pointer to a `struct sockaddr` that will hold the client's address information. + * @param rti_socket The rti socket for the federate to check if it is still open. * @return int The file descriptor for the newly accepted socket on success, or -1 on failure * (with an appropriate error message printed). 
*/ -int accept_socket(int socket, struct sockaddr* client_fd); +int accept_socket(int socket, int rti_socket); /** * From efbce31b8ebdcaa2e3004e2b8ef428a02736dda7 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Tue, 17 Dec 2024 15:13:33 -0700 Subject: [PATCH 27/50] Minor formatting. --- core/federated/RTI/rti_remote.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/core/federated/RTI/rti_remote.c b/core/federated/RTI/rti_remote.c index c0af6341a..f9531730a 100644 --- a/core/federated/RTI/rti_remote.c +++ b/core/federated/RTI/rti_remote.c @@ -1148,7 +1148,7 @@ static int32_t receive_and_check_fed_id_message(int* socket_id) { lf_print_error("RTI failed to get peer address."); } fed->server_ip_addr = peer_addr.sin_addr; - + #if LOG_LEVEL >= LOG_LEVEL_DEBUG // Create the human readable format and copy that into // the .server_hostname field of the federate. @@ -1479,9 +1479,9 @@ void* respond_to_erroneous_connections(void* nothing) { // The following will block until either a federate attempts to connect // or close(rti->socket_descriptor_TCP) is called. int socket_id = accept_socket(rti_remote->socket_descriptor_TCP, -1); - if (socket_id < 0) + if (socket_id < 0) { return NULL; - + } if (rti_remote->all_federates_exited) { return NULL; } From bf0573ba02a1ce0524f520fa4bc9ef2f33db029e Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Tue, 17 Dec 2024 16:16:22 -0700 Subject: [PATCH 28/50] Split accept to two functions. 
--- core/federated/RTI/rti_remote.c | 4 ++-- core/federated/federate.c | 2 +- core/federated/network/socket_common.c | 7 +++++-- include/core/federated/network/socket_common.h | 8 +++++--- 4 files changed, 13 insertions(+), 8 deletions(-) diff --git a/core/federated/RTI/rti_remote.c b/core/federated/RTI/rti_remote.c index f9531730a..57b28deb9 100644 --- a/core/federated/RTI/rti_remote.c +++ b/core/federated/RTI/rti_remote.c @@ -1418,7 +1418,7 @@ static bool authenticate_federate(int* socket) { void lf_connect_to_federates(int socket_descriptor) { for (int i = 0; i < rti_remote->base.number_of_scheduling_nodes; i++) { - int socket_id = accept_socket(rti_remote->socket_descriptor_TCP, -1); + int socket_id = accept_rti_socket(rti_remote->socket_descriptor_TCP); // Wait for the first message from the federate when RTI -a option is on. #ifdef __RTI_AUTH__ if (rti_remote->authentication_enabled) { @@ -1478,7 +1478,7 @@ void* respond_to_erroneous_connections(void* nothing) { // Wait for an incoming connection request. // The following will block until either a federate attempts to connect // or close(rti->socket_descriptor_TCP) is called. - int socket_id = accept_socket(rti_remote->socket_descriptor_TCP, -1); + int socket_id = accept_rti_socket(rti_remote->socket_descriptor_TCP); if (socket_id < 0) { return NULL; } diff --git a/core/federated/federate.c b/core/federated/federate.c index 3006adea3..9282b2df9 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -1994,7 +1994,7 @@ void* lf_handle_p2p_connections_from_federates(void* env_arg) { _fed.inbound_socket_listeners = (lf_thread_t*)calloc(_fed.number_of_inbound_p2p_connections, sizeof(lf_thread_t)); while (received_federates < _fed.number_of_inbound_p2p_connections && !_lf_termination_executed) { // Wait for an incoming connection request. 
- int socket_id = accept_socket(_fed.server_socket, _fed.socket_TCP_RTI); + int socket_id = accept_federate_socket(_fed.server_socket, _fed.socket_TCP_RTI); if (socket_id < 0) { lf_print_warning("Federate failed to accept the socket."); return NULL; diff --git a/core/federated/network/socket_common.c b/core/federated/network/socket_common.c index e3401522c..db7533bd7 100644 --- a/core/federated/network/socket_common.c +++ b/core/federated/network/socket_common.c @@ -58,7 +58,7 @@ int create_real_time_tcp_socket_errexit() { * @param timeout_time A pointer to a `struct timeval` that specifies the timeout duration * for socket operations (receive and send). */ -static void set_socket_timeout_option(int socket_descriptor, struct timeval* timeout_time) { +void set_socket_timeout_option(int socket_descriptor, struct timeval* timeout_time) { // Set the option for this socket to reuse the same address int true_variable = 1; // setsockopt() requires a reference to the value assigned to an option if (setsockopt(socket_descriptor, SOL_SOCKET, SO_REUSEADDR, &true_variable, sizeof(int32_t)) < 0) { @@ -84,7 +84,7 @@ static void set_socket_timeout_option(int socket_descriptor, struct timeval* tim * until an available port is found. * @return The final port number used. */ -static int set_socket_bind_option(int socket_descriptor, uint16_t specified_port) { +int set_socket_bind_option(int socket_descriptor, uint16_t specified_port) { // Server file descriptor. struct sockaddr_in server_fd; // Zero out the server address structure. 
@@ -216,6 +216,9 @@ int accept_socket(int socket, int rti_socket) { return socket_id; } +int accept_rti_socket(int socket) { return accept_socket(socket, -1); } +int accept_federate_socket(int socket, int rti_socket) { return accept_socket(socket, rti_socket); } + int connect_to_socket(int sock, const char* hostname, int port) { struct addrinfo hints; struct addrinfo* result; diff --git a/include/core/federated/network/socket_common.h b/include/core/federated/network/socket_common.h index 761cb0555..e1cf12272 100644 --- a/include/core/federated/network/socket_common.h +++ b/include/core/federated/network/socket_common.h @@ -99,17 +99,19 @@ void create_TCP_server(uint16_t port, int* final_socket, uint16_t* final_port); void create_UDP_server(uint16_t port, int* final_socket, uint16_t* final_port); /** - * This function waits for an incoming connection request on the specified server socket. + * These two functions waits for an incoming connection request on the specified server socket. * It blocks until a connection is successfully accepted. If an error occurs that is not * temporary (e.g., `EAGAIN` or `EWOULDBLOCK`), it reports the error and exits. Temporary - * errors cause the function to retry accepting the connection. + * errors cause the function to retry accepting the connection. The accept_federate_socket() function additionally + * checks the RTI's server socket if it is still alive. * * @param socket The server socket file descriptor that is listening for incoming connections. * @param rti_socket The rti socket for the federate to check if it is still open. * @return int The file descriptor for the newly accepted socket on success, or -1 on failure * (with an appropriate error message printed). 
*/ -int accept_socket(int socket, int rti_socket); +int accept_rti_socket(int socket); +int accept_federate_socket(int socket, int rti_socket); /** * From 4860623ab0a4fd9e00bd7db712ab171c20abfdde Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Tue, 17 Dec 2024 16:29:08 -0700 Subject: [PATCH 29/50] Remove code duplication on create_TCP/UDP_sever --- core/federated/network/socket_common.c | 51 ++++++++++++++------------ 1 file changed, 27 insertions(+), 24 deletions(-) diff --git a/core/federated/network/socket_common.c b/core/federated/network/socket_common.c index db7533bd7..3b3c02344 100644 --- a/core/federated/network/socket_common.c +++ b/core/federated/network/socket_common.c @@ -135,39 +135,42 @@ int set_socket_bind_option(int socket_descriptor, uint16_t specified_port) { return used_port; } -void create_TCP_server(uint16_t port, int* final_socket, uint16_t* final_port) { - // Create an IPv4 socket for TCP. - int socket_descriptor = create_real_time_tcp_socket_errexit(); +void create_server(uint16_t port, int* final_socket, uint16_t* final_port, socket_type_t sock_type) { + int socket_descriptor; + struct timeval timeout_time; + if (sock_type == TCP) { + // Create an IPv4 socket for TCP. + socket_descriptor = create_real_time_tcp_socket_errexit(); + // Set the timeout time for the communications of the server + timeout_time = + (struct timeval){.tv_sec = TCP_TIMEOUT_TIME / BILLION, .tv_usec = (TCP_TIMEOUT_TIME % BILLION) / 1000}; + } else { + // Create a UDP socket. + socket_descriptor = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + timeout_time = + (struct timeval){.tv_sec = UDP_TIMEOUT_TIME / BILLION, .tv_usec = (UDP_TIMEOUT_TIME % BILLION) / 1000}; + } + char* type = (sock_type == TCP) ? 
"TCP" : "UDP"; if (socket_descriptor < 0) { - lf_print_error_system_failure("Failed to create TCP socket."); + lf_print_error_system_failure("Failed to create %s socket.", type); } - // Set the timeout time for the communications of the server - struct timeval timeout_time = - (struct timeval){.tv_sec = TCP_TIMEOUT_TIME / BILLION, .tv_usec = (TCP_TIMEOUT_TIME % BILLION) / 1000}; set_socket_timeout_option(socket_descriptor, &timeout_time); int used_port = set_socket_bind_option(socket_descriptor, port); - - // Enable listening for socket connections. - // The second argument is the maximum number of queued socket requests, - // which according to the Mac man page is limited to 128. - listen(socket_descriptor, 128); + if (sock_type == TCP) { + // Enable listening for socket connections. + // The second argument is the maximum number of queued socket requests, + // which according to the Mac man page is limited to 128. + listen(socket_descriptor, 128); + } *final_socket = socket_descriptor; *final_port = used_port; } +void create_TCP_server(uint16_t port, int* final_socket, uint16_t* final_port) { + create_server(port, final_socket, final_port, TCP); +} void create_UDP_server(uint16_t port, int* final_socket, uint16_t* final_port) { - // Create a UDP socket. 
- int socket_descriptor = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); - if (socket_descriptor < 0) { - lf_print_error_system_failure("Failed to create UDP socket."); - } - // Set the timeout time for the communications of the server - struct timeval timeout_time = - (struct timeval){.tv_sec = UDP_TIMEOUT_TIME / BILLION, .tv_usec = (UDP_TIMEOUT_TIME % BILLION) / 1000}; - set_socket_timeout_option(socket_descriptor, &timeout_time); - int used_port = set_socket_bind_option(socket_descriptor, port); - *final_socket = socket_descriptor; - *final_port = used_port; + create_server(port, final_socket, final_port, UDP); } /** From d87ddd6141c7dcef88a01b14949c284c95b0e0de Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Tue, 17 Dec 2024 21:08:13 -0700 Subject: [PATCH 30/50] Rollback the socket_type_t to be in rti_remote.h. --- core/federated/network/socket_common.c | 12 ++++++------ include/core/federated/network/socket_common.h | 2 -- 2 files changed, 6 insertions(+), 8 deletions(-) diff --git a/core/federated/network/socket_common.c b/core/federated/network/socket_common.c index 3b3c02344..f0313f66b 100644 --- a/core/federated/network/socket_common.c +++ b/core/federated/network/socket_common.c @@ -135,10 +135,10 @@ int set_socket_bind_option(int socket_descriptor, uint16_t specified_port) { return used_port; } -void create_server(uint16_t port, int* final_socket, uint16_t* final_port, socket_type_t sock_type) { +void create_server(uint16_t port, int* final_socket, uint16_t* final_port, int sock_type) { int socket_descriptor; struct timeval timeout_time; - if (sock_type == TCP) { + if (sock_type == 0) { // Create an IPv4 socket for TCP. 
socket_descriptor = create_real_time_tcp_socket_errexit(); // Set the timeout time for the communications of the server @@ -150,13 +150,13 @@ void create_server(uint16_t port, int* final_socket, uint16_t* final_port, socke timeout_time = (struct timeval){.tv_sec = UDP_TIMEOUT_TIME / BILLION, .tv_usec = (UDP_TIMEOUT_TIME % BILLION) / 1000}; } - char* type = (sock_type == TCP) ? "TCP" : "UDP"; + char* type = (sock_type == 0) ? "TCP" : "UDP"; if (socket_descriptor < 0) { lf_print_error_system_failure("Failed to create %s socket.", type); } set_socket_timeout_option(socket_descriptor, &timeout_time); int used_port = set_socket_bind_option(socket_descriptor, port); - if (sock_type == TCP) { + if (sock_type == 0) { // Enable listening for socket connections. // The second argument is the maximum number of queued socket requests, // which according to the Mac man page is limited to 128. @@ -167,10 +167,10 @@ void create_server(uint16_t port, int* final_socket, uint16_t* final_port, socke } void create_TCP_server(uint16_t port, int* final_socket, uint16_t* final_port) { - create_server(port, final_socket, final_port, TCP); + create_server(port, final_socket, final_port, 0); } void create_UDP_server(uint16_t port, int* final_socket, uint16_t* final_port) { - create_server(port, final_socket, final_port, UDP); + create_server(port, final_socket, final_port, 1); } /** diff --git a/include/core/federated/network/socket_common.h b/include/core/federated/network/socket_common.h index e1cf12272..867785267 100644 --- a/include/core/federated/network/socket_common.h +++ b/include/core/federated/network/socket_common.h @@ -63,8 +63,6 @@ */ #define MSG_TYPE_FAILED 25 -typedef enum socket_type_t { TCP, UDP } socket_type_t; - /** * Mutex protecting socket close operations. */ From 206d0f4932c5f987b12b61075b29f9dbe1845d27 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Tue, 17 Dec 2024 21:09:49 -0700 Subject: [PATCH 31/50] Add empty line in end and rollback socket_type. 
--- core/federated/RTI/rti_remote.h | 5 +++-- include/core/federated/network/socket_common.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) diff --git a/core/federated/RTI/rti_remote.h b/core/federated/RTI/rti_remote.h index adcacf4db..000fabcac 100644 --- a/core/federated/RTI/rti_remote.h +++ b/core/federated/RTI/rti_remote.h @@ -22,7 +22,6 @@ #include // Defines read(), write(), and close() #include // Defines bzero(). -#include "net_util.h" #include "rti_common.h" #ifdef __RTI_AUTH__ @@ -39,6 +38,8 @@ ///////////////////////////////////////////// //// Data structures +typedef enum socket_type_t { TCP, UDP } socket_type_t; + /** * Information about a federate known to the RTI, including its runtime state, * mode of execution, and connectivity with other federates. @@ -412,4 +413,4 @@ int process_args(int argc, const char* argv[]); void initialize_RTI(rti_remote_t* rti); #endif // RTI_REMOTE_H -#endif // STANDALONE_RTI \ No newline at end of file +#endif // STANDALONE_RTI diff --git a/include/core/federated/network/socket_common.h b/include/core/federated/network/socket_common.h index 867785267..37bf14f48 100644 --- a/include/core/federated/network/socket_common.h +++ b/include/core/federated/network/socket_common.h @@ -227,4 +227,4 @@ int write_to_socket_close_on_error(int* socket, size_t num_bytes, unsigned char* void write_to_socket_fail_on_error(int* socket, size_t num_bytes, unsigned char* buffer, lf_mutex_t* mutex, char* format, ...); -#endif /* SOCKET_COMMON_H */ \ No newline at end of file +#endif /* SOCKET_COMMON_H */ From 33cca9037f19a14470d99e64d6d79b4116fce1cf Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Tue, 17 Dec 2024 21:15:31 -0700 Subject: [PATCH 32/50] Remove commented out code. 
--- core/federated/RTI/rti_remote.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) diff --git a/core/federated/RTI/rti_remote.c b/core/federated/RTI/rti_remote.c index 57b28deb9..d2b9acf7f 100644 --- a/core/federated/RTI/rti_remote.c +++ b/core/federated/RTI/rti_remote.c @@ -1135,13 +1135,8 @@ static int32_t receive_and_check_fed_id_message(int* socket_id) { } federate_info_t* fed = GET_FED_INFO(fed_id); // The MSG_TYPE_FED_IDS message has the right federation ID. - // Assign the address information for federate. - // The IP address is stored here as an in_addr struct (in .server_ip_addr) that can be useful - // to create sockets and can be efficiently sent over the network. - // First, convert the sockaddr structure into a sockaddr_in that contains an internet address. - // struct sockaddr_in* pV4_addr = client_fd; - // Then extract the internet address (which is in IPv4 format) and assign it as the federate's socket server - // fed->server_ip_addr = pV4_addr->sin_addr; + + // Get the peer address from the connected socket_id. Then assign it as the federate's socket server. struct sockaddr_in peer_addr; socklen_t addr_len = sizeof(peer_addr); if (getpeername(*socket_id, (struct sockaddr*)&peer_addr, &addr_len) != 0) { From e9642b5e3231ae6ff7e8faaeecd9ddcd4144bbff Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Wed, 18 Dec 2024 11:41:11 -0700 Subject: [PATCH 33/50] Add commnets on user_specified_port. --- core/federated/RTI/rti_remote.c | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) diff --git a/core/federated/RTI/rti_remote.c b/core/federated/RTI/rti_remote.c index d2b9acf7f..ef6478ab3 100644 --- a/core/federated/RTI/rti_remote.c +++ b/core/federated/RTI/rti_remote.c @@ -1583,6 +1583,23 @@ void initialize_RTI(rti_remote_t* rti) { rti_remote->num_feds_proposed_start = 0; rti_remote->all_federates_exited = false; rti_remote->federation_id = "Unidentified Federation"; + // The federate and RTI both initialize the user_specified_port as 0. 
For the RTI, when the user_specified_port is 0, + // it recognizes that there was no user input to assign a port and assigns the port to the DEFAULT_PORT, which is + // 15045. + + // For the federate, when the user_specified_port is 0, it also recognizes that there was no user input and makes the + // OS assign the port. + + // The create_TCP_server gets this input argument port, which is actually the user_specified_port for both RTI and + // federate. To distinguish whether the RTI or federate is calling this function, I changed the initial value of the + // RTI's port as 1. + + // We need to distinguish whether the RTI or federate called the create_TCP_server function for two reasons. + + // First, we should set the default port, which RTI and federate differ. + // Next, RTI increments the port number starting from 15045, when the port binding fails. However, the federate does + // not increment the port number and try binding. + // For more info, check this pr. https://github.com/lf-lang/reactor-c/pull/505 rti_remote->user_specified_port = 1; rti_remote->final_port_TCP = 0; rti_remote->socket_descriptor_TCP = -1; From 07119ef9de4a63240a408ed1a32c48990a62a788 Mon Sep 17 00:00:00 2001 From: Dongha Kim <74869052+Jakio815@users.noreply.github.com> Date: Fri, 20 Dec 2024 09:45:25 -0700 Subject: [PATCH 34/50] Apply suggestions from code review Co-authored-by: Edward A. 
Lee --- core/federated/RTI/rti_remote.c | 22 +++++-------------- .../core/federated/network/socket_common.h | 2 +- 2 files changed, 6 insertions(+), 18 deletions(-) diff --git a/core/federated/RTI/rti_remote.c b/core/federated/RTI/rti_remote.c index ef6478ab3..a6ef46307 100644 --- a/core/federated/RTI/rti_remote.c +++ b/core/federated/RTI/rti_remote.c @@ -1583,23 +1583,11 @@ void initialize_RTI(rti_remote_t* rti) { rti_remote->num_feds_proposed_start = 0; rti_remote->all_federates_exited = false; rti_remote->federation_id = "Unidentified Federation"; - // The federate and RTI both initialize the user_specified_port as 0. For the RTI, when the user_specified_port is 0, - // it recognizes that there was no user input to assign a port and assigns the port to the DEFAULT_PORT, which is - // 15045. - - // For the federate, when the user_specified_port is 0, it also recognizes that there was no user input and makes the - // OS assign the port. - - // The create_TCP_server gets this input argument port, which is actually the user_specified_port for both RTI and - // federate. To distinguish whether the RTI or federate is calling this function, I changed the initial value of the - // RTI's port as 1. - - // We need to distinguish whether the RTI or federate called the create_TCP_server function for two reasons. - - // First, we should set the default port, which RTI and federate differ. - // Next, RTI increments the port number starting from 15045, when the port binding fails. However, the federate does - // not increment the port number and try binding. - // For more info, check this pr. https://github.com/lf-lang/reactor-c/pull/505 + // Default values for user_specified_port are 0 for a federate and 1 for the RTI. Neither of these are valid port + // numbers, but rather specify that an available port needs to be found. With value 0, the operating system will + // provide an available port number. 
With value 1, the function will first try DEFAULT_PORT, which is 15045, and, + // if this fails, wait for time given by PORT_BIND_RETRY_INTERVAL and try again. It fails if this process fails after + // MAX_NUM_PORT_ADDRESSES tries. For more details, check this PR. https://github.com/lf-lang/reactor-c/pull/505. rti_remote->user_specified_port = 1; rti_remote->final_port_TCP = 0; rti_remote->socket_descriptor_TCP = -1; diff --git a/include/core/federated/network/socket_common.h b/include/core/federated/network/socket_common.h index 37bf14f48..9c088becf 100644 --- a/include/core/federated/network/socket_common.h +++ b/include/core/federated/network/socket_common.h @@ -24,7 +24,7 @@ /** * Bound on the number of retries to connect to the RTI. - * A federate will retry every CONNECT_RETRY_INTERVAL seconds until + * A federate will retry every CONNECT_RETRY_INTERVAL nanoseconds until * CONNECTION_TIMEOUT expires. */ #define CONNECT_TIMEOUT MINUTES(1) From e14d07fda2a69146d7317cf8b825229a5a519859 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Fri, 20 Dec 2024 09:48:30 -0700 Subject: [PATCH 35/50] Move socket_common.h header to the top with the FEDERATED guards. --- include/core/federated/network/net_util.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/include/core/federated/network/net_util.h b/include/core/federated/network/net_util.h index 14746b74f..1e9008816 100644 --- a/include/core/federated/network/net_util.h +++ b/include/core/federated/network/net_util.h @@ -51,6 +51,10 @@ THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include "low_level_platform.h" #include "tag.h" +#ifdef FEDERATED +#include "socket_common.h" +#endif + #define HOST_LITTLE_ENDIAN 1 #define HOST_BIG_ENDIAN 2 @@ -156,8 +160,6 @@ uint16_t extract_uint16(unsigned char* bytes); #ifdef FEDERATED -#include "socket_common.h" - /** * Extract the core header information that all messages between * federates share. 
The core header information is two bytes with From 4cbf83e3b849f5e2edccd0a777930d2242452274 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Fri, 20 Dec 2024 12:14:19 -0700 Subject: [PATCH 36/50] Change create_server() function to return 0 for success, -1 for fail. --- core/federated/RTI/rti_remote.c | 9 +++++-- core/federated/federate.c | 5 ++-- core/federated/network/socket_common.c | 25 +++++++++++-------- .../core/federated/network/socket_common.h | 4 +-- 4 files changed, 27 insertions(+), 16 deletions(-) diff --git a/core/federated/RTI/rti_remote.c b/core/federated/RTI/rti_remote.c index a6ef46307..6d7c44771 100644 --- a/core/federated/RTI/rti_remote.c +++ b/core/federated/RTI/rti_remote.c @@ -1510,12 +1510,17 @@ void initialize_federate(federate_info_t* fed, uint16_t id) { int32_t start_rti_server(uint16_t port) { _lf_initialize_clock(); // Create the TCP socket server - create_TCP_server(port, &rti_remote->socket_descriptor_TCP, &rti_remote->final_port_TCP); + if (create_TCP_server(port, &rti_remote->socket_descriptor_TCP, &rti_remote->final_port_TCP)) { + lf_print_error_system_failure("RTI failed to create TCP server: %s.", strerror(errno)); + }; lf_print("RTI: Listening for federates."); // Create the UDP socket server // Try to get the rti_remote->final_port_TCP + 1 port if (rti_remote->clock_sync_global_status >= clock_sync_on) { - create_UDP_server(rti_remote->final_port_TCP + 1, &rti_remote->socket_descriptor_UDP, &rti_remote->final_port_UDP); + if (create_UDP_server(rti_remote->final_port_TCP + 1, &rti_remote->socket_descriptor_UDP, + &rti_remote->final_port_UDP)) { + lf_print_error_system_failure("RTI failed to create UDP server: %s.", strerror(errno)); + } } return rti_remote->socket_descriptor_TCP; } diff --git a/core/federated/federate.c b/core/federated/federate.c index 9282b2df9..0656da15e 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -1943,8 +1943,9 @@ void lf_connect_to_rti(const char* hostname, int port) { void 
lf_create_server(int specified_port) { assert(specified_port <= UINT16_MAX && specified_port >= 0); - create_TCP_server(specified_port, &_fed.server_socket, (uint16_t*)&_fed.server_port); - + if (create_TCP_server(specified_port, &_fed.server_socket, (uint16_t*)&_fed.server_port)) { + lf_print_error_system_failure("RTI failed to create TCP server: %s.", strerror(errno)); + }; LF_PRINT_LOG("Server for communicating with other federates started using port %d.", _fed.server_port); // Send the server port number to the RTI diff --git a/core/federated/network/socket_common.c b/core/federated/network/socket_common.c index f0313f66b..d07615482 100644 --- a/core/federated/network/socket_common.c +++ b/core/federated/network/socket_common.c @@ -62,14 +62,14 @@ void set_socket_timeout_option(int socket_descriptor, struct timeval* timeout_ti // Set the option for this socket to reuse the same address int true_variable = 1; // setsockopt() requires a reference to the value assigned to an option if (setsockopt(socket_descriptor, SOL_SOCKET, SO_REUSEADDR, &true_variable, sizeof(int32_t)) < 0) { - lf_print_error("RTI failed to set SO_REUSEADDR option on the socket: %s.", strerror(errno)); + lf_print_error("Failed to set SO_REUSEADDR option on the socket: %s.", strerror(errno)); } // Set the timeout on the socket so that read and write operations don't block for too long if (setsockopt(socket_descriptor, SOL_SOCKET, SO_RCVTIMEO, (const char*)timeout_time, sizeof(*timeout_time)) < 0) { - lf_print_error("RTI failed to set SO_RCVTIMEO option on the socket: %s.", strerror(errno)); + lf_print_error("Failed to set SO_RCVTIMEO option on the socket: %s.", strerror(errno)); } if (setsockopt(socket_descriptor, SOL_SOCKET, SO_SNDTIMEO, (const char*)timeout_time, sizeof(*timeout_time)) < 0) { - lf_print_error("RTI failed to set SO_SNDTIMEO option on the socket: %s.", strerror(errno)); + lf_print_error("Failed to set SO_SNDTIMEO option on the socket: %s.", strerror(errno)); } } @@ -135,7 
+135,7 @@ int set_socket_bind_option(int socket_descriptor, uint16_t specified_port) { return used_port; } -void create_server(uint16_t port, int* final_socket, uint16_t* final_port, int sock_type) { +static int create_server(uint16_t port, int* final_socket, uint16_t* final_port, int sock_type) { int socket_descriptor; struct timeval timeout_time; if (sock_type == 0) { @@ -152,7 +152,8 @@ void create_server(uint16_t port, int* final_socket, uint16_t* final_port, int s } char* type = (sock_type == 0) ? "TCP" : "UDP"; if (socket_descriptor < 0) { - lf_print_error_system_failure("Failed to create %s socket.", type); + lf_print_error("Failed to create %s socket.", type); + return -1; } set_socket_timeout_option(socket_descriptor, &timeout_time); int used_port = set_socket_bind_option(socket_descriptor, port); @@ -160,17 +161,21 @@ void create_server(uint16_t port, int* final_socket, uint16_t* final_port, int s // Enable listening for socket connections. // The second argument is the maximum number of queued socket requests, // which according to the Mac man page is limited to 128. 
- listen(socket_descriptor, 128); + if (listen(socket_descriptor, 128)) { + lf_print_error("Failed to listen on %d socket: %s.", socket_descriptor, strerror(errno)); + return -1; + } } *final_socket = socket_descriptor; *final_port = used_port; + return 0; } -void create_TCP_server(uint16_t port, int* final_socket, uint16_t* final_port) { - create_server(port, final_socket, final_port, 0); +int create_TCP_server(uint16_t port, int* final_socket, uint16_t* final_port) { + return create_server(port, final_socket, final_port, 0); } -void create_UDP_server(uint16_t port, int* final_socket, uint16_t* final_port) { - create_server(port, final_socket, final_port, 1); +int create_UDP_server(uint16_t port, int* final_socket, uint16_t* final_port) { + return create_server(port, final_socket, final_port, 1); } /** diff --git a/include/core/federated/network/socket_common.h b/include/core/federated/network/socket_common.h index 9c088becf..482418ff3 100644 --- a/include/core/federated/network/socket_common.h +++ b/include/core/federated/network/socket_common.h @@ -93,8 +93,8 @@ int create_real_time_tcp_socket_errexit(); * @param final_socket The socket descriptor on which to accept connections. * @param final_port The final port of the TCP or UDP socket. */ -void create_TCP_server(uint16_t port, int* final_socket, uint16_t* final_port); -void create_UDP_server(uint16_t port, int* final_socket, uint16_t* final_port); +int create_TCP_server(uint16_t port, int* final_socket, uint16_t* final_port); +int create_UDP_server(uint16_t port, int* final_socket, uint16_t* final_port); /** * These two functions waits for an incoming connection request on the specified server socket. From a381b7e717bf6b0e3b5906146e4ee91c15ae6aa6 Mon Sep 17 00:00:00 2001 From: Dongha Kim <74869052+Jakio815@users.noreply.github.com> Date: Fri, 20 Dec 2024 12:15:55 -0700 Subject: [PATCH 37/50] Update include/core/federated/network/socket_common.h Co-authored-by: Edward A. 
Lee --- .../core/federated/network/socket_common.h | 25 +++++++++++-------- 1 file changed, 14 insertions(+), 11 deletions(-) diff --git a/include/core/federated/network/socket_common.h b/include/core/federated/network/socket_common.h index 482418ff3..9c169929f 100644 --- a/include/core/federated/network/socket_common.h +++ b/include/core/federated/network/socket_common.h @@ -78,20 +78,23 @@ extern lf_mutex_t socket_mutex; int create_real_time_tcp_socket_errexit(); /** - * Create a TCP or UDP server and enable listening for socket connections. - * If the specified port if it is non-zero, it will attempt to acquire that port. - * If it fails, it will repeatedly attempt up to PORT_BIND_RETRY_LIMIT times with - * a delay of PORT_BIND_RETRY_INTERVAL in between. If the specified port is - * zero, then it will attempt to acquire DEFAULT_PORT first. If this fails, then it - * will repeatedly attempt up to PORT_BIND_RETRY_LIMIT times, incrementing the port - * number between attempts, with no delay between attempts. Once it has incremented - * the port number MAX_NUM_PORT_ADDRESSES times, it will cycle around and begin again + * @brief Create a TCP or UDP server that listens for socket connections. + * + * If the specified port number is greater than one, this function will attempt to acquire that port. + * If the port number is zero, it delegates to the operating system to provide an available port number. + * If the port number is one, it will attempt to acquire DEFAULT_PORT. + * + * If acquiring the port fails, then this function will repeatedly attempt up to PORT_BIND_RETRY_LIMIT times + * with a delay of PORT_BIND_RETRY_INTERVAL in between each try. + * If the specified port number is one, then it will increment the port number from DEFAULT_PORT on each attempt + * until it has incremented MAX_NUM_PORT_ADDRESSES times, at which point it will cycle around and begin again * with DEFAULT_PORT. * - * @param port The port number to use or 0 to start trying at DEFAULT_PORT. 
+ * @param port The port number to use or 0 to let the OS pick or 1 to start trying at DEFAULT_PORT. * @param socket_type The type of the socket for the server (TCP or UDP). - * @param final_socket The socket descriptor on which to accept connections. - * @param final_port The final port of the TCP or UDP socket. + * @param final_socket Pointer to the returned socket descriptor on which accepting connections will occur. + * @param final_port Pointer to the final port the server will use. + * @return 0 for success, -1 for failure. */ int create_TCP_server(uint16_t port, int* final_socket, uint16_t* final_port); int create_UDP_server(uint16_t port, int* final_socket, uint16_t* final_port); From 9d161a91d2bc7ea10efb1d90735569c5d69957ad Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Fri, 20 Dec 2024 12:20:49 -0700 Subject: [PATCH 38/50] Add 'static' for private functions only used in the same C file. --- core/federated/network/socket_common.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/core/federated/network/socket_common.c b/core/federated/network/socket_common.c index d07615482..84bebb09d 100644 --- a/core/federated/network/socket_common.c +++ b/core/federated/network/socket_common.c @@ -58,7 +58,7 @@ int create_real_time_tcp_socket_errexit() { * @param timeout_time A pointer to a `struct timeval` that specifies the timeout duration * for socket operations (receive and send). 
*/ -void set_socket_timeout_option(int socket_descriptor, struct timeval* timeout_time) { +static void set_socket_timeout_option(int socket_descriptor, struct timeval* timeout_time) { // Set the option for this socket to reuse the same address int true_variable = 1; // setsockopt() requires a reference to the value assigned to an option if (setsockopt(socket_descriptor, SOL_SOCKET, SO_REUSEADDR, &true_variable, sizeof(int32_t)) < 0) { @@ -84,7 +84,7 @@ void set_socket_timeout_option(int socket_descriptor, struct timeval* timeout_ti * until an available port is found. * @return The final port number used. */ -int set_socket_bind_option(int socket_descriptor, uint16_t specified_port) { +static int set_socket_bind_option(int socket_descriptor, uint16_t specified_port) { // Server file descriptor. struct sockaddr_in server_fd; // Zero out the server address structure. @@ -182,7 +182,7 @@ int create_UDP_server(uint16_t port, int* final_socket, uint16_t* final_port) { * Return true if either the socket to the RTI is broken or the socket is * alive and the first unread byte on the socket's queue is MSG_TYPE_FAILED. */ -bool check_socket_closed(int socket) { +static bool check_socket_closed(int socket) { unsigned char first_byte; ssize_t bytes = peek_from_socket(socket, &first_byte); if (bytes < 0 || (bytes == 1 && first_byte == MSG_TYPE_FAILED)) { @@ -192,7 +192,7 @@ bool check_socket_closed(int socket) { } } -int accept_socket(int socket, int rti_socket) { +static int accept_socket(int socket, int rti_socket) { struct sockaddr client_fd; // Wait for an incoming connection request. uint32_t client_length = sizeof(client_fd); From 5f2e48b1078d03ab45c8501909f8d60a2a6e2400 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Fri, 20 Dec 2024 12:24:29 -0700 Subject: [PATCH 39/50] Add comments to create_UDP_server. 
--- include/core/federated/network/socket_common.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/include/core/federated/network/socket_common.h b/include/core/federated/network/socket_common.h index 9c169929f..6151f2b10 100644 --- a/include/core/federated/network/socket_common.h +++ b/include/core/federated/network/socket_common.h @@ -78,7 +78,7 @@ extern lf_mutex_t socket_mutex; int create_real_time_tcp_socket_errexit(); /** - * @brief Create a TCP or UDP server that listens for socket connections. + * @brief Create a TCP server that listens for socket connections. * * If the specified port number is greater than one, this function will attempt to acquire that port. * If the port number is zero, it delegates to the operating system to provide an available port number. @@ -91,12 +91,21 @@ int create_real_time_tcp_socket_errexit(); * with DEFAULT_PORT. * * @param port The port number to use or 0 to let the OS pick or 1 to start trying at DEFAULT_PORT. - * @param socket_type The type of the socket for the server (TCP or UDP). * @param final_socket Pointer to the returned socket descriptor on which accepting connections will occur. * @param final_port Pointer to the final port the server will use. * @return 0 for success, -1 for failure. */ int create_TCP_server(uint16_t port, int* final_socket, uint16_t* final_port); +/** + * @brief Create a UDP server that listens for socket connections. + * + * This function is just like create_TCP_server(), except that it creates a UDP server. + * + * @param port The port number to use or 0 to let the OS pick or 1 to start trying at DEFAULT_PORT. + * @param final_socket Pointer to the returned socket descriptor on which accepting connections will occur. + * @param final_port Pointer to the final port the server will use. + * @return 0 for success, -1 for failure. 
+ */ int create_UDP_server(uint16_t port, int* final_socket, uint16_t* final_port); /** From 57f337caa9cf145d96d60831cf47e0a231b2b9a2 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Fri, 20 Dec 2024 12:49:05 -0700 Subject: [PATCH 40/50] Revert rti's initial port to 0, and add a bool increment_port_on_retry. --- core/federated/RTI/rti_remote.c | 11 +++------- core/federated/federate.c | 2 +- core/federated/network/socket_common.c | 19 +++++++++-------- .../core/federated/network/socket_common.h | 21 ++++++++++--------- 4 files changed, 25 insertions(+), 28 deletions(-) diff --git a/core/federated/RTI/rti_remote.c b/core/federated/RTI/rti_remote.c index 6d7c44771..4f3a24781 100644 --- a/core/federated/RTI/rti_remote.c +++ b/core/federated/RTI/rti_remote.c @@ -1510,7 +1510,7 @@ void initialize_federate(federate_info_t* fed, uint16_t id) { int32_t start_rti_server(uint16_t port) { _lf_initialize_clock(); // Create the TCP socket server - if (create_TCP_server(port, &rti_remote->socket_descriptor_TCP, &rti_remote->final_port_TCP)) { + if (create_TCP_server(port, &rti_remote->socket_descriptor_TCP, &rti_remote->final_port_TCP, true)) { lf_print_error_system_failure("RTI failed to create TCP server: %s.", strerror(errno)); }; lf_print("RTI: Listening for federates."); @@ -1518,7 +1518,7 @@ int32_t start_rti_server(uint16_t port) { // Try to get the rti_remote->final_port_TCP + 1 port if (rti_remote->clock_sync_global_status >= clock_sync_on) { if (create_UDP_server(rti_remote->final_port_TCP + 1, &rti_remote->socket_descriptor_UDP, - &rti_remote->final_port_UDP)) { + &rti_remote->final_port_UDP, true)) { lf_print_error_system_failure("RTI failed to create UDP server: %s.", strerror(errno)); } } @@ -1588,12 +1588,7 @@ void initialize_RTI(rti_remote_t* rti) { rti_remote->num_feds_proposed_start = 0; rti_remote->all_federates_exited = false; rti_remote->federation_id = "Unidentified Federation"; - // Default values for user_specified_port are 0 for a federate and 1 for the 
RTI. Neither of these are valid port - // numbers, but rather specify that an available port needs to be found. With value 0, the operating system will - // provide an available port number. With value 1, the function will first try DEFAULT_PORT, which is 15045, and, - // if this fails, wait for time given by PORT_BIND_RETRY_INTERVAL and try again. It fails if this process fails after - // MAX_NUM_PORT_ADDRESSES tries. For more details, check this PR. https://github.com/lf-lang/reactor-c/pull/505. - rti_remote->user_specified_port = 1; + rti_remote->user_specified_port = 0; rti_remote->final_port_TCP = 0; rti_remote->socket_descriptor_TCP = -1; rti_remote->final_port_UDP = UINT16_MAX; diff --git a/core/federated/federate.c b/core/federated/federate.c index 0656da15e..7d51e1bcd 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -1943,7 +1943,7 @@ void lf_connect_to_rti(const char* hostname, int port) { void lf_create_server(int specified_port) { assert(specified_port <= UINT16_MAX && specified_port >= 0); - if (create_TCP_server(specified_port, &_fed.server_socket, (uint16_t*)&_fed.server_port)) { + if (create_TCP_server(specified_port, &_fed.server_socket, (uint16_t*)&_fed.server_port, false)) { lf_print_error_system_failure("RTI failed to create TCP server: %s.", strerror(errno)); }; LF_PRINT_LOG("Server for communicating with other federates started using port %d.", _fed.server_port); diff --git a/core/federated/network/socket_common.c b/core/federated/network/socket_common.c index 84bebb09d..fa02da867 100644 --- a/core/federated/network/socket_common.c +++ b/core/federated/network/socket_common.c @@ -84,13 +84,13 @@ static void set_socket_timeout_option(int socket_descriptor, struct timeval* tim * until an available port is found. * @return The final port number used. 
*/ -static int set_socket_bind_option(int socket_descriptor, uint16_t specified_port) { +static int set_socket_bind_option(int socket_descriptor, uint16_t specified_port, bool increment_port_on_retry) { // Server file descriptor. struct sockaddr_in server_fd; // Zero out the server address structure. bzero((char*)&server_fd, sizeof(server_fd)); uint16_t used_port = specified_port; - if (specified_port == 1) { // RTI is set to 1 if no specified port. + if (specified_port == 0 && increment_port_on_retry == true) { // RTI is set to 1 if no specified port. used_port = DEFAULT_PORT; } server_fd.sin_family = AF_INET; // IPv4 @@ -104,7 +104,7 @@ static int set_socket_bind_option(int socket_descriptor, uint16_t specified_port int count = 1; while (result != 0 && count++ < PORT_BIND_RETRY_LIMIT) { - if (specified_port == 1) { // RTI is set to 1 if no specified port. + if (specified_port == 0 && increment_port_on_retry == true) { // RTI is set to 1 if no specified port. lf_print_warning("RTI failed to get port %d.", used_port); used_port++; if (used_port >= DEFAULT_PORT + MAX_NUM_PORT_ADDRESSES) @@ -135,7 +135,8 @@ static int set_socket_bind_option(int socket_descriptor, uint16_t specified_port return used_port; } -static int create_server(uint16_t port, int* final_socket, uint16_t* final_port, int sock_type) { +static int create_server(uint16_t port, int* final_socket, uint16_t* final_port, int sock_type, + bool increment_port_on_retry) { int socket_descriptor; struct timeval timeout_time; if (sock_type == 0) { @@ -156,7 +157,7 @@ static int create_server(uint16_t port, int* final_socket, uint16_t* final_port, return -1; } set_socket_timeout_option(socket_descriptor, &timeout_time); - int used_port = set_socket_bind_option(socket_descriptor, port); + int used_port = set_socket_bind_option(socket_descriptor, port, increment_port_on_retry); if (sock_type == 0) { // Enable listening for socket connections. 
// The second argument is the maximum number of queued socket requests, @@ -171,11 +172,11 @@ static int create_server(uint16_t port, int* final_socket, uint16_t* final_port, return 0; } -int create_TCP_server(uint16_t port, int* final_socket, uint16_t* final_port) { - return create_server(port, final_socket, final_port, 0); +int create_TCP_server(uint16_t port, int* final_socket, uint16_t* final_port, bool increment_port_on_retry) { + return create_server(port, final_socket, final_port, 0, increment_port_on_retry); } -int create_UDP_server(uint16_t port, int* final_socket, uint16_t* final_port) { - return create_server(port, final_socket, final_port, 1); +int create_UDP_server(uint16_t port, int* final_socket, uint16_t* final_port, bool increment_port_on_retry) { + return create_server(port, final_socket, final_port, 1, increment_port_on_retry); } /** diff --git a/include/core/federated/network/socket_common.h b/include/core/federated/network/socket_common.h index 6151f2b10..205334a95 100644 --- a/include/core/federated/network/socket_common.h +++ b/include/core/federated/network/socket_common.h @@ -80,22 +80,23 @@ int create_real_time_tcp_socket_errexit(); /** * @brief Create a TCP server that listens for socket connections. * - * If the specified port number is greater than one, this function will attempt to acquire that port. - * If the port number is zero, it delegates to the operating system to provide an available port number. - * If the port number is one, it will attempt to acquire DEFAULT_PORT. - * - * If acquiring the port fails, then this function will repeatedly attempt up to PORT_BIND_RETRY_LIMIT times - * with a delay of PORT_BIND_RETRY_INTERVAL in between each try. 
- * If the specified port number is one, then it will increment the port number from DEFAULT_PORT on each attempt - * until it has incremented MAX_NUM_PORT_ADDRESSES times, at which point it will cycle around and begin again + * If the specified port number is greater than zero, this function will attempt to acquire that port. + * If the specified port number is zero, and the increment_port_on_retry is true, it will attempt to acquire + * DEFAULT_PORT. If it fails to acquire DEFAULT_PORT, then it will increment the port number from DEFAULT_PORT on each + * attempt until it has incremented MAX_NUM_PORT_ADDRESSES times, at which point it will cycle around and begin again * with DEFAULT_PORT. + * If the port number is zero, and the increment_port_on_retry is false, it delegates to the operating system to provide + * an available port number. + * If acquiring the port fails, then this function will repeatedly attempt up to PORT_BIND_RETRY_LIMIT times with a + * delay of PORT_BIND_RETRY_INTERVAL in between each try. * * @param port The port number to use or 0 to let the OS pick or 1 to start trying at DEFAULT_PORT. * @param final_socket Pointer to the returned socket descriptor on which accepting connections will occur. * @param final_port Pointer to the final port the server will use. + * @param increment_port_on_retry Boolean to retry port increment. * @return 0 for success, -1 for failure. */ -int create_TCP_server(uint16_t port, int* final_socket, uint16_t* final_port); +int create_TCP_server(uint16_t port, int* final_socket, uint16_t* final_port, bool increment_port_on_retry); /** * @brief Create a UDP server that listens for socket connections. * @@ -106,7 +107,7 @@ int create_TCP_server(uint16_t port, int* final_socket, uint16_t* final_port); * @param final_port Pointer to the final port the server will use. * @return 0 for success, -1 for failure. 
*/ -int create_UDP_server(uint16_t port, int* final_socket, uint16_t* final_port); +int create_UDP_server(uint16_t port, int* final_socket, uint16_t* final_port, bool increment_port_on_retry); /** * These two functions waits for an incoming connection request on the specified server socket. From f564383fdddd593e98b3184178c761b04b52c1f7 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Fri, 20 Dec 2024 12:53:11 -0700 Subject: [PATCH 41/50] Minor fix. --- core/federated/network/socket_common.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/core/federated/network/socket_common.c b/core/federated/network/socket_common.c index fa02da867..a0b8e0747 100644 --- a/core/federated/network/socket_common.c +++ b/core/federated/network/socket_common.c @@ -90,7 +90,7 @@ static int set_socket_bind_option(int socket_descriptor, uint16_t specified_port // Zero out the server address structure. bzero((char*)&server_fd, sizeof(server_fd)); uint16_t used_port = specified_port; - if (specified_port == 0 && increment_port_on_retry == true) { // RTI is set to 1 if no specified port. + if (specified_port == 0 && increment_port_on_retry == true) { used_port = DEFAULT_PORT; } server_fd.sin_family = AF_INET; // IPv4 @@ -104,7 +104,7 @@ static int set_socket_bind_option(int socket_descriptor, uint16_t specified_port int count = 1; while (result != 0 && count++ < PORT_BIND_RETRY_LIMIT) { - if (specified_port == 0 && increment_port_on_retry == true) { // RTI is set to 1 if no specified port. + if (specified_port == 0 && increment_port_on_retry == true) { lf_print_warning("RTI failed to get port %d.", used_port); used_port++; if (used_port >= DEFAULT_PORT + MAX_NUM_PORT_ADDRESSES) @@ -120,17 +120,17 @@ static int set_socket_bind_option(int socket_descriptor, uint16_t specified_port } // Set the global server port. - if (specified_port == 0) { // Federates are set to 0 if no specified port. 
+ if (specified_port == 0 && increment_port_on_retry == false) { // Need to retrieve the port number assigned by the OS. struct sockaddr_in assigned; socklen_t addr_len = sizeof(assigned); if (getsockname(socket_descriptor, (struct sockaddr*)&assigned, &addr_len) < 0) { - lf_print_error_and_exit("Failed to retrieve assigned port number."); + lf_print_error_and_exit("Federate fdailed to retrieve assigned port number."); } used_port = ntohs(assigned.sin_port); } if (result != 0) { - lf_print_error_and_exit("Failed to bind the RTI socket. Port %d is not available. ", used_port); + lf_print_error_and_exit("Failed to bind the socket. Port %d is not available. ", used_port); } return used_port; } From 6042fbc0021bee9e1d59b2fc5b2e107db290d4b5 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Fri, 20 Dec 2024 12:57:25 -0700 Subject: [PATCH 42/50] Minor update on comments. --- core/federated/network/socket_common.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/core/federated/network/socket_common.c b/core/federated/network/socket_common.c index a0b8e0747..45966dfc7 100644 --- a/core/federated/network/socket_common.c +++ b/core/federated/network/socket_common.c @@ -74,14 +74,11 @@ static void set_socket_timeout_option(int socket_descriptor, struct timeval* tim } /** - * Set the socket bind options. If the specified port is 0, it means this is a federate socket server. If the specified - * port is 1, it is creating a RTI server. RTI servers use the port increment when the default port is not available. - * Returns the actually used port. + * Assign a port to the socket, and bind the socket. * * @param socket_descriptor The file descriptor of the socket to be bound to an address and port. - * @param specified_port The port number to bind the socket to. If set to 0, the OS assigns a port. - * If set to 1, the function starts binding at the `DEFAULT_PORT` and increments - * until an available port is found. 
+ * @param specified_port The port number to bind the socket to. + * @param increment_port_on_retry Boolean to retry port increment. * @return The final port number used. */ static int set_socket_bind_option(int socket_descriptor, uint16_t specified_port, bool increment_port_on_retry) { @@ -99,12 +96,11 @@ static int set_socket_bind_option(int socket_descriptor, uint16_t specified_port int result = bind(socket_descriptor, (struct sockaddr*)&server_fd, sizeof(server_fd)); - // Try repeatedly to bind to a port. If no specific port is specified, then - // increment the port number each time. - + // Try repeatedly to bind to a port. int count = 1; while (result != 0 && count++ < PORT_BIND_RETRY_LIMIT) { if (specified_port == 0 && increment_port_on_retry == true) { + // If the specified port number is zero, and the increment_port_on_retry is true, increment the port number each time. lf_print_warning("RTI failed to get port %d.", used_port); used_port++; if (used_port >= DEFAULT_PORT + MAX_NUM_PORT_ADDRESSES) From 4f183a42093b4983b4e513cd830edaaffba0a7ab Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Fri, 20 Dec 2024 13:03:30 -0700 Subject: [PATCH 43/50] Fix typo, and add debug messages. 
--- core/federated/network/socket_common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/federated/network/socket_common.c b/core/federated/network/socket_common.c index 45966dfc7..023000049 100644 --- a/core/federated/network/socket_common.c +++ b/core/federated/network/socket_common.c @@ -121,13 +121,14 @@ static int set_socket_bind_option(int socket_descriptor, uint16_t specified_port struct sockaddr_in assigned; socklen_t addr_len = sizeof(assigned); if (getsockname(socket_descriptor, (struct sockaddr*)&assigned, &addr_len) < 0) { - lf_print_error_and_exit("Federate fdailed to retrieve assigned port number."); + lf_print_error_and_exit("Federate failed to retrieve assigned port number."); } used_port = ntohs(assigned.sin_port); } if (result != 0) { lf_print_error_and_exit("Failed to bind the socket. Port %d is not available. ", used_port); } + lf_print_debug("Socket is binded to port %d.", used_port); return used_port; } From fbbb1fbb73fa017dc167d0a2e2eac994a3408e4d Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Fri, 20 Dec 2024 13:04:12 -0700 Subject: [PATCH 44/50] Fix formatting --- core/federated/network/socket_common.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/core/federated/network/socket_common.c b/core/federated/network/socket_common.c index 023000049..50cfbd1a2 100644 --- a/core/federated/network/socket_common.c +++ b/core/federated/network/socket_common.c @@ -100,7 +100,8 @@ static int set_socket_bind_option(int socket_descriptor, uint16_t specified_port int count = 1; while (result != 0 && count++ < PORT_BIND_RETRY_LIMIT) { if (specified_port == 0 && increment_port_on_retry == true) { - // If the specified port number is zero, and the increment_port_on_retry is true, increment the port number each time. + // If the specified port number is zero, and the increment_port_on_retry is true, increment the port number each + // time. 
lf_print_warning("RTI failed to get port %d.", used_port); used_port++; if (used_port >= DEFAULT_PORT + MAX_NUM_PORT_ADDRESSES) From 5a26cf5be43c7d3b0bfe8d3fe9816bc9057524d5 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Fri, 20 Dec 2024 13:23:19 -0700 Subject: [PATCH 45/50] Make a single API function accept_socket() --- core/federated/RTI/rti_remote.c | 4 ++-- core/federated/federate.c | 2 +- core/federated/network/socket_common.c | 6 ++---- include/core/federated/network/socket_common.h | 10 +++++----- 4 files changed, 10 insertions(+), 12 deletions(-) diff --git a/core/federated/RTI/rti_remote.c b/core/federated/RTI/rti_remote.c index 4f3a24781..c5bd02955 100644 --- a/core/federated/RTI/rti_remote.c +++ b/core/federated/RTI/rti_remote.c @@ -1413,7 +1413,7 @@ static bool authenticate_federate(int* socket) { void lf_connect_to_federates(int socket_descriptor) { for (int i = 0; i < rti_remote->base.number_of_scheduling_nodes; i++) { - int socket_id = accept_rti_socket(rti_remote->socket_descriptor_TCP); + int socket_id = accept_socket(rti_remote->socket_descriptor_TCP, -1); // Wait for the first message from the federate when RTI -a option is on. #ifdef __RTI_AUTH__ if (rti_remote->authentication_enabled) { @@ -1473,7 +1473,7 @@ void* respond_to_erroneous_connections(void* nothing) { // Wait for an incoming connection request. // The following will block until either a federate attempts to connect // or close(rti->socket_descriptor_TCP) is called. 
- int socket_id = accept_rti_socket(rti_remote->socket_descriptor_TCP); + int socket_id = accept_socket(rti_remote->socket_descriptor_TCP, -1); if (socket_id < 0) { return NULL; } diff --git a/core/federated/federate.c b/core/federated/federate.c index 7d51e1bcd..63caef40f 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -1995,7 +1995,7 @@ void* lf_handle_p2p_connections_from_federates(void* env_arg) { _fed.inbound_socket_listeners = (lf_thread_t*)calloc(_fed.number_of_inbound_p2p_connections, sizeof(lf_thread_t)); while (received_federates < _fed.number_of_inbound_p2p_connections && !_lf_termination_executed) { // Wait for an incoming connection request. - int socket_id = accept_federate_socket(_fed.server_socket, _fed.socket_TCP_RTI); + int socket_id = accept_socket(_fed.server_socket, _fed.socket_TCP_RTI); if (socket_id < 0) { lf_print_warning("Federate failed to accept the socket."); return NULL; diff --git a/core/federated/network/socket_common.c b/core/federated/network/socket_common.c index 50cfbd1a2..406f45c66 100644 --- a/core/federated/network/socket_common.c +++ b/core/federated/network/socket_common.c @@ -173,6 +173,7 @@ static int create_server(uint16_t port, int* final_socket, uint16_t* final_port, int create_TCP_server(uint16_t port, int* final_socket, uint16_t* final_port, bool increment_port_on_retry) { return create_server(port, final_socket, final_port, 0, increment_port_on_retry); } + int create_UDP_server(uint16_t port, int* final_socket, uint16_t* final_port, bool increment_port_on_retry) { return create_server(port, final_socket, final_port, 1, increment_port_on_retry); } @@ -191,7 +192,7 @@ static bool check_socket_closed(int socket) { } } -static int accept_socket(int socket, int rti_socket) { +int accept_socket(int socket, int rti_socket) { struct sockaddr client_fd; // Wait for an incoming connection request. 
uint32_t client_length = sizeof(client_fd); @@ -223,9 +224,6 @@ static int accept_socket(int socket, int rti_socket) { return socket_id; } -int accept_rti_socket(int socket) { return accept_socket(socket, -1); } -int accept_federate_socket(int socket, int rti_socket) { return accept_socket(socket, rti_socket); } - int connect_to_socket(int sock, const char* hostname, int port) { struct addrinfo hints; struct addrinfo* result; diff --git a/include/core/federated/network/socket_common.h b/include/core/federated/network/socket_common.h index 205334a95..71570c125 100644 --- a/include/core/federated/network/socket_common.h +++ b/include/core/federated/network/socket_common.h @@ -113,16 +113,16 @@ int create_UDP_server(uint16_t port, int* final_socket, uint16_t* final_port, bo * These two functions waits for an incoming connection request on the specified server socket. * It blocks until a connection is successfully accepted. If an error occurs that is not * temporary (e.g., `EAGAIN` or `EWOULDBLOCK`), it reports the error and exits. Temporary - * errors cause the function to retry accepting the connection. The accept_federate_socket() function additionally - * checks the RTI's server socket if it is still alive. + * errors cause the function to retry accepting the connection. + * If the rti_socket is not -1, it checks if the RTI's server socket is still open. * * @param socket The server socket file descriptor that is listening for incoming connections. * @param rti_socket The rti socket for the federate to check if it is still open. - * @return int The file descriptor for the newly accepted socket on success, or -1 on failure + * @return The file descriptor for the newly accepted socket on success, or -1 on failure * (with an appropriate error message printed). 
*/ -int accept_rti_socket(int socket); -int accept_federate_socket(int socket, int rti_socket); + +int accept_socket(int socket, int rti_socket); /** * From 7eb4d55f980cf7186870a7e167bfe78eacb3cff3 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Fri, 20 Dec 2024 13:50:47 -0700 Subject: [PATCH 46/50] Add more logs. --- core/federated/federate.c | 2 +- core/federated/network/socket_common.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/core/federated/federate.c b/core/federated/federate.c index 63caef40f..4b12f8b53 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -1818,7 +1818,7 @@ void lf_connect_to_federate(uint16_t remote_federate_id) { remote_federate_id, ADDRESS_QUERY_RETRY_INTERVAL); continue; } else { - lf_print("Connected to federate %d, port %d.", remote_federate_id, port); + lf_print("Connected to federate %d, port %hu.", remote_federate_id, uport); // Trace the event when tracing is enabled tracepoint_federate_to_federate(receive_ACK, _lf_my_fed_id, remote_federate_id, NULL); break; diff --git a/core/federated/network/socket_common.c b/core/federated/network/socket_common.c index 406f45c66..20fff0585 100644 --- a/core/federated/network/socket_common.c +++ b/core/federated/network/socket_common.c @@ -266,8 +266,8 @@ int connect_to_socket(int sock, const char* hostname, int port) { used_port = DEFAULT_PORT; } } - lf_print_warning("Could not connect. Will try again every " PRINTF_TIME " nanoseconds.\n", - CONNECT_RETRY_INTERVAL); + lf_print_warning("Could not connect. Will try again every " PRINTF_TIME " nanoseconds. Connecting to port %d.\n", + CONNECT_RETRY_INTERVAL, used_port); continue; } else { break; From 9b8b7d6e5963beab5c527a9d2bb083c348ec7761 Mon Sep 17 00:00:00 2001 From: Dongha Kim <74869052+Jakio815@users.noreply.github.com> Date: Tue, 24 Dec 2024 06:11:04 +0900 Subject: [PATCH 47/50] Apply suggestions from code review Co-authored-by: Edward A. 
Lee --- .../core/federated/network/socket_common.h | 21 +++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) diff --git a/include/core/federated/network/socket_common.h b/include/core/federated/network/socket_common.h index 71570c125..4d5aeb00d 100644 --- a/include/core/federated/network/socket_common.h +++ b/include/core/federated/network/socket_common.h @@ -2,7 +2,10 @@ #define SOCKET_COMMON_H #include "low_level_platform.h" -#define NUM_SOCKET_RETRIES 10 + +/** + * The amount of time to wait after a failed socket read or write before trying again. This defaults to 100 ms. + */ #define DELAY_BETWEEN_SOCKET_RETRIES MSEC(100) /** @@ -97,6 +100,7 @@ int create_real_time_tcp_socket_errexit(); * @return 0 for success, -1 for failure. */ int create_TCP_server(uint16_t port, int* final_socket, uint16_t* final_port, bool increment_port_on_retry); + /** * @brief Create a UDP server that listens for socket connections. * @@ -105,16 +109,21 @@ int create_TCP_server(uint16_t port, int* final_socket, uint16_t* final_port, bo * @param port The port number to use or 0 to let the OS pick or 1 to start trying at DEFAULT_PORT. * @param final_socket Pointer to the returned socket descriptor on which accepting connections will occur. * @param final_port Pointer to the final port the server will use. + * @param increment_port_on_retry Boolean to retry port increment. * @return 0 for success, -1 for failure. */ int create_UDP_server(uint16_t port, int* final_socket, uint16_t* final_port, bool increment_port_on_retry); /** - * These two functions waits for an incoming connection request on the specified server socket. - * It blocks until a connection is successfully accepted. If an error occurs that is not + * Wait for an incoming connection request on the specified server socket. + * This blocks until a connection is successfully accepted. If an error occurs that is not * temporary (e.g., `EAGAIN` or `EWOULDBLOCK`), it reports the error and exits. 
Temporary * errors cause the function to retry accepting the connection. - * If the rti_socket is not -1, it checks if the RTI's server socket is still open. + * + * If the `rti_socket` is not -1, this function checks whether the specified socket is still open. + * If it is not open, then this function returns -1. + * This is useful for federates to determine whether they are still connected to the federation + * and to stop waiting when they are not. * * @param socket The server socket file descriptor that is listening for incoming connections. * @param rti_socket The rti socket for the federate to check if it is still open. @@ -126,8 +135,8 @@ int accept_socket(int socket, int rti_socket); /** * - * This function attempts to establish a TCP connection to the specified hostname - * and port. It uses `getaddrinfo` to resolve the hostname and retries the connection + * Attempt to establish a TCP connection to the specified hostname + * and port. This function uses `getaddrinfo` to resolve the hostname and retries the connection * periodically if it fails. If the specified port is 0, it iterates through a range * of default ports starting from `DEFAULT_PORT`. The function will stop retrying * if the `CONNECT_TIMEOUT` is reached. From b2a5af8206f7a973d04eba8661d8978889fd2ed9 Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Fri, 27 Dec 2024 07:37:22 +0900 Subject: [PATCH 48/50] Make create_server to one integrated function of TCP and UDP. 
--- core/federated/RTI/rti_remote.c | 6 +++--- core/federated/RTI/rti_remote.h | 3 +-- core/federated/federate.c | 2 +- core/federated/network/socket_common.c | 17 ++++------------- include/core/federated/network/socket_common.h | 16 ++++------------ 5 files changed, 13 insertions(+), 31 deletions(-) diff --git a/core/federated/RTI/rti_remote.c b/core/federated/RTI/rti_remote.c index 2ca3ad139..4d38d416f 100644 --- a/core/federated/RTI/rti_remote.c +++ b/core/federated/RTI/rti_remote.c @@ -1508,15 +1508,15 @@ void initialize_federate(federate_info_t* fed, uint16_t id) { int32_t start_rti_server(uint16_t port) { _lf_initialize_clock(); // Create the TCP socket server - if (create_TCP_server(port, &rti_remote->socket_descriptor_TCP, &rti_remote->final_port_TCP, true)) { + if (create_server(port, &rti_remote->socket_descriptor_TCP, &rti_remote->final_port_TCP, TCP, true)) { lf_print_error_system_failure("RTI failed to create TCP server: %s.", strerror(errno)); }; lf_print("RTI: Listening for federates."); // Create the UDP socket server // Try to get the rti_remote->final_port_TCP + 1 port if (rti_remote->clock_sync_global_status >= clock_sync_on) { - if (create_UDP_server(rti_remote->final_port_TCP + 1, &rti_remote->socket_descriptor_UDP, - &rti_remote->final_port_UDP, true)) { + if (create_server(rti_remote->final_port_TCP + 1, &rti_remote->socket_descriptor_UDP, + &rti_remote->final_port_UDP, UDP, true)) { lf_print_error_system_failure("RTI failed to create UDP server: %s.", strerror(errno)); } } diff --git a/core/federated/RTI/rti_remote.h b/core/federated/RTI/rti_remote.h index 000fabcac..de6b144aa 100644 --- a/core/federated/RTI/rti_remote.h +++ b/core/federated/RTI/rti_remote.h @@ -31,6 +31,7 @@ #include "lf_types.h" #include "pqueue_tag.h" +#include "socket_common.h" /** Time allowed for federates to reply to stop request. 
*/ #define MAX_TIME_FOR_REPLY_TO_STOP_REQUEST SEC(30) @@ -38,8 +39,6 @@ ///////////////////////////////////////////// //// Data structures -typedef enum socket_type_t { TCP, UDP } socket_type_t; - /** * Information about a federate known to the RTI, including its runtime state, * mode of execution, and connectivity with other federates. diff --git a/core/federated/federate.c b/core/federated/federate.c index f8f13d740..fc2f86911 100644 --- a/core/federated/federate.c +++ b/core/federated/federate.c @@ -1942,7 +1942,7 @@ void lf_connect_to_rti(const char* hostname, int port) { void lf_create_server(int specified_port) { assert(specified_port <= UINT16_MAX && specified_port >= 0); - if (create_TCP_server(specified_port, &_fed.server_socket, (uint16_t*)&_fed.server_port, false)) { + if (create_server(specified_port, &_fed.server_socket, (uint16_t*)&_fed.server_port, TCP, false)) { lf_print_error_system_failure("RTI failed to create TCP server: %s.", strerror(errno)); }; LF_PRINT_LOG("Server for communicating with other federates started using port %d.", _fed.server_port); diff --git a/core/federated/network/socket_common.c b/core/federated/network/socket_common.c index 20fff0585..1d6945346 100644 --- a/core/federated/network/socket_common.c +++ b/core/federated/network/socket_common.c @@ -133,11 +133,10 @@ static int set_socket_bind_option(int socket_descriptor, uint16_t specified_port return used_port; } -static int create_server(uint16_t port, int* final_socket, uint16_t* final_port, int sock_type, - bool increment_port_on_retry) { +int create_server(uint16_t port, int* final_socket, uint16_t* final_port, socket_type_t sock_type, bool increment_port_on_retry) { int socket_descriptor; struct timeval timeout_time; - if (sock_type == 0) { + if (sock_type == TCP) { // Create an IPv4 socket for TCP. 
socket_descriptor = create_real_time_tcp_socket_errexit(); // Set the timeout time for the communications of the server @@ -149,14 +148,14 @@ static int create_server(uint16_t port, int* final_socket, uint16_t* final_port, timeout_time = (struct timeval){.tv_sec = UDP_TIMEOUT_TIME / BILLION, .tv_usec = (UDP_TIMEOUT_TIME % BILLION) / 1000}; } - char* type = (sock_type == 0) ? "TCP" : "UDP"; + char* type = (sock_type == TCP) ? "TCP" : "UDP"; if (socket_descriptor < 0) { lf_print_error("Failed to create %s socket.", type); return -1; } set_socket_timeout_option(socket_descriptor, &timeout_time); int used_port = set_socket_bind_option(socket_descriptor, port, increment_port_on_retry); - if (sock_type == 0) { + if (sock_type == TCP) { // Enable listening for socket connections. // The second argument is the maximum number of queued socket requests, // which according to the Mac man page is limited to 128. @@ -170,14 +169,6 @@ static int create_server(uint16_t port, int* final_socket, uint16_t* final_port, return 0; } -int create_TCP_server(uint16_t port, int* final_socket, uint16_t* final_port, bool increment_port_on_retry) { - return create_server(port, final_socket, final_port, 0, increment_port_on_retry); -} - -int create_UDP_server(uint16_t port, int* final_socket, uint16_t* final_port, bool increment_port_on_retry) { - return create_server(port, final_socket, final_port, 1, increment_port_on_retry); -} - /** * Return true if either the socket to the RTI is broken or the socket is * alive and the first unread byte on the socket's queue is MSG_TYPE_FAILED. diff --git a/include/core/federated/network/socket_common.h b/include/core/federated/network/socket_common.h index 71570c125..a9e744791 100644 --- a/include/core/federated/network/socket_common.h +++ b/include/core/federated/network/socket_common.h @@ -63,6 +63,8 @@ */ #define MSG_TYPE_FAILED 25 +typedef enum socket_type_t { TCP, UDP } socket_type_t; + /** * Mutex protecting socket close operations. 
*/ @@ -93,21 +95,11 @@ int create_real_time_tcp_socket_errexit(); * @param port The port number to use or 0 to let the OS pick or 1 to start trying at DEFAULT_PORT. * @param final_socket Pointer to the returned socket descriptor on which accepting connections will occur. * @param final_port Pointer to the final port the server will use. + * @param sock_type Type of the socket wheter TCP or UDP. * @param increment_port_on_retry Boolean to retry port increment. * @return 0 for success, -1 for failure. */ -int create_TCP_server(uint16_t port, int* final_socket, uint16_t* final_port, bool increment_port_on_retry); -/** - * @brief Create a UDP server that listens for socket connections. - * - * This function is just like create_TCP_server(), except that it creates a UDP server. - * - * @param port The port number to use or 0 to let the OS pick or 1 to start trying at DEFAULT_PORT. - * @param final_socket Pointer to the returned socket descriptor on which accepting connections will occur. - * @param final_port Pointer to the final port the server will use. - * @return 0 for success, -1 for failure. - */ -int create_UDP_server(uint16_t port, int* final_socket, uint16_t* final_port, bool increment_port_on_retry); +int create_server(uint16_t port, int* final_socket, uint16_t* final_port, socket_type_t sock_type, bool increment_port_on_retry); /** * These two functions waits for an incoming connection request on the specified server socket. From 817d2f9d5796ba992c8dbfce76090aa3a23300ed Mon Sep 17 00:00:00 2001 From: Dongha Kim Date: Fri, 27 Dec 2024 08:12:38 +0900 Subject: [PATCH 49/50] Fix formatting. 
--- core/federated/RTI/rti_remote.c | 4 ++-- core/federated/network/socket_common.c | 3 ++- include/core/federated/network/socket_common.h | 3 ++- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/core/federated/RTI/rti_remote.c b/core/federated/RTI/rti_remote.c index 4d38d416f..6f705d2b9 100644 --- a/core/federated/RTI/rti_remote.c +++ b/core/federated/RTI/rti_remote.c @@ -1515,8 +1515,8 @@ int32_t start_rti_server(uint16_t port) { // Create the UDP socket server // Try to get the rti_remote->final_port_TCP + 1 port if (rti_remote->clock_sync_global_status >= clock_sync_on) { - if (create_server(rti_remote->final_port_TCP + 1, &rti_remote->socket_descriptor_UDP, - &rti_remote->final_port_UDP, UDP, true)) { + if (create_server(rti_remote->final_port_TCP + 1, &rti_remote->socket_descriptor_UDP, &rti_remote->final_port_UDP, + UDP, true)) { lf_print_error_system_failure("RTI failed to create UDP server: %s.", strerror(errno)); } } diff --git a/core/federated/network/socket_common.c b/core/federated/network/socket_common.c index 1d6945346..cc71e897b 100644 --- a/core/federated/network/socket_common.c +++ b/core/federated/network/socket_common.c @@ -133,7 +133,8 @@ static int set_socket_bind_option(int socket_descriptor, uint16_t specified_port return used_port; } -int create_server(uint16_t port, int* final_socket, uint16_t* final_port, socket_type_t sock_type, bool increment_port_on_retry) { +int create_server(uint16_t port, int* final_socket, uint16_t* final_port, socket_type_t sock_type, + bool increment_port_on_retry) { int socket_descriptor; struct timeval timeout_time; if (sock_type == TCP) { diff --git a/include/core/federated/network/socket_common.h b/include/core/federated/network/socket_common.h index c3465c2f5..9d8a71d57 100644 --- a/include/core/federated/network/socket_common.h +++ b/include/core/federated/network/socket_common.h @@ -102,7 +102,8 @@ int create_real_time_tcp_socket_errexit(); * @param increment_port_on_retry Boolean to retry 
port increment. * @return 0 for success, -1 for failure. */ -int create_server(uint16_t port, int* final_socket, uint16_t* final_port, socket_type_t sock_type, bool increment_port_on_retry); +int create_server(uint16_t port, int* final_socket, uint16_t* final_port, socket_type_t sock_type, + bool increment_port_on_retry); /** * Wait for an incoming connection request on the specified server socket. From 106c827bb9c854cfeb3fa6f9307ed834bc953e26 Mon Sep 17 00:00:00 2001 From: Dongha Kim <74869052+Jakio815@users.noreply.github.com> Date: Sat, 28 Dec 2024 05:52:35 +0900 Subject: [PATCH 50/50] Update include/core/federated/network/socket_common.h Co-authored-by: Edward A. Lee --- include/core/federated/network/socket_common.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/core/federated/network/socket_common.h b/include/core/federated/network/socket_common.h index 9d8a71d57..9c6138e05 100644 --- a/include/core/federated/network/socket_common.h +++ b/include/core/federated/network/socket_common.h @@ -98,7 +98,7 @@ int create_real_time_tcp_socket_errexit(); * @param port The port number to use or 0 to let the OS pick or 1 to start trying at DEFAULT_PORT. * @param final_socket Pointer to the returned socket descriptor on which accepting connections will occur. * @param final_port Pointer to the final port the server will use. - * @param sock_type Type of the socket wheter TCP or UDP. + * @param sock_type Type of the socket, TCP or UDP. * @param increment_port_on_retry Boolean to retry port increment. * @return 0 for success, -1 for failure. */