From 171b3a5c6827cc61ff8b11d9c05369e284bfbeec Mon Sep 17 00:00:00 2001 From: Angela Stegmaier Date: Tue, 5 Jun 2018 14:57:01 -0500 Subject: [PATCH] lad: Add Timeout waiting for Response FIFO Add a timeout to the logic which opens the client response FIFO in the LAD daemon during LAD_CONNECT. There are some corner cases that might result in this hanging forever waiting for the response FIFO, in which case all other LAD communication will be blocked forever. Such cases might be: - The Client application crashes after sending a LAD_CONNECT request - Communication with LAD through the LADCMDS FIFO takes too long and the Client times out waiting for the response FIFO (5 seconds). If this happens, the Client will never try to open the FIFO again because it fails when doing the stat() on the FIFO. With this patch, if the FIFO connection is not established within the timeout, the FIFO will be unlinked and the original state restored so that the connection can be attempted again. The timeout is chosen as 1 second so as not to block all other LAD communication for a long period of time. One second should (hopefully) be sufficient time for the Client and LAD to both open the FIFO. 
Signed-off-by: Angela Stegmaier --- linux/include/_lad.h | 1 + linux/src/daemon/lad.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) diff --git a/linux/include/_lad.h b/linux/include/_lad.h index 4bdfd8e..0124608 100644 --- a/linux/include/_lad.h +++ b/linux/include/_lad.h @@ -147,6 +147,7 @@ extern struct timeval start_tv; #define LAD_MAXNUMCLIENTS 32 /* max simultaneous clients */ #define LAD_CONNECTTIMEOUT 5.0 /* LAD connect response timeout (sec) */ #define LAD_DISCONNECTTIMEOUT 5.0 /* LAD disconnect timeout (sec) */ +#define LAD_RESPONSEFIFOTIMEOUT 1.0 /* LAD timeout for opening the response fifo */ #define LAD_MAXLENGTHFIFONAME 128 /* max length client FIFO name */ #define LAD_MAXLENGTHCOMMAND 512 /* size limit for LAD command string */ #define LAD_MAXLENGTHRESPONSE 512 /* size limit for LAD response string */ diff --git a/linux/src/daemon/lad.c b/linux/src/daemon/lad.c index 758e098..4160400 100644 --- a/linux/src/daemon/lad.c +++ b/linux/src/daemon/lad.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include <_MessageQ.h> @@ -1065,6 +1066,11 @@ static Int connectToLAD(String clientName, Int pid, String clientProto, Int *cli Int clientId = -1; Bool connectDenied = FALSE; Int status = LAD_SUCCESS; + time_t currentTime; + time_t startTime; + struct stat statBuf; + double delta = 0; + int filePtrFd = 0; FILE * filePtr; Int statusIO; @@ -1133,7 +1139,30 @@ openResponseFIFO: /* set FIFO permissions to read/write */ chmod(clientName, 0666); + startTime = time ((time_t *) 0); + while (delta <= LAD_RESPONSEFIFOTIMEOUT) { + /* open a file for writing to FIFO, non-blocking so we can timeout */ + filePtrFd = open(clientName, O_WRONLY | O_TRUNC | O_NONBLOCK); + if (filePtrFd != -1) { + break; + } + LOG1("\nWARN: Client %s has not yet opened, will retry\n", clientName) + usleep(100); + currentTime = time ((time_t *) 0); + delta = difftime(currentTime, startTime); + } + + if (delta > LAD_RESPONSEFIFOTIMEOUT) { + LOG1("\nERROR: timed 
out waiting for Client to open response FIFO %s\n", clientName); + unlink(clientName); + status = LAD_IOFAILURE; + goto doneconnect; + } + filePtr = fopen(clientName, "w"); + + close(filePtrFd); + if (filePtr == NULL) { LOG1("\nERROR: unable to open response FIFO %s\n", clientName) -- 2.39.2