# fixtcp
# dennis bednar jan 24 86	dennis@rlgvax.uucp
#
# Unhang tcp connections which are stuck in the FIN_WAIT2 state
# These connections can be seen by doing a 4.2 netstat -a command.
#
# Usage:
# invoke as "fixtcp" to display kernel stuff for connections.
# "fixtcp" by itself is HIGHLY RECOMMENDED for the first time!
#
# invoke as "fixtcp fix" to patch kernel memory - you must be root.
# Then do a netstat -a command, and it should have gone away.
#
# CCI only symptom:
# A symptom of this problem is that "startoftp" goes wild restarting
# the receive daemon, and you see a lot of rcvlog.pid files being
# created in the oftp spool directory.
#
# Symptom for everybody else:
# In general, a symptom of this problem is that a tcpopen passive
# will fail with the errno UNIX reason being "Address Already In Use".
#
#
# To correct OFTP problem (CCI only):
# su root
# killoftp; fixtcp fix; startoftp
#
# Internals of how this script works:
# Works by loading the 2 * msl timer (addr+16) in the Connection Control Block
# with a 1, which means it will time out in 1/2 second from now, and
# enter the CLOSE state, and then the CCB will be freed (so you will not
# see it with netstat -a).
# The proper offset for the 2 * msl timer can be seen by examining
# /usr/include/netinet/tcp_var.h include file, plus other tcp*.h files
# in the same directory.
#
# relies on
#	_get_tcp_	a.out file that returns the offset of various
#			fields in a connection control block.
#			There is a _get_tcp_.c file to create this.
#			This was created to avoid problems of offsets
#			being site-dependent, if your OS uses different
#			offsets.
#
#

# Use only the base name of this script in messages, not its full path.
# "$0" is quoted so an install path containing blanks is handled.
cmd=`basename "$0"`

# name of state to look for in the netstat command
# state=ESTABLISHED	# debugging
state=FIN_WAIT_2	# really

# Get the values of the offsets of the fields in the structure for adb.
# They come from the _get_tcp_ helper rather than being hard-coded, since
# the offsets can differ from site to site.
stateoff=`_get_tcp_ state`	# probably 0x8
timer2msloff=`_get_tcp_ 2msl`	# probably 0x10
FIN_WAIT2=`_get_tcp_ FIN_WAIT2`	# probably 9
FIN_CLOSE=`_get_tcp_ TIME_CLOSE`	# probably 10 (not referenced below)

# Fail fast if the helper produced nothing (not installed, not in PATH,
# or it failed).  An empty offset would otherwise be pasted into the adb
# address expressions used against /dev/kmem further down.
if [ -z "$stateoff" ] || [ -z "$timer2msloff" ]
then
	echo "$cmd: cannot get tcp offsets from _get_tcp_ (is it in your PATH?)" >&2
	exit 1
fi


# Remove temp file if SIGHUP, SIGINT, SIGTERM.
# The trap string is double-quoted, so $cmd and $$ are expanded now.
# rm -f: the signal may arrive before the temp file has been created.
trap "echo $cmd: interrupted; rm -f /tmp/fixtcp.$$; exit 1" 1 2 15

# Get kernel address of TCP CCB's in FIN_WAIT2 and save in a temporary file.
# Each matching "netstat -A" line is cut at its first blank, leaving only
# the leading address column.
# The substitution deliberately has no "p" flag: without "sed -n" the flag
# prints each line a second time, so every address would be listed (and
# later processed by adb) twice.
# NOTE(review): /tmp/fixtcp.$$ is a predictable name written by root;
# consider a private directory if untrusted local users are a concern.
netstat -A | grep "$state" | sed 's/ .*//' >/tmp/fixtcp.$$

# check if we got any addresses
if [ ! -s /tmp/fixtcp.$$ ]
then
#	file doesn't exist or is zero in length, therefore no addresses
	echo "$cmd: Sorry, no tcp connections stuck in $state state."
	rm /tmp/fixtcp.$$
	exit 0
fi

# Show the stuck connections as netstat reports them before anything is
# examined or patched.
echo "Before: only connections in state $state"
netstat -a | grep "$state"


# cat /tmp/fixtcp.$$	# debug

# See if we want to patch kernel memory or just display it.
# /tmp/fixtcp.$$ holds the CCB addresses collected earlier; the here-documents
# below must keep their EOF terminators in column 0, and "\$q" is escaped so
# that adb receives a literal "$q" (quit) command.
if [ "$1" = "fix" ]
then
#	patch by writing 1 into the 2 * msl timer of every stuck CCB
	for addr in `cat /tmp/fixtcp.$$`
	do
adb -w /vmunix /dev/kmem <<EOF
0x$addr+$timer2msloff/w 1
\$q
EOF
	done

	sleep 2			# wait for connection to clear

#	make sure it really got unstuck; the temp file is reused to hold
#	whatever netstat still reports in this state
	netstat -a | grep "$state" >/tmp/fixtcp.$$
	if [ -s /tmp/fixtcp.$$ ]	# file exists and size > 0
	then
		echo "$cmd: Sorry, TCP connections still hung!!"
		rm /tmp/fixtcp.$$
		exit 1
	else
		echo "$cmd: TCP connections in state $state have been unstuck."
	fi
else
#	Just display the current state flag and current 2 * msl timer.
#	The offsets come from _get_tcp_ (stateoff / timer2msloff) so that the
#	fields displayed here are the same ones "fix" mode would patch, even
#	on a system whose offsets differ from the usual 0x8 and 0x10.
	for addr in `cat /tmp/fixtcp.$$`
	do
		echo "The next two numbers displayed by adb should be $FIN_WAIT2 and 0."
		echo "The state flag value of $FIN_WAIT2 represents the FIN_WAIT_2 state."
		echo "The decimal 0 means the 2 * msl timer is off."
adb /vmunix /dev/kmem <<EOF
0x$addr+$stateoff/d
0x$addr+$timer2msloff/d
\$q
EOF
	done
fi

# cleanup intermediate file
rm /tmp/fixtcp.$$

# Final report; this is printed in both modes, so in display-only mode it
# is expected to show the same connections as the "Before" listing.
echo "After: only connections in state $state"
netstat -a | grep "$state"
exit 0
