/*	ip_input.c	1.45	82/06/20	*/

#include "param.h"
#include <sys/systm.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
#include <sys/socket.h>
#include <netinet/in.h>
#include <netinet/in_systm.h>
#include <net/if.h>
#include <netinet/ip.h>			/* belongs before in.h */
#include <netinet/ip_var.h>
#include <netinet/ip_icmp.h>
#include <netinet/tcp.h>
#include <netinet/ip_acct.h>		/* special ip accounting */
#include <errno.h>

extern	u_long	LocalAddr;		/* Generic net addr of this host */

/*
 * Protocol dispatch map: ip_protox[p] holds the index into protosw[]
 * of the entry whose pr_input routine receives datagrams carrying
 * IP protocol number p.  Filled in by ip_init().
 */
u_char	ip_protox[IPPROTO_MAX];
/* Limit copied into ipintrq.ifq_maxlen by ip_init(). */
int	ipqmaxlen = IFQ_MAXLEN;
struct	ifnet *ifinet;			/* first inet interface */

/*
 * Optional per-host packet accounting.  ip_init() zeroes both
 * tables and looks up enifp; ipintr() updates in_pkt.  out_pkt is
 * not touched by the code visible in this file.
 */
#ifdef	IP_ACCT				/* any ip accounting */
struct	ip_acct in_pkt[N_IPHOSTS];	/* input packet count array */
struct	ip_acct out_pkt[N_IPHOSTS];	/* output packet count array */
struct	ifnet	*enifp;			/* ethernet interface ptr */
#endif	IP_ACCT

/*
 * IP start-up.
 *
 * Builds ip_protox[], the map from IP protocol number to protosw[]
 * slot: every number first defaults to the raw IP entry, then each
 * PF_INET protocol with a real (non-zero, non-raw) protocol number
 * claims its own slot.  Also empties the reassembly queue, seeds
 * ip_id from the clock, sizes the input queue and remembers the
 * first internet interface.  Panics if no raw IP protosw entry exists.
 */
ip_init()
{
	register struct protosw *rawpr, *psw;
	register int proto;

	rawpr = pffindproto(PF_INET, IPPROTO_RAW);
	if (rawpr == 0)
		panic("ip_init");

	/* Everything goes to the raw handler unless claimed below. */
	proto = 0;
	while (proto < IPPROTO_MAX) {
		ip_protox[proto] = rawpr - protosw;
		proto++;
	}

	for (psw = protosw; psw <= protoswLAST; psw++) {
		if (psw->pr_family != PF_INET)
			continue;
		if (psw->pr_protocol == 0 || psw->pr_protocol == IPPROTO_RAW)
			continue;
		ip_protox[psw->pr_protocol] = psw - protosw;
	}

	/* An empty reassembly queue points at itself. */
	ipq.prev = &ipq;
	ipq.next = &ipq;
	ip_id = time & 0xffff;
	ipintrq.ifq_maxlen = ipqmaxlen;
	ifinet = if_ifwithaf(AF_INET);
#ifdef	IP_ACCT
	/* Accounting: find the ethernet interface, clear both tables. */
	enifp = ifunit(INTERFACE);
	bzero((caddr_t)&in_pkt, sizeof in_pkt);
	bzero((caddr_t)&out_pkt, sizeof out_pkt);
#endif	/* IP_ACCT */
}

/* When nonzero, ipintr() verifies the header checksum of each datagram. */
u_char	ipcksum = 1;
struct	ip *ip_reass();
/* Scratch internet address, filled in before interface lookups. */
struct	sockaddr_in ipaddr = { AF_INET };

/*
 * Ip input routine.  Checksum and byte swap header.  If fragmented
 * try to reassamble.  If complete and fragment queue exists, discard.
 * Process options.  Pass to next level.
 */
ipintr()
{
	register struct ip *ip;
	register struct mbuf *m;
	struct mbuf *m0, *mopt;
	register int i;
	register struct ipq *fp;
	int hlen, s;

next:
	/*
	 * Get next datagram off input queue and get IP header
	 * in first mbuf.
	 */
	s = splimp();
	IF_DEQUEUE(&ipintrq, m);
	splx(s);
	if (m == 0)
		return;
	if ((m->m_off > MMAXOFF || m->m_len < sizeof (struct ip)) &&
	    (m = m_pullup(m, sizeof (struct ip))) == 0)
		return;
	ip = mtod(m, struct ip *);
	if ((hlen = ip->ip_hl << 2) > m->m_len) {
		if ((m = m_pullup(m, hlen)) == 0)
			return;
		ip = mtod(m, struct ip *);
	}
	if (ipcksum)
		if (ip->ip_sum = in_cksum(m, hlen)) {
			printf("ip_sum %x\n", ip->ip_sum);	/* XXX */
			ipstat.ips_badsum++;
			goto bad;
		}

#if vax || pdp11
	/*
	 * Convert fields to host representation.
	 */
	ip->ip_len = ntohs((u_short)ip->ip_len);
	ip->ip_id = ntohs(ip->ip_id);
	ip->ip_off = ntohs((u_short)ip->ip_off);
#endif
	if (ip->ip_len < hlen) {
		ipstat.ips_badlen++;
		goto bad;
	}

	/*
	 * Check that the amount of data in the buffers
	 * is as at least much as the IP header would have us expect.
	 * Trim mbufs if longer than we expect.
	 * Drop packet if shorter than we expect.
	 */
	i = -ip->ip_len;
	m0 = m;
	for (;;) {
		i += m->m_len;
		if (m->m_next == 0)
			break;
		m = m->m_next;
	}
	if (i != 0) {
		if (i < 0) {
			ipstat.ips_tooshort++;
			m = m0;	/* Usenet 4.2 bug posting - sm */
			goto bad;
		}
		if (i <= m->m_len)
			m->m_len -= i;
		else
			m_adj(m0, -i);
	}
	m = m0;

#ifdef	IP_ACCT
	if (enifp && enifp->if_net == in_netof(ip->ip_src)) {
		register int i, found;

		found = 0;
		for (i=0; i < N_IPHOSTS && in_pkt[i].pkt_cnt; i++) {
			if (bcmp((caddr_t)in_pkt[i].c_d,
				 (caddr_t)&ip->ip_src.s_imp, 2) == 0) {
				found++;
				in_pkt[i].pkt_cnt++;
				break;
			}
		}
		if (!found && i < N_IPHOSTS) {
			bcopy((caddr_t)&ip->ip_src.s_imp,
			      (caddr_t)in_pkt[i].c_d, 2);
			in_pkt[i].pkt_cnt++;
		}
	}
#endif	IP_ACCT

	/*
	 * Process options and, if not destined for us,
	 * ship it on.  ip_dooptions returns 1 when an
	 * error was detected (causing an icmp message
	 * to be sent).
	 */
	if (hlen > sizeof (struct ip) && ip_dooptions(ip))
		goto next;

	/*
	 * Check for a generic local address
	 * davec, tek, 3/83
	 */
	if (bcmp(&ip->ip_dst.s_addr,&LocalAddr,sizeof(LocalAddr)) == 0)
		goto ours;

	/*
	 * Fast check on the first internet
	 * interface in the list.
	 */
	if (ifinet) {
		struct sockaddr_in *sin;

		sin = (struct sockaddr_in *)&ifinet->if_addr;
		if (sin->sin_addr.s_addr == ip->ip_dst.s_addr)
			goto ours;
		sin = (struct sockaddr_in *)&ifinet->if_broadaddr;
		if ((ifinet->if_flags & IFF_BROADCAST) &&
		    sin->sin_addr.s_addr == ip->ip_dst.s_addr)
			goto ours;
	}
	ipaddr.sin_addr = ip->ip_dst;
	if (if_ifwithaddr((struct sockaddr *)&ipaddr) == 0) {
		ip_forward(ip);
		goto next;
	}

ours:
	/*
	 * Look for queue of fragments
	 * of this datagram.
	 */
	for (fp = ipq.next; fp != &ipq; fp = fp->next)
		if (ip->ip_id == fp->ipq_id &&
		    ip->ip_src.s_addr == fp->ipq_src.s_addr &&
		    ip->ip_dst.s_addr == fp->ipq_dst.s_addr &&
		    ip->ip_p == fp->ipq_p)
			goto found;
	fp = 0;
found:

	/*
	 * Adjust ip_len to not reflect header,
	 * set ip_mff if more fragments are expected,
	 * convert offset of this to bytes.
	 */
	ip->ip_len -= hlen;
	((struct ipasfrag *)ip)->ipf_mff = 0;
	if (ip->ip_off & IP_MF)
		((struct ipasfrag *)ip)->ipf_mff = 1;
	ip->ip_off <<= 3;

	/*
	 * If datagram marked as having more fragments
	 * or if this is not the first fragment,
	 * attempt reassembly; if it succeeds, proceed.
	 */
	if (((struct ipasfrag *)ip)->ipf_mff || ip->ip_off) {
		ip = ip_reass((struct ipasfrag *)ip, fp);
		if (ip == 0)
			goto next;
		hlen = ip->ip_hl << 2;
		m = dtom(ip);
	} else
		if (fp)
			(void) ip_freef(fp);

	/*
	 * Switch out to protocol's input routine.
	 */
	(*protosw[ip_protox[UCHAR(ip->ip_p)]].pr_input)(m);
	goto next;
bad:
	m_freem(m);
	goto next;
}

/*
 * DTOM(q): the mbuf that holds the data of fragment q.
 * On pdp11 builds ip_reass() allocates each fragment header
 * separately and records the mbuf in ipf_mbuf; other builds
 * use dtom() on the fragment header itself.
 */
#if pdp11
#define DTOM(q) ((q)->ipf_mbuf) /* the mbuf for this fragment */
#else
#define DTOM(q) dtom(q)
#endif

/*
 * Take incoming datagram fragment and try to
 * reassemble it into whole datagram.  If a chain for
 * reassembly of this datagram already exists, then it
 * is given as fp; otherwise have to make a chain.
 *
 * ip is the fragment header; ipintr() has already removed the
 * header length from ip_len, converted ip_off to bytes and set
 * ipf_mff.  Returns the header of the completed datagram once all
 * fragments are present, or 0 if the fragment was queued or dropped
 * (a dropped fragment's mbuf chain is freed here).
 */
struct ip *
ip_reass(ip, fp)
	register struct ipasfrag *ip;
	register struct ipq *fp;
{
	register struct mbuf *m = dtom(ip);
	register struct ipasfrag *q, *ipf;
	struct mbuf *t;
	int hlen = ip->ip_hl << 2;
	int i, next;

	/*
	 * Presence of header sizes in mbufs
	 * would confuse code below.
	 */
	m->m_off += hlen;
	m->m_len -= hlen;

	/*
	 * If first fragment to arrive, create a reassembly queue.
	 */
	if (fp == 0) {
		MSGET(fp, struct ipq, 0);
		if (fp == NULL)
			goto dropfrag;
		insque(fp, &ipq);
		fp->ipq_ttl = IPFRAGTTL;
		fp->ipq_p = ip->ip_p;
		fp->ipq_id = ip->ip_id;
		/* empty fragment list: the queue header points at itself */
		fp->ipq_next = fp->ipq_prev = (struct ipasfrag *)fp;
		fp->ipq_src = ((struct ip *)ip)->ip_src;
		fp->ipq_dst = ((struct ip *)ip)->ip_dst;
		q = (struct ipasfrag *)fp;
		goto insert;
	}

	/*
	 * Find a segment which begins after this one does.
	 */
	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next)
		if (q->ip_off > ip->ip_off)
			break;

	/*
	 * If there is a preceding segment, it may provide some of
	 * our data already.  If so, drop the data from the incoming
	 * segment.  If it provides all of our data, drop us.
	 */
	if (q->ipf_prev != (struct ipasfrag *)fp) {
		/* i = number of our leading bytes the predecessor already covers */
		i = q->ipf_prev->ip_off + q->ipf_prev->ip_len - ip->ip_off;
		if (i > 0) {
			if (i >= ip->ip_len)
				goto dropfrag;
			m_adj(m, i);
			ip->ip_off += i;
			ip->ip_len -= i;
		}
	}

	/*
	 * While we overlap succeeding segments trim them or,
	 * if they are completely covered, dequeue them.
	 */
	while (q != (struct ipasfrag *)fp && ip->ip_off + ip->ip_len > q->ip_off) {
		i = (ip->ip_off + ip->ip_len) - q->ip_off;
		if (i < q->ip_len) {
			/* partial overlap: trim the front of q and stop */
			q->ip_len -= i;
			q->ip_off += i;
			m_adj(DTOM(q), i);
			break;
		}
		/* q is wholly covered: save its successor, then discard it */
		ipf = q->ipf_next;
		ip_deq(q);
		m_freem(DTOM(q));
#if pdp11
		MSFREE(q);
#endif
		q = ipf;
	}

insert:
	/*
	 * Stick new segment in its place;
	 * check for complete reassembly.
	 */
#if pdp11
	/*
	 * pdp11: the chain links a separately allocated copy of the
	 * header, which remembers its mbuf in ipf_mbuf (see DTOM).
	 */
	MSGET(ipf, struct ipasfrag, 0);
	if (ipf == 0) goto dropfrag;
	bcopy(ip, ipf, sizeof *ipf);
	ipf->ipf_mbuf = m;
	ip = ipf;
#endif
	ip_enq(ip, q->ipf_prev);
	/*
	 * Walk the chain: the datagram is complete only if the fragments
	 * are contiguous from offset 0 and the last one does not
	 * expect more fragments.
	 */
	next = 0;
	for (q = fp->ipq_next; q != (struct ipasfrag *)fp; q = q->ipf_next) {
		if (q->ip_off != next)
			return (0);
		next += q->ip_len;
	}
	if (q->ipf_prev->ipf_mff)
		return (0);

	/*
	 * Reassembly is complete; concatenate fragments.
	 */
	q = fp->ipq_next;
	m = DTOM(q);
	hlen = q->ip_hl << 2;
	/* detach the rest of the first fragment's chain and re-append it via m_cat */
	t = m->m_next;
	m->m_next = 0;
	m_cat(m, t);
	ipf = q;
	q = q->ipf_next;
#if pdp11
	MSFREE(ipf);
#endif
	while (q != (struct ipasfrag *)fp) {
		t = DTOM(q);
		/* step to the successor before the pdp11 header copy is released */
		ipf = q;
		q = q->ipf_next;
#if pdp11
		MSFREE(ipf);
#endif
		m_cat(m, t);
	}

	/*
	 * Create header for new ip packet by
	 * modifying header of first packet;
	 * dequeue and discard fragment reassembly header.
	 * Make header visible.
	 */
	m->m_len += hlen;
	m->m_off -= hlen;
	ip = mtod(m, struct ipasfrag *);
	/* next now holds the total data length of the datagram */
	ip->ip_len = next;
	((struct ip *)ip)->ip_src = fp->ipq_src;
	((struct ip *)ip)->ip_dst = fp->ipq_dst;
	remque(fp);
	MSFREE(fp);
	return ((struct ip *)ip);

dropfrag:
	m_freem(m);
	return (0);
}

/*
 * Discard one reassembly queue: release every fragment still
 * chained on fp, unlink fp from the list of reassembly queues and
 * free it.  Returns the queue that followed fp, so a caller walking
 * the list can continue from there.
 */
struct ipq *
ip_freef(fp)
	struct ipq *fp;
{
	register struct ipasfrag *frag, *after;
	struct ipq *following;

	frag = fp->ipq_next;
	while (frag != (struct ipasfrag *)fp) {
		after = frag->ipf_next;	/* remember successor first */
		ip_deq(frag);		/* unlink from the fragment chain */
		m_freem(DTOM(frag));	/* release the fragment's mbufs */
#if pdp11
		MSFREE(frag);		/* and its separate header */
#endif
		frag = after;
	}
	following = fp->next;
	remque(fp);			/* off the reassembly queue list */
	MSFREE(fp);
	return (following);
}

/*
 * Link fragment p into a reassembly chain directly after prev.
 * Same job as insque, except that the link fields live in the
 * middle of the structure.
 */
ip_enq(p, prev)
	register struct ipasfrag *p, *prev;
{
	register struct ipasfrag *succ;

	succ = prev->ipf_next;		/* element that will follow p */
	p->ipf_prev = prev;
	p->ipf_next = succ;
	succ->ipf_prev = p;
	prev->ipf_next = p;
}

/*
 * Unlink fragment p from its reassembly chain; the counterpart
 * of ip_enq, as remque is of insque.  p itself is left untouched.
 */
ip_deq(p)
	register struct ipasfrag *p;
{
	register struct ipasfrag *before, *after;

	before = p->ipf_prev;
	after = p->ipf_next;
	before->ipf_next = after;
	after->ipf_prev = before;
}

/*
 * IP slow timer: age every reassembly queue by one tick and
 * throw away any whose time-to-live reaches zero.  Runs at
 * splnet; does nothing if the queue list has not been set up yet.
 */
ip_slowtimo()
{
	register struct ipq *cur;
	int pri;

	pri = splnet();
	cur = ipq.next;
	if (cur != 0) {
		while (cur != &ipq) {
			if (--cur->ipq_ttl == 0)
				cur = ip_freef(cur);	/* yields the next queue */
			else
				cur = cur->next;
		}
	}
	splx(pri);
}

/*
 * Release every pending reassembly queue, and with it
 * all datagram fragments currently being held.
 */
ip_drain()
{

	for (;;) {
		if (ipq.next == &ipq)
			break;		/* list is empty */
		(void) ip_freef(ipq.next);
	}
}

/*
 * Do option processing on a datagram,
 * possibly discarding it if bad options
 * are encountered.
 *
 * Walks the options that follow the fixed header of ip.
 * Returns 0 if the datagram may be processed further, or 1 after
 * reporting a bad option through icmp_error(); the caller (ipintr)
 * does not touch the datagram again in that case.
 * For ICMP_PARAMPROB, code carries the byte offset of the offending
 * octet from the start of the IP header.
 */
ip_dooptions(ip)
	struct ip *ip;
{
	register u_char *cp;
	int opt, optlen, cnt, code, type;
	struct in_addr *sin;
	register struct ip_timestamp *ipt;
	register struct ifnet *ifp;
	struct in_addr t;

	cp = (u_char *)(ip + 1);
	cnt = (ip->ip_hl << 2) - sizeof (struct ip);
	for (; cnt > 0; cnt -= optlen, cp += optlen) {
		opt = UCHAR(cp[0]);
		if (opt == IPOPT_EOL)
			break;
		if (opt == IPOPT_NOP)
			optlen = 1;
		else {
			/*
			 * type and code must be set before every jump to
			 * bad; they used to be passed to icmp_error
			 * uninitialized on a malformed length.
			 */
			type = ICMP_PARAMPROB;
			if (cnt < 2) {
				/* no room left for a length octet */
				code = cp - (u_char *)ip;
				goto bad;
			}
			optlen = UCHAR(cp[1]);
			if (optlen <= 0 || optlen > cnt) {
				code = &cp[1] - (u_char *)ip;
				goto bad;
			}
		}
		switch (opt) {

		default:
			break;

		/*
		 * Source routing with record.
		 * Find interface with current destination address.
		 * If none on this machine then drop if strictly routed,
		 * or do nothing if loosely routed.
		 * Record interface address and bring up next address
		 * component.  If strictly routed make sure next
		 * address on directly accessible net.
		 */
		case IPOPT_LSRR:  case IPOPT_SSRR:
			/*
			 * cp[2] is the route pointer: 1-origin, relative to
			 * the start of the option, smallest legal value 4.
			 * Compare as ints so a short option cannot wrap the
			 * bound to a huge unsigned value.
			 */
			if (cp[2] < 4 ||
			    cp[2] > optlen - ((int)sizeof (struct in_addr) - 1))
				break;
			/* pointer is 1-origin, hence the -1 (as for timestamps) */
			sin = (struct in_addr *)(cp + cp[2] - 1);
			bcopy(sin, &ipaddr.sin_addr, sizeof *sin);
			ifp = if_ifwithaddr((struct sockaddr *)&ipaddr);
			type = ICMP_UNREACH, code = ICMP_UNREACH_SRCFAIL;
			if (ifp == 0) {
				if (opt == IPOPT_SSRR)
					goto bad;
				break;
			}
			/* swap the route entry with the current destination */
			t = ip->ip_dst; ip->ip_dst = ipaddr.sin_addr;
			bcopy(&t, sin, sizeof *sin);
			cp[2] += 4;
#ifdef notdef   /* this looks wrong */
			if (cp[2] > optlen - (sizeof (long) - 1))
				break;
			bcopy(&sin[1], &ip->ip_dst, sizeof *sin);
			if (opt == IPOPT_SSRR &&
			    if_ifonnetof(in_netof(ip->ip_dst)) == 0)
				goto bad;
#endif
			break;

		case IPOPT_TS:
			code = cp - (u_char *)ip;
			type = ICMP_PARAMPROB;
			ipt = (struct ip_timestamp *)cp;
			if (ipt->ipt_len < 5)
				goto bad;
			/*
			 * The pointer is 1-origin and must lie beyond the
			 * four octets of option header, or the stores below
			 * would overwrite the header itself.
			 */
			if (ipt->ipt_ptr < 5)
				goto bad;
			if (ipt->ipt_ptr > ipt->ipt_len - sizeof (long)) {
				/* no room: count the overflow, bad if it wraps */
				if (++ipt->ipt_oflw == 0)
					goto bad;
				break;
			}
			/* sin: the slot at the pointer (an address, if any) */
			sin = (struct in_addr *)(cp+cp[2]-1);
			switch (ipt->ipt_flg) {

			case IPOPT_TS_TSONLY:
				break;

			case IPOPT_TS_TSANDADDR:
				if (ipt->ipt_ptr + 8 > ipt->ipt_len)
					goto bad;
				if (ifinet == 0)
					goto bad;	/* ??? */
				bcopy(&((struct sockaddr_in *)&ifinet->if_addr)->sin_addr,
					sin, sizeof *sin);
				/*
				 * Step over the address just recorded so the
				 * pointer ends up past both address and
				 * timestamp (it used to advance by 4 only).
				 */
				ipt->ipt_ptr += 4;
				break;

			case IPOPT_TS_PRESPEC:
				bcopy(sin, &ipaddr.sin_addr, sizeof *sin);
				/* not one of our addresses: leave this option alone */
				if (!if_ifwithaddr((struct sockaddr *)&ipaddr))
					continue;
				if (ipt->ipt_ptr + 8 > ipt->ipt_len)
					goto bad;
				ipt->ipt_ptr += 4;
				break;

			default:
				goto bad;
			}
			/*
			 * Store the timestamp at the (possibly advanced)
			 * pointer, so that with an address present it lands
			 * after the address rather than on top of it.
			 */
			{ n_time ntime, iptime();
			ntime = iptime();
			bcopy(&ntime, cp + ipt->ipt_ptr - 1, sizeof *sin);
			}
			ipt->ipt_ptr += 4;
		}
	}
	return (0);
bad:
	icmp_error(ip, type, code);
	return (1);
}

/*
 * Strip out IP options, at higher
 * level protocol in the kernel.
 * Second argument is buffer to which options
 * will be moved, and return value is their length.
 *
 * ip must be the header at the front of its mbuf.  If mopt is
 * non-null it receives the option bytes (its m_len is set to their
 * length, possibly 0).  The data following the options in the first
 * mbuf is moved up to directly follow the fixed header and that
 * mbuf's m_len shrinks accordingly; ip_hl is not modified.
 */
ip_stripoptions(ip, mopt)
	struct ip *ip;
	struct mbuf *mopt;
{
	register int i;
	register struct mbuf *m;
	int olen;

	olen = (ip->ip_hl<<2) - sizeof (struct ip);
	/* a bogus header length below the minimum means "no options" */
	if (olen < 0)
		olen = 0;
	m = dtom(ip);
	ip++;				/* now points at the first option byte */
	if (mopt) {
		mopt->m_len = olen;
		mopt->m_off = MMINOFF;
		/* presumably copies olen bytes from offset sizeof *ip in m to offset 0 in mopt */
		if (olen) MBCOPY(m, sizeof *ip, mopt, 0, olen);
	}
	if (olen == 0)
		return (0);
	/* i = bytes in this mbuf that follow the options */
	i = m->m_len - (sizeof (struct ip) + olen);
	bcopy((caddr_t)ip+olen, (caddr_t)ip, (unsigned)i);
	m->m_len -= olen;
	return (olen);
}

/*
 * Error number reported to the transport protocols for each control
 * command; ip_ctlinput() indexes this table by its cmd argument.
 * A zero entry makes ip_ctlinput() ignore that command.
 */
u_char inetctlerrmap[] = {
	ECONNABORTED,	ECONNABORTED,	0,		0,
	0,		0,
	EHOSTDOWN,	EHOSTUNREACH,	ENETUNREACH,	EHOSTUNREACH,
	ECONNREFUSED,	ECONNREFUSED,	EMSGSIZE,	0,
	0,		0,		0,		0
};

/*
 * Control input.  Maps the command cmd to an error number through
 * inetctlerrmap[] and hands that error, together with the internet
 * address the command refers to, to in_pcbnotify() for the TCP and
 * UDP control block lists.  Commands that are out of range or map
 * to 0 are ignored.
 *
 * arg is interpreted according to cmd:
 *	PRC_IFDOWN			a struct sockaddr_in
 *	PRC_HOSTDEAD, PRC_HOSTUNREACH	a struct in_addr
 *	anything else			a struct icmp, whose embedded IP
 *					header supplies the destination
 */
ip_ctlinput(cmd, arg)
	int cmd;
	caddr_t arg;
{
	struct in_addr *sin;
	int tcp_abort(), udp_abort();
	extern struct inpcb tcb, udb;

	/*
	 * Besides the PRC_NCMDS limit, never index past the end of
	 * inetctlerrmap[], whatever the table's size relative to
	 * PRC_NCMDS happens to be.
	 */
	if (cmd < 0 || cmd > PRC_NCMDS ||
	    cmd >= (int)(sizeof (inetctlerrmap) / sizeof (inetctlerrmap[0])))
		return;
	if (inetctlerrmap[cmd] == 0)
		return;		/* XXX */
	if (cmd == PRC_IFDOWN)
		sin = &((struct sockaddr_in *)arg)->sin_addr;
	else if (cmd == PRC_HOSTDEAD || cmd == PRC_HOSTUNREACH)
		sin = (struct in_addr *)arg;
	else
		sin = &((struct icmp *)arg)->icmp_ip.ip_dst;
	in_pcbnotify(&tcb, sin, inetctlerrmap[cmd], tcp_abort);
	in_pcbnotify(&udb, sin, inetctlerrmap[cmd], udp_abort);
}

int	ipprintfs = 0;			/*DEBUG*/ /* nonzero: ip_forward logs each packet */
int	ipforwarding = 1;		/* zero: refuse to forward, answer with ICMP unreachable */
/*
 * Forward a packet.  If some error occurs return to the sender
 * an icmp packet.  Note we can't always generate a meaningful
 * icmp message because icmp doesn't have a large enough repertoire
 * of codes and types.
 *
 * ip is the header of a datagram not addressed to this host.  The
 * time-to-live is decremented, the options are moved into a separate
 * mbuf and the datagram is passed to ip_output().  A copy of the
 * first bytes is kept so that an ICMP error can still be built if
 * ip_output() fails.
 */
ip_forward(ip)
	register struct ip *ip;
{
	register int error, type, code;
	struct mbuf *mopt, *mcopy, *m = dtom(ip);

	if (ipprintfs)
		printf("forward: src %X dst %X ttl %x\n", ip->ip_src,
			ip->ip_dst, ip->ip_ttl);
	if (ipforwarding == 0) {
		/* can't tell difference between net and host */
		type = ICMP_UNREACH, code = ICMP_UNREACH_NET;
		goto sendicmp;
	}
	if (UCHAR(ip->ip_ttl) < IPTTLDEC) {
		type = ICMP_TIMXCEED, code = ICMP_TIMXCEED_INTRANS;
		goto sendicmp;
	}
	ip->ip_ttl -= IPTTLDEC;
	mopt = m_get(M_DONTWAIT);
	if (mopt == 0) {
		/* no mbuf for the options: silently drop the datagram */
		m_freem(m);
		return;
	}

	/*
	 * Save at most 64 bytes of the packet in case
	 * we need to generate an ICMP message to the src.
	 */
	mcopy = m_copy(m, 0, MIN(ip->ip_len, 64));
	ip_stripoptions(ip, mopt);

	/* last 0 here means no directed broadcast */
	if ((error = ip_output(m, mopt, 0, 0)) == 0) {
		if (mcopy)
			m_freem(mcopy);
		return;
	}
	/*
	 * The copy may not have been obtained; without it there is
	 * nothing left to build an ICMP error from, so give up rather
	 * than dereference a null mbuf.
	 */
	if (mcopy == 0)
		return;
	ip = mtod(mcopy, struct ip *);
	type = ICMP_UNREACH, code = 0;		/* need ``undefined'' */
	switch (error) {

	case ENETUNREACH:
	case ENETDOWN:
		code = ICMP_UNREACH_NET;
		break;

	case EMSGSIZE:
		code = ICMP_UNREACH_NEEDFRAG;
		break;

	case EPERM:
		code = ICMP_UNREACH_PORT;
		break;

	case ENOBUFS:
		type = ICMP_SOURCEQUENCH;
		break;

	case EHOSTDOWN:
	case EHOSTUNREACH:
		code = ICMP_UNREACH_HOST;
		break;
	}
sendicmp:
	icmp_error(ip, type, code);
}
