/*
 * $Id: //devel/tools/main/fastzolver/fastzolver.cpp#2 $
 *
 * written by :	Stephen J. Friedl
 *		Software Consultant
 *		Tustin, California US
 *
 *	This is a DNS-resolving helper program that works hand in hand with
 *	Webalizer: it uses asynchronous DNS to run through logfiles and
 *	populate the DNS cache with IP-to-name lookups. This is MUCH faster
 *	than using the DNS child processes that webazolver uses: we have
 *	no trouble getting 100 resolutions/second when talking to a decent
 *	nameserver, with much less system load than the equivalent
 *	child-process method.
 *
 *	The program reads logfile records one at a time, and the ONLY thing
 *	we care about is the IP address - we are doing positively no parsing
 *	of anything else, as all we care about is populating the DNS cache.
 *	In fact, the read-log-line code specifically limits the buffer to
 *	just enough to capture the IP address, ignoring anything after that.
 *
 *	Later, webalizer is run with this DNS cache and told not to do
 *	any name resolution - *everything* is in the cache. This essentially
 *	disables DNS lookups by webalizer.
 *	
 * FORMAT OF THE CACHE
 * -------------------
 *
 *	The "key" of the cache is the IP address as a string in the
 *	traditional dotted quad formation. The contents of the data
 *	is a variable-length structure that includes the time the record
 *	was created (so cache entries can age properly), a type, and
 *	the string of the hostname.
 *
 *	The "type" can be either "hostname" or "IP", and the latter is used
 *	as a kind of negative caching when a name cannot be resolved: By
 *	storing the IP address as a string, we don't keep trying to look
 *	up the same address over and over (and having it fail).
 *
 *	We use the wonderful ADNS - asynchronous DNS - package to do
 *	these lookups, and it allows multiple overlapping requests:
 *	our code is simply keeping track of which ones are pending.
 *
 *		http://www.chiark.greenend.org.uk/~ian/adns/
 *
 *	Looked-up names are not known forever - each entry in the cache
 *	has a time it was saved, and we expire IP address entries (which
 *	were unknown) sooner than successful lookups.
 *
 *	===TODO: tuning these parameters
 *
 * COMMAND LINE PARAMETERS
 * -----------------------
 *
 *	-d
 *
 *		Turn on a bit of debug
 *
 *	-D file
 *
 *		Specify the name of the cache file (default: "dns_cache.db")
 *
 *	-R
 *		Remove the cache file before running: mainly used while
 *		testing to insure a clean run. This is not compiled in
 *		for production builds.
 *
 *	-s
 *		Report statistics every second
 *
 *	-L #
 *		Set the limit for outstanding queries to <#>. Once this
 *		limit is reached, it pauses for a bit to allow things
 *		to catch up and not swamp the DNS server. Default is 40,
 *		and -L0 disables any limit (run wide open).
 *
 *	-N x
 *
 *		This option is ignored and is included for compatibility
 *		with webazolver.
 *
 *	-H ndays
 *
 *		Expire known Hostnames after <ndays> days.
 *
 *	-U ndays
 *
 *		Expire Unknown IP addresses after <ndays> days.
 *
 *
 *	file [file...]
 *
 *		Open and read file(s) on the command line, else read from
 *		the standard input. Files that end in .gz are decompressed
 *		automatically.
 */
#include <sys/types.h>
#include <sys/time.h>
#include <arpa/inet.h>
#include <netinet/in.h>
#include <unistd.h>
#include <assert.h>
#include <string.h>
#include <stdlib.h>
#include <stddef.h>
#include <stdarg.h>
#include <stdio.h>
#include <ctype.h>
#include <errno.h>
#include <db_185.h>
#include <fcntl.h>
#include <time.h>
#ifdef SUPPORT_ZLIB
#  include <zlib.h>
#endif

#include <adns.h>

#ifndef TRUE
#  define TRUE   1
#  define FALSE  0
#endif

/* Enable this for debugging only! */
/* #define ENABLE_CACHE_REMOVE */

/*------------------------------------------------------------------------
 * VERSION INFORMATION
 *
 * We want to record the historical version information here to keep
 * track of what we released when.
 *
 * 1.1.0 - 2005-08-06
 *
 *	Initial public release 
 */
// Banner printed (via puts) for "--version" and "-V".
static const char Version[] = 
"fastzolver 1.1.0 - 2005-08-06 - http://www.unixwiz.net/tools/";

// Debug level: incremented once for each -d on the command line; any
// nonzero value turns on the QUERY/STORE/ANSWER trace output.
static int Verbose = 0;

/*------------------------------------------------------------------------
 * DB CACHE RECORD FORMAT
 *
 * When storing data in the DB file, the key is the ASCII IP address,
 * and the data is this small structure. The timestamp represents when
 * we've stored the data, and the type is whether we think it's an IP
 * address or a hostname.
 *
 * NOTE: this has to match what webalizer uses! Look in dns_resolv.h
 *
 * NOTE: when reading data from the database, it's not always aligned
 * properly, so we read it into this union so as to guarantee that
 * alignment.
 */
#define TYPE_HOSTNAME	0
#define	TYPE_IPADDR	1

// On-disk value stored under each IP-address key. Only the header
// (timestamp + type) has a fixed size; the name that follows is
// variable length.
struct dnsRecord {
	// time(0) at the moment the record was written; compared against
	// the aging horizons to decide whether the entry has expired
	time_t	timestamp;
	// TYPE_HOSTNAME or TYPE_IPADDR
	int	type;
	// first byte of the NUL-terminated name; the stored record is
	// sized as offsetof(hostname) + strlen(name) + 1, so the string
	// extends past this one-byte placeholder
	char	hostname[1];
};


// force non-char alignment
//
// Scratch area used to build a record before it is written: the "dns"
// member gives the storage the alignment of struct dnsRecord, and
// "buffer" provides the room for the variable-length name that trails
// the fixed header.
union dataRecord {
	struct dnsRecord	dns;
	char			buffer[2048];
};

/*------------------------------------------------------------------------
 * LINKED LIST OF WORK
 *
 *	Each query that's outstanding has a handful of data associated
 *	with it, and it's all stored in a linked list. We do these a bit
 *	differently than most, so we'll touch on the details here.
 *
 *	First, this list is completely unordered: the output is always
 *	populating a database, and it simply makes no difference which
 *	is done first. So we don't have to give the first thought
 *	to order and can just make it fast and safe.
 *
 *	Second, we use a "behind pointer" technique: rather than point
 *	to the node of interest, we point to the node *behind* it (the
 *	parent), which makes it really easy to delete the node in question:
 *	An implication of this is that the list head is not a pointer to
 *	a node, but a dummy node itself, and we take the address of it.
 *	The node *contents* are uninteresting, but the "next" pointer is
 *	not.
 *
 *	node listbase;
 *
 *		...
 *		listbase.next = 0;
 *
 *		node *qbehind;
 *		node *qthis;
 *
 *		for ( node *qbehind = &listbase; qthis = qbehind->next; )
 *		{
 *			// do stuff with qthis
 *
 *			if ( we want to delete )
 *			{
 *				// link past current node
 *				qbehind->next = qthis->next;
 *				qthis->next = 0;
 *				free(qthis);
 *			}
 *			else
 *			{
 *				qbehind = qbehind->next;
 *			}
 *		}
 *
 *	We've used this behind-pointer technique for years, and we love
 *	how it removes special cases for handling "the beginning" or
 *	"the end" of the list.
 */

#define CTXMAGIC 0xBADF00D

// One outstanding reverse-DNS query. Nodes are calloc'd in main() when a
// query is submitted and freed in check_pending() once the query is no
// longer pending.
struct dnsContext {
	// always CTXMAGIC for a live node; asserted in check_pending()
	// and wiped (memset) just before the node is freed
	unsigned long		magic;
	// handle filled in by adns_submit_reverse() and later polled
	// with adns_check()
	adns_query		query;
	// the address being looked up (AF_INET, sin_addr from the logfile)
	struct sockaddr_in	sock;

	// next node in the unordered pending list, or 0 at the end
	struct dnsContext	*next;

	// look up printable IP address for this entry
	// (inet_ntoa() returns a pointer to storage it owns, so use or
	// copy the result before the next inet_ntoa() call)
	const char *ipaddr(void) const { return inet_ntoa(sock.sin_addr); }
};

// Dummy head node of the pending-query list: only its "next" member is
// meaningful (see the "behind pointer" discussion above).
static struct dnsContext list;

// Forward declarations for the helpers defined after main().
static int storedns(DB *db, const char *ipaddr, int type, const char *name);
static int address_known(DB *db, const char *ipaddr);
static int check_pending(adns_state adns);
static int nextline(char *obuf, size_t bufsize);
static void report_stats(const char *msg);
static void die(const char *format, ...);

// Handle of the open cache database; set by dbopen() in main().
static DB *dns_db = 0;

// Path of the cache file; overridden by -D <file>.
static const char *dns_cachefile = "dns_cache.db";

/*------------------------------------------------------------------------
 * Aging horizons
 *
 * The cache maintains dates where the information was added, but it's
 * not good forever: at some intervals the data is "too old" and must
 * be discarded, and we set the timeouts differently for real hostnames
 * and negative-cached IP addresses.
 */

// Maximum age, in seconds, of each kind of cache entry. The -H and -U
// options replace these with <ndays> * 24*60*60.
static int
	agetime_hostname = 5*24*60*60,		// 5 days
	agetime_ipaddr   = 1*24*60*60;		// 1 day

// Cutoff times computed once in main() as time(0) minus the matching
// agetime: address_known() treats a record as valid only if its
// timestamp is later than the horizon for its type.
static time_t
	horizon_hostname,
	horizon_ipaddr;

/*------------------------------------------------------------------------
 * STATISTICS & DEBUGGING
 *
 * We keep track of a handful of statistics while running through our
 * logfile, mainly for reporting to the user, but one (queries_pending)
 * for throttling the queries to a reasonable rate.
 */
// Running counters:
//   query_unknowns  - completed queries that produced no hostname
//   query_successes - completed queries that produced a hostname
//   count_loglines  - lines returned by nextline() so far
//   queries_pending - queries submitted but not yet removed from the list
static int	query_unknowns  = 0,
		query_successes = 0,
		count_loglines  = 0,
		queries_pending = 0;

// Throttle: once queries_pending reaches this value the main loop sleeps
// for a second before continuing; a value <= 0 disables the throttle.
static int	max_qpending    = 40;		// -L <n>

// When set, report_stats() prints a STATS line (at most once per second).
static bool	do_report_stats = FALSE;	// -s

// NULL-terminated list of input filenames left over after option
// processing, or 0 when input comes from the standard input.
static char	**file_list = 0;

int main(int argc, char **argv)
{
	int	c;

#ifdef ENABLE_CACHE_REMOVE
	bool do_cache_remove = false;
#endif

	// HACK: people expect this
	if ( argv[1] != 0   &&   strcmp(argv[1], "--version") == 0 )
	{
		puts(Version);
		exit(EXIT_SUCCESS);
	}

	while ( (c = getopt(argc, argv, "U:H:N:VsRD:L:d")) != EOF )
	{
		switch (c)
		{
		  case 'U':		// -U <ndays>
			agetime_ipaddr = atoi(optarg) * 24 * 60 * 60;
			break;

		  case 'H':		// -H <ndays>
			agetime_hostname = atoi(optarg) * 24 * 60 * 60;
			break;

		  case 'V':
			puts(Version);
			exit(EXIT_SUCCESS);

		  case 'd':
			Verbose++;
			break;

		  case 'L':
			max_qpending = atoi(optarg);
			break;

		  case 's':
			do_report_stats = TRUE;
			break;

		  case 'R':
#ifdef ENABLE_CACHE_REMOVE
			do_cache_remove = true;
#else
			puts("Note: -R parameter not supported in this build");
#endif
			break;

		  case 'D':
			dns_cachefile = optarg;
			break;

		  case 'N':
			/* nothing: dummy placeholder */
			break;

		  default:
			// getopt() reports the error
			exit(EXIT_FAILURE);
		}
	}

	/*----------------------------------------------------------------
	 * SANITY CHECKS / FILENAMES
	 *
	 * At the end of option processing, we may have some filenames
	 * left over: they're stored in "file list" so we can read thek
	 * later 
	 */

	if ( dns_cachefile == 0 )
		die("ERROR: missing DNS cache file -D <file> param");


	if ( argv[optind] != 0 ) file_list = &argv[optind];

	horizon_hostname = time(0) - agetime_hostname;
	horizon_ipaddr   = time(0) - agetime_ipaddr;

	printf("horizon_hostname = %s", ctime(&horizon_hostname) );
	printf("horizon_ipaddr   = %s", ctime(&horizon_ipaddr) );

	/*----------------------------------------------------------------
	 * OPEN THE DATABASE
	 *
	 * The cache is a Berkeley DB file, and we have to open or create
	 * it as a *hash* database. For debugging, the user can request
	 * that the cache file is dumped before running...
	 */
#ifdef ENABLE_CACHE_REMOVE
	if ( do_cache_remove ) unlink(dns_cachefile);
#endif

	dns_db = dbopen(dns_cachefile, O_CREAT|O_RDWR, 0664, DB_HASH, NULL);

	if ( dns_db == 0 )
	{
		die("ERROR: cannot open DNS cache file %s", dns_cachefile);
	}

	/*----------------------------------------------------------------
	 * ASYNC DNS SETUP
	 *
	 * The ADNS library requires inits, and we can either just use the
	 * local nameserver, or hardcode one.
	 *
	 * ===TODO: we really should make this cmdline selectible.
	 */

        adns_state adns;

	adns_initflags flags = (adns_initflags)
			( adns_if_noenv
	                | adns_if_checkc_freq );

#if 1
	adns_init(&adns, flags, 0);
#else
	adns_init_strcfg(&adns, flags, 0,
		"nameserver 127.0.0.1\n"
		"search unixwiz.net\n"
		"debug");
#endif

	/*----------------------------------------------------------------
	 * LINKED LIST SETUP:
	 *
	 * All of our linked lists work one-ahead, so the base is not a
	 * pointer, but an entry itself, and we loop not on "q" but on
	 * q->next.
	 *
	 *	q = &list;
	 *	while ( q->next )
	 *	{
	 *		// do stuff with q->next->whatever
	 *
	 * This makes it really easy to delete the currently-interesting
	 * entry and move on.
	 */
	memset(&list, 0, sizeof list);
	list.next = 0;

	int nqueries = 0;
	int every    = 10;

	struct in_addr last_addr;
	last_addr.s_addr = 0;

	char logbuf[32];

	time_t start_time = time(0);

	while ( nextline(logbuf, sizeof logbuf) )
	{
		count_loglines++;

		/*--------------------------------------------------------
		 * LOG ENTRY CRACKING
		 *
		 * "logbuf" contains the first part of the line we're to
		 * analyze, and the IP address ought to be first. Convert
		 * it to a binary value with "aton": if it works, we had
		 * a valid one, otherwise it's a bogus line and we skip
		 * it.
		 *
		 * Once we have a valid IP, see if this line is the same
		 * as the previous line: if so, we've already processed
		 * the IP and should skip it entirely.
		 *
		 * Finally, convert to a *printable* IP address for
		 * debug reporting.
		 */
		struct in_addr addr;

		if ( inet_aton(logbuf, &addr) == 0 )
		{
			printf("bad log IP {%s}\n", logbuf);
			continue;
		}

		if ( addr.s_addr == last_addr.s_addr ) continue;

		last_addr = addr;

		char printable_ipaddr[20];
		strncpy(printable_ipaddr, inet_ntoa(addr), sizeof printable_ipaddr-1);
		printable_ipaddr[sizeof printable_ipaddr - 1] = '\0';

		/*--------------------------------------------------------
		 * If the name already exists in the cache (adjusted for
	 	 * expiry time), we're done with this log entry. Note that
		 * this doesn't mean we know the DNS name or that we have
		 * completed the lookup, just that we don't need to do one
		 * here (e.g., a lookup could already be in progress, or
		 * one could have already failed).
		 */

		if ( address_known(dns_db, printable_ipaddr) )
			continue;

		// save the IP->IP translation
		storedns(dns_db,
			printable_ipaddr,
			TYPE_IPADDR,
			printable_ipaddr);

		struct dnsContext *pctx =
			(struct dnsContext *)calloc(1, sizeof *pctx);

		pctx->magic           = CTXMAGIC;
		pctx->sock.sin_addr   = addr;
		pctx->sock.sin_family = AF_INET;
		pctx->query           = 0;
		pctx->next            = 0;

		if ( Verbose )
			printf("QUERY: %s\n", pctx->ipaddr() );

		int rc;

		if ( (rc = adns_submit_reverse(
			adns,
			(struct sockaddr *)&pctx->sock,
			adns_r_ptr,
			adns_qf_owner,
			pctx,
			&pctx->query)) != 0 )
		{
			printf("adns failure [%d]\n", rc);
			sleep(1);

			continue;
		}

		// add to the head of the linked list (order doesn't matter)
		pctx->next = list.next;
		list.next = pctx;

		adns_processany(adns);

		queries_pending++;

		if ( max_qpending > 0  && queries_pending >= max_qpending )
		{
			sleep(1);
			report_stats("paused (qlimit)");
			adns_processany(adns);
			check_pending(adns);
		}
		else
		{
			if ( (++nqueries % every) == 0 )
				check_pending(adns);
			report_stats("");
		}
	}
	printf("*end of scan, collecting final entries\n");

	while (  check_pending(adns) > 0 )
	{
		report_stats("finishing");
		sleep(1);
		/* NOTHING */
	}

	report_stats("Done!");

	time_t elapsed_time = time(0) - start_time;

	if (elapsed_time == 0) elapsed_time = 1;

	const float qcount = query_successes + query_unknowns;

	printf("Processed %.0f queries in %ld seconds (%.2f q/sec)\n",
		qcount,
		(long)elapsed_time,
		qcount / elapsed_time);

	adns_finish(adns);

	dns_db->close(dns_db);

	return 0;
}

/*
 * setup_key()
 *
 *	Fill in a DBT so that it describes the given IP-address string as
 *	a database key. Both the fetch and the store paths build their
 *	keys here, which guarantees they agree on the format: the key is
 *	the bytes of the string WITHOUT the trailing NUL.
 *
 *	The DBT only points at the caller's string - nothing is copied -
 *	so the string must stay valid for as long as the DBT is in use.
 */
static void setup_key(DBT *dbt, const char *ipaddr)
{
	assert(dbt    != 0);
	assert(ipaddr != 0);

	const size_t keylen = strlen(ipaddr);

	// start from a clean slate so any DBT fields we don't set are zero
	memset(dbt, 0, sizeof *dbt);

	dbt->size = keylen;
	dbt->data = (void *)ipaddr;
}


/*
 * storedns()
 *
 *	Given an IP address and a hostname, store it in the cache. Because
 *	we wish to cache "misses", the "hostname" could actually be in the
 *	form of an IP address (but in string format). We do pass in a type
 *	that lets us know which of the two it is.
 *
 *	We use this special IP address notation to note lookups in progress:
 *	as soon as we see an IP address for the first time, we immediately
 *	store it here with "TYPE_IPADDR": future lookups, even while the
 *	lookup is still in progress, will be skipped.
 *
 *	db	- open cache database
 *	ipname	- dotted-quad IP address string, used as the key
 *	type	- TYPE_HOSTNAME or TYPE_IPADDR
 *	name	- name to store; anything past 512 bytes is truncated
 *
 *	Returns the result of db->put(): zero on success, nonzero (after
 *	printing a message) on failure.
 */
static int storedns(DB *db, const char *ipname, int type, const char *name)
{
	assert(db     != 0);
	assert(ipname != 0);
	assert(name   != 0);

	// longest name we are willing to store (not counting the NUL)
	const size_t maxname = 512;

	union dataRecord ubuf;
	ubuf.dns.timestamp = time(0);
	ubuf.dns.type      = type;

	/*----------------------------------------------------------------
	 * The name lives in the space that trails the fixed header. We
	 * address it through the union's full-size buffer rather than
	 * through the one-byte "hostname" member: writing hundreds of
	 * bytes "into" a char[1] overruns the declared array, and a
	 * fortified C library is entitled to abort the program for it.
	 */
	char *const hostdst = ubuf.buffer + offsetof(struct dnsRecord, hostname);

	strncpy(hostdst, name, maxname);
	hostdst[maxname] = '\0';

	DBT key;
	setup_key(&key, ipname);

	// the record is only as long as the header plus the actual name
	DBT value;
	memset(&value, 0, sizeof value);
	value.data = &ubuf;
	value.size = offsetof(struct dnsRecord, hostname)
	           + strlen(hostdst)
	           + 1;

	if ( type == TYPE_HOSTNAME  &&   Verbose )
	{
		// time_t is not necessarily "long": cast to match %ld
		printf("  STORE: [%s] -> [%s] (%ld)\n",
			ipname, hostdst,
			(long)ubuf.dns.timestamp);
	}

	const int rc = db->put(db, &key, &value, 0);

	if ( rc != 0 )
	{
		printf("ERROR: db->put(%s) failed, rc=%d\n",
			ipname,
			rc);
	}

	return rc;
}

/*
 * address_known()
 *
 *	Given an IP address in string form, look it up in the database
 *	and return TRUE if the name is "known and valid" and FALSE if not.
 *	If the name is not found, then of course it's not known, but
 *	if it is found but the timestamp is too old, we nevertheless
 *	return "not found". A record too short to hold the fixed header
 *	(timestamp + type) is likewise treated as not found, rather than
 *	making a decision based on bytes that were never read.
 *
 *	Note: for not-found entries, we really should delete the record
 *	from the DB (it's expired), but we're sure that what follows
 *	is to add it right back, so we leave it alone to turn it into
 *	an overwrite.
 */
static int address_known(DB *db, const char *ipaddr)
{
	assert(db     != 0);
	assert(ipaddr != 0);

	DBT key;
	setup_key(&key, ipaddr);

	DBT value;
	memset(&value, 0, sizeof value);

	// if not found, then we clearly don't know about it
	if ( db->get(db, &key, &value, 0) != 0 )
	{
		return FALSE;
	}

	// only the fixed header is needed to judge the record's validity
	const size_t hdrsize = offsetof(struct dnsRecord, hostname);

	if ( value.data == 0  ||  value.size < hdrsize )
	{
		printf("*short cache record for %s (%lu bytes)\n",
			ipaddr,
			(unsigned long)value.size);
		return FALSE;
	}

	// data handed back by the DB is not necessarily aligned, so copy
	// the header into a properly-aligned local before looking at it
	struct dnsRecord rec;

	memcpy(&rec, value.data, hdrsize);

	switch ( rec.type )
	{
	   case TYPE_HOSTNAME:
		return rec.timestamp > horizon_hostname;

	   case TYPE_IPADDR:
		return rec.timestamp > horizon_ipaddr;

	   default:
		printf("*unknown type %d\n", rec.type );
		return FALSE;
	}
}

/*
 * check_pending()
 *
 *	Walk the list of outstanding queries and poll each one with
 *	adns_check(). A query that has an answer is counted (success or
 *	unknown), a successful one has its hostname written to the cache,
 *	and its list node is unlinked and freed. A query that reports an
 *	error is logged and discarded the same way. Queries that are not
 *	finished yet (EAGAIN) stay on the list.
 *
 *	Returns the number of queries that are still pending.
 */
static int check_pending(adns_state adns)
{
	if ( Verbose )
		printf("ENTER check_pending\n");

	int unfinished = 0;

	// "prev" always trails the node under consideration, so removing
	// that node is a matter of relinking prev->next
	struct dnsContext *prev = &list;

	while ( prev->next != 0 )
	{
		struct dnsContext *const cur = prev->next;

		assert(cur->magic == CTXMAGIC);

		adns_answer *reply  = 0;
		adns_query   handle = cur->query;

		const int rc = adns_check(adns, &handle, &reply, 0);

		if ( rc == EAGAIN )
		{
			// not done yet: leave it on the list and step past it
			unfinished++;
			prev = cur;
			continue;
		}

		if ( rc == 0 )
		{
			assert(reply != 0);

			if ( reply->status != adns_s_ok  ||  reply->nrrs <= 0 )
			{
				// the IP->IP placeholder already in the cache
				// stands as the negative entry
				query_unknowns++;

				if ( Verbose )
				{
					printf("  ANSWER: %s -> unknown\n",
						cur->ipaddr());
				}
			}
			else
			{
				query_successes ++;

				storedns(dns_db,
					cur->ipaddr(),
					TYPE_HOSTNAME,
					*reply->rrs.str);
			}

			free(reply);
			reply = 0;
		}
		else
		{
			// ESRCH or anything else unexpected
			printf("  ERR: %s -> %d\n", cur->ipaddr(), rc);
		}

		/*--------------------------------------------------------
		 * This entry is finished one way or the other: unlink it
		 * and destroy it. "prev" deliberately does NOT advance,
		 * because prev->next now names the following node.
		 */
		prev->next = cur->next;
		cur->next  = 0;

		// scrub the node (including the magic) so stale use is caught
		memset(cur, 0, sizeof *cur);
		free(cur);

		queries_pending--;
	}

	if ( Verbose )
		printf("Still pending: %d\n", unfinished);

	return unfinished;
}


/*
 * getline()
 *
 *	Read the next non-empty line from "ifp" into "obuf". The whole
 *	line is always consumed from the stream, but only the leading
 *	part that fits is kept: at most <bufsize - 2> characters plus a
 *	terminating NUL. Whatever follows on the line is read and thrown
 *	away.
 *
 *	This lets the caller supply a small buffer - just big enough for
 *	the IP address at the front of a log line - without paying for
 *	the rest of it.
 *
 *	CR and LF both count as line terminators; terminators seen before
 *	any data are skipped, so an empty line is never returned. The
 *	terminator is not stored.
 *
 *	Returns nonzero if the buffer holds something, or zero at end of
 *	file (the buffer is then an empty string).
 */
static int getline(char *obuf, size_t bufsize, FILE *ifp)
{
	assert(obuf != 0);
	assert(ifp  != 0);

	char *const	limit = obuf + bufsize - 2;	// stop storing here
	char		*out  = obuf;			// next store position

	for (;;)
	{
		const int ch = fgetc(ifp);

		if ( ch == EOF )
			break;

		if ( ch == '\n' || ch == '\r' )
		{
			// leading terminators are just leftovers from the
			// previous line (or blank lines): keep looking
			if ( out == obuf )
				continue;

			// otherwise this is the end of our line
			break;
		}

		// keep the character only while there is room; the rest of
		// the line is still read, but dropped
		if ( out < limit )
			*out++ = ch;
	}

	*out = '\0';

	return out > obuf;
}

#ifdef SUPPORT_ZLIB
/*
 * gzgetline()
 *
 *	Like "getline", this reads the GZIP data and inflates it on
 *	the fly. We keep a raw internal buffer for data from the GZ
 *	stream, and assemble just the first <bufsize> bytes of each
 *	line into the output, ignoring everything to the following
 *	newline.
 *
 *	NOTE: if we get EOF, we ignore any partially-constructed
 *	line. This is intentional.
 */
static int gzgetline(char *obuf, size_t bufsize, gzFile gfp)
{
char		*p        = obuf;
char		*obuf_max = obuf + bufsize - 2;
static char	iobuf[256],
		*nextget = iobuf,
		*lastget = iobuf;

	assert(obuf != 0);

	while ( TRUE )
	{
		/*--------------------------------------------------------
		 * If we have nothing in our buffer, we have to refill it,
		 * and EOF/error marks the end of the file.
		 */
		if ( nextget == lastget )
		{
		const int n = gzread(gfp, iobuf, sizeof iobuf);

			if (n <= 0) return FALSE;

			nextget = iobuf;
			lastget = iobuf + n;
		}

		assert(nextget < lastget);

		const int c = *nextget++;

		if ( c == '\n' || c == '\r' )
		{
			if (p == obuf)
				continue;
			else
				break;
		}
		else if ( p < obuf_max )
		{
			*p++ = c;
		}
	}
	*p = '\0';

	return p > obuf;
}
#endif /* SUPPORT_ZLIB */

/*
 * file_ends_with()
 *
 *	Given a filename and a suffix, return TRUE if the file ends with
 *	that suffix and FALSE if not. 
 */
#ifdef SUPPORT_ZLIB
// Return nonzero if "fname" ends with "suffix" and FALSE otherwise. The
// name must be strictly longer than the suffix, so a file whose entire
// name is the suffix does not count as a match. The comparison is case
// sensitive.
static int file_ends_with(const char *fname, const char *suffix)
{
	assert(fname != 0);
	assert(suffix != 0);

	const int name_len   = strlen(fname);
	const int suffix_len = strlen(suffix);

	if ( name_len > suffix_len )
	{
		// compare just the tail of the name against the suffix
		const char *const tail = fname + (name_len - suffix_len);

		return strcmp(tail, suffix) == 0;
	}

	return FALSE;
}

#endif /* SUPPORT_ZLIB */


/*
 * nextline()
 *
 *	This fetches the next line of input from the input stream, opening
 *	each file in turn. "file_list" contains a NULL-terminated list of
 *	files, or is NULL when we're to read the standard input. A file
 *	named "-" also means the standard input, and (when built with
 *	SUPPORT_ZLIB) files ending in ".gz" are read through zlib.
 *
 *	The currently-open stream is remembered in static variables from
 *	one call to the next. Each source is announced on stderr as it is
 *	opened - including gzip files, which the chain of "else" clauses
 *	previously left out.
 *
 *	Returns TRUE with up to <bufsize - 2> characters of the line in
 *	"obuf", or FALSE once every input has been exhausted (and on all
 *	later calls). A file that cannot be opened is fatal.
 */
static int nextline(char *obuf, size_t bufsize)
{
static FILE	*ifp = 0;		// FILE file
#ifdef SUPPORT_ZLIB
static gzFile	gfp = 0;		// gZip file
#endif
static bool	at_eof = FALSE;		// set once all input is consumed

#ifdef SUPPORT_ZLIB
#  define STREAM_IS_OPEN()	(ifp != 0  ||  gfp != 0)
#else
#  define STREAM_IS_OPEN()	(ifp != 0)
#endif

	/*----------------------------------------------------------------
	 * Once we've run out of input, we stay out of input.
	 */
	if ( at_eof ) return FALSE;

	// no files on the command line: the input is stdin
	if ( ! STREAM_IS_OPEN() &&  file_list == 0 )
	{
		ifp = stdin;
	}

	while ( TRUE )
	{
		/*--------------------------------------------------------
		 * OPEN A FILE?
		 *
		 * If we have no FILE pointer, we have to find the next file
		 * to open, if any. The "file_list" is the array of files
		 * from the command line, and it could be NULL if we're
		 * supposed to read from the standard input.
		 */
		if ( ! STREAM_IS_OPEN() &&  file_list  &&  *file_list )
		{
			const char *const infile = *file_list++;

			if (strcmp(infile, "-") == 0 )
			{
				fprintf(stderr, "Reading from stdin\n");
				ifp = stdin;
			}
#ifdef SUPPORT_ZLIB
			else if ( file_ends_with(infile, ".gz") )
			{
				if ( (gfp = gzopen(infile, "rb")) == 0 )
				{
					die("ERROR: cannot gzopen %s [%s]",
						infile, strerror(errno));
				}

				fprintf(stderr, "Reading from %s\n", infile);
			}
#endif
			else if ( (ifp = fopen(infile, "rt")) == 0 )
			{
				die("ERROR: cannot fopen %s [%s]",
					infile, strerror(errno));
			}
			else
			{
				fprintf(stderr, "Reading from %s\n", infile);
			}
		}

		// no more files? we're done
		if ( ! STREAM_IS_OPEN() )
		{
			at_eof = TRUE;
			return FALSE;
		}

		// if we got some data, return it immediately

		if ( ifp  &&  getline(obuf, bufsize, ifp) )
		{
			return TRUE;
		}

#ifdef SUPPORT_ZLIB
		if ( gfp   && gzgetline(obuf, bufsize, gfp) )
		{
			return TRUE;
		}
#endif

		// got EOF on one file, close & advance to the next one
		if (ifp) { fclose(ifp);  ifp = 0; }
#ifdef SUPPORT_ZLIB
		if (gfp) { gzclose(gfp); gfp = 0; }
#endif
	}
}

/*
 * report_stats()
 *
 *	Print a one-line summary of the running counters, followed by the
 *	caller's message. Nothing is printed unless the user asked for
 *	statistics (-s), and then no more than one line per clock second:
 *	calls that arrive within the same second as the previous report
 *	are ignored.
 */
static void report_stats(const char *msg)
{
static time_t	previous_report = 0;

	if ( ! do_report_stats )
		return;

	const time_t now = time(0);

	// already reported during this second
	if ( now <= previous_report )
		return;

	const int finished = query_successes + query_unknowns;

	printf("STATS: %d lines, %d pending, %d results (%d success): %s\n",
		count_loglines,
		queries_pending,
		finished,
		query_successes,
		msg );

	previous_report = now;
}


/*
 * die()
 *
 *	Fatal-error exit: format the printf-style message to the standard
 *	error, finish it with a newline, and terminate the program with
 *	EXIT_FAILURE. This function does not return.
 */
static void die(const char *format, ...)
{
	va_list ap;

	va_start(ap, format);
	vfprintf(stderr, format, ap);
	va_end(ap);

	fputc('\n', stderr);

	exit(EXIT_FAILURE);
}
