[Date Prev][Date Next][Thread Prev][Thread Next][Date Index][Thread Index]

[Fwd: Your version of "dummynet"... [from a colleague]]



more notes from Denis on dummynet
-- 
Professor Lawrence A. Rowe          Internet:  Rowe@BMRC.Berkeley.EDU
Computer Science Division - EECS       Phone: 510-642-5117
University of California, Berkeley       Fax: 510-642-5615
Berkeley, CA 94720-1776            URL: http://bmrc.berkeley.edu/~larry

-- BEGIN included message


Larry,

Here's what my colleague Michael Stein, here at UCLA, did on Linux to
implement a "long delay path" for testing of his Virtual Worlds Server
(VWDS). I am sure that with some tinkering it can be adapted for testing
the RealServer problems that you are observing -- the only tinkering
needed might be to program his "delay" routine to deliver "variable"
instead of "fixed" delay.

Since you have Linux widely deployed in the lab, this might prove easier
than doing the dummynet thing on FreeBSD.

-- Denis


---------- Forwarded message ----------
Date: Fri, 1 Dec 2000 11:02:39 -0800
From: Michael Stein <mas@ucla.edu>
To: Denis DeLaRoca <delaroca@acacia.cts.ucla.edu>
Subject: Re: Your version of "dummynet"...

> I recall that using the ethertap driver in Linux you implemented something
> like "dummynet" to vary delay and loss rates while testing your Virtual
> Worlds server. Do you have your mods in any shape that you could release,
> or some recipe therein...

- - -

Basic dataView network topology:

  k2 -- dataView runs here

  c01/c02 -- VWDS compute nodes - dataView tileselector and Geo calc run here

To simulate a long path between k2 and the c01/c02 machines the routing was
changed on c01 and c02 so that packets to k2 were sent to an additional "delay"
machine t02 instead of directly to k2. This machine delayed these packets
and after the delay sent them on to k2.  This isn't a perfect simulation of a
long path, because the delay is not symmetric (only packets headed to k2 are
delayed) as it would be on a real long path; however, it does model the
correct delay at the packet level below TCP (including delaying TCP ACK
packets).

The delay was implemented in t02 by using the Linux 2.2 kernel's policy-based
routing to route packets arriving on the ethernet to the "ethertap"
kernel-to-userspace interface, while packets arriving from the "ethertap"
device were given a different routing (the normal routing out the ethernet).

Once all the routing is set up, a small user level program reads and
writes packets to/from the ethertap device to complete the path.
This program allows specifying the delay to introduce in the path
in usec.  These tests were run with a delay specified of 75000 usec (75 ms).
Ping times were showing around 78 ms.

 - - -

You need a Linux kernel with the "advanced routing" turned on.

A url might be: http://snafu.freedom.org/linux2.2/iproute-notes.html

----------------------------
You need routing like:

#!/bin/bash
# Routing setup for the delay machine: packets that arrive on eth0 are
# looked up in routing table 100, which holds a route out the tap0 device.

# make sure tap0 device is up
ifconfig tap0 192.168.5.5

# packets whose incoming interface is eth0 use routing table 100
ip rule add iif eth0 table 100
# routing table 100 holds a route out the tap0 (ethertap) device
ip route add table 100 dev tap0 

# flushing the routing cache is what makes the new rule and route take effect
ip route flush cache

# show the resulting rules and routes for verification
ip rule
ip route show table 100
ip route show 

# turn on IPv4 forwarding
echo "1" >/proc/sys/net/ipv4/ip_forward

----------------------------

My notes say:

ftp'ed from:

tcp        0      0 t02.vwds.oac.ucla.:1024 ftp.funet.fi:ftp        ESTABLISHED 

ftp> pwd 
257 "/pub/mirrors/ftp.inr.ac.ru/ip-routing" is the current directory.
ftp> 


 - - - - -

see also /usr/src/linux/Doc*/networking/ethertap.txt

maketap --> mas script to issue mknod for ethertap devices


built ip and tc commands; copied ip to /root/bin

printed ip command manual doc/ip-cref.ps (after make in doc directory)

put parm files in /etc/iproute2/*

------------------------------------------------------------------------------

The basic program looks like: (C++)

	/* pktdelay.cpp -- delay packets from/to ethertap */ 

#include <stdlib.h>
//#include <stddef.h>
#include <string.h>
//#include <assert.h>

#include <stdio.h>
#include <errno.h>

#include <unistd.h>

#include <sys/types.h>
#include <sys/stat.h>
#include <fcntl.h>

//#include <time.h>

//#include <signal.h>
#include <sys/types.h>
#include <sys/time.h>
//#include <memory.h>
//#include <sys/socket.h>
//#include <netinet/in.h>
//
#include <unistd.h>
//#include <sys/stat.h>
//#include <fcntl.h>
//
//#include <sys/socket.h>
//#include <netinet/in.h>
//#include <arpa/inet.h>
#include <sys/ioctl.h>

//#include <netdb.h>

// C++ standard library
#include <new>

const int USEC = 1000 * 1000;	// number of microseconds in one second

// Capacity in bytes of the packet area of one delaybuf.  The expression is
// parenthesized so that DL_BUFL can be used safely inside larger
// expressions (e.g. DL_BUFL * 2).
// NOTE(review): presumably 2048-64 keeps the whole struct under 2 KB --
// confirm before changing.
#define DL_BUFL (2048-64)

// delaybuf -- one packet buffer.  Buffers are chained through dl_link,
// either on the queue of packets waiting for their delay to expire or on
// the free list of reusable buffers.
struct delaybuf {
	struct delaybuf *dl_link;	// next buffer in the list, 0 at the end
	struct timeval	dl_when;	// time at which the packet is to be written back
	int		dl_datalen;	// number of valid bytes in dl_buf
	unsigned char	dl_buf[DL_BUFL];	// packet data as read from the tap device
};

// xMain -- the whole packet-delay program as one object.
//
// do_args() is the only public entry point: it parses the command line,
// opens the ethertap device and then runs the select loop.  All members
// are assigned by do_args()/do_open() before they are read; there is no
// constructor.
class xMain {
	int	xm_debug;		// debug level from -d (0 = quiet)
	const char *xm_fname;		// tap device file name; const because the
					// default is a string literal
	int	xm_delayu;		// requested delay in usec
	int	xm_cntpkt;		// count of packets read so far
	int	xm_tapfd;		// /dev/tapX file descriptor
	struct timeval xm_delay;	// delay in timeval fmt
	struct timeval xm_now;		// current time
	struct timeval xm_then;		// time next timer event
	fd_set	*xm_writefdsp;		// @ writefds or 0
	fd_set	xm_readfds;		// select read bits
	fd_set	xm_writefds;		// select write bits
	struct delaybuf *xm_buftop;	// head of the delay queue (oldest packet)
	struct delaybuf *xm_bufbot;	// tail of the delay queue (newest packet)
	struct delaybuf *xm_buffree;	// head of the free-buffer list

 public:
	// Parse argv, open the device, run the loop.
	int do_args(int argc, char **argv);

 private:
	void do_open(void);		// open the tap device, reset state
	void selectloop(void);		// main loop: wait, read, send
	void do_time(delaybuf *bp);	// delay expired: write packet back
	void do_read(void);		// read one packet and queue it
	delaybuf *bufget(void);		// get a buffer (free list or new)
	void buffree(delaybuf *bp);	// return a buffer to the free list
};

// Help text shown by usage(); one line per command-line element.
static char usage1[] =
	"pktdelay [-d #]  [-f tap-file-name] [delay-usec]\n"
	"-d\t\t\tdebug flags\n"
	"-f <tap-file-name> - file name for ethertap device (default /dev/tap0)\n"
	"delay-usec - amount to delay packets in usec\n";

// usage -- write the help text to stderr.
void usage(void)
{
	fputs(usage1, stderr);
}

int
main(int argc, char **argv) {
xMain x;

	return (x.do_args(argc, argv));
}

// do_args -- parse the command line, then open the device and run.
//
// Accepted arguments (see usage1):
//   -d <n>      debug level
//   -f <name>   ethertap device file (default /dev/tap0)
//   <usec>      delay in microseconds (at most one positional argument)
//
// A malformed flag, a flag without its value, or a second positional
// argument prints a message plus the usage text and exits with status 1.
// After parsing, do_open() and selectloop() are called; the final
// "return 0" is only reached if selectloop() returns.
int
xMain::do_args(int argc, char **argv) {
	// Writable storage for the default name, so it can be stored in
	// xm_fname without converting a string literal to a non-const pointer.
	static char default_tap[] = "/dev/tap0";
	int i;

	xm_debug = 0;
	xm_fname = 0;
	xm_delayu = 0;
	xm_cntpkt = 0;

	for (i = 1; i < argc; i++) {
		char *arg = argv[i];

		if (xm_debug) printf("arg %d !%s!\n", i, arg);

		if (arg[0] == '-') {
			// A flag is exactly '-' plus one letter.  arg[1] is tested
			// first so a bare "-" is never indexed past its terminator.
			const bool oneletter = (arg[1] != 0 && arg[2] == 0);
			if (!oneletter || (arg[1] != 'd' && arg[1] != 'f')) {
				fprintf(stderr, "invalid flag %s\n", arg);
				usage();
				exit(1);
			}
			// Both flags take a value; refuse to read argv[argc] (NULL).
			if (i + 1 >= argc) {
				fprintf(stderr, "missing value for flag %s\n", arg);
				usage();
				exit(1);
			}
			if (arg[1] == 'd') {
				xm_debug = atoi(argv[++i]);
			} else {
				xm_fname = argv[++i];
			}
			continue;
		}

		// positional argument: the delay, allowed only once
		if (xm_delayu) {
			fprintf(stderr, "invalid arg %s\n", arg);
			usage();
			exit(1);
		}
		xm_delayu = atoi(arg);
	}

	if (!xm_fname) {
		xm_fname = default_tap;
	}

	xm_delay.tv_sec  = xm_delayu / USEC;
	xm_delay.tv_usec = xm_delayu % USEC;

	// time_t / suseconds_t are not guaranteed to be long; cast for %ld
	fprintf(stderr, "using delay %ld sec %ld usec, ",
			static_cast<long>(xm_delay.tv_sec),
			static_cast<long>(xm_delay.tv_usec));

	fprintf(stderr, "ethertap device: %s, debug %d\n",
			xm_fname, xm_debug);

	do_open();

	selectloop();

	return 0;
}

//////////////////////////////////////////////////////////////////////////
// do_open -- reset the buffer lists and select sets, open the tap device
// named by xm_fname read/write, and switch it to non-blocking mode.
// Exits with status 1 if the open or the ioctl fails.
void xMain::do_open(void) {
	xm_buftop = 0;
	xm_bufbot = 0;
	xm_buffree = 0;
	xm_writefdsp = 0;
	FD_ZERO(&xm_readfds);
	FD_ZERO(&xm_writefds);

	xm_tapfd = open(xm_fname, O_RDWR);
	if (xm_tapfd < 0) {
		fprintf(stderr, "error opening ethertap device %s %s\n",
			xm_fname, strerror(errno));
		exit(1);
	}

	// Request non-blocking I/O on the tap descriptor so that a write
	// that would block can be noticed.  (The original author was not
	// sure the device honours this.)
	int nonblock = 1;
	if (ioctl(xm_tapfd, FIONBIO, &nonblock) != 0) {
		perror("ioctl FIONBIO failed");
		exit(1);
	}
}
//////////////////////////////////////////////////////////////////////////
// selectloop -- main loop; never returns.
//
// Each pass:
//   1. writes back (via do_time) every queued packet whose dl_when has
//      been reached, oldest first;
//   2. computes the time until the next queued packet is due, or 10
//      seconds if the queue is empty;
//   3. waits in select() for the tap device to become readable or for
//      that time to pass;
//   4. reads one packet (via do_read) if the device is readable.
//
// Exits with status 1 if select() fails for any reason other than being
// interrupted by a signal.
void xMain::selectloop(void) {
	for (;;) {
		// select() rewrites the set, so rebuild it on every pass
		FD_ZERO(&xm_readfds);
		FD_SET(xm_tapfd, &xm_readfds);

		gettimeofday(&xm_now, NULL);
		xm_then.tv_sec = 10;		// in case empty buffer queue
		xm_then.tv_usec = 0;

		while (xm_buftop) {
			delaybuf *bp = xm_buftop;

			// A packet is due once now >= dl_when.  Using >= (not >)
			// sends a packet due exactly now instead of polling
			// select() with a zero timeout until the clock ticks.
			const bool due =
				xm_now.tv_sec > bp->dl_when.tv_sec
				|| (xm_now.tv_sec == bp->dl_when.tv_sec
				    && xm_now.tv_usec >= bp->dl_when.tv_usec);
			if (due) {
				xm_buftop = bp->dl_link;
				do_time(bp);
				continue;
			}

			// head of queue is still in the future: wait that long
			xm_then.tv_sec  = bp->dl_when.tv_sec  - xm_now.tv_sec;
			xm_then.tv_usec = bp->dl_when.tv_usec - xm_now.tv_usec;
			if (xm_then.tv_usec < 0) {	// borrow from seconds
				xm_then.tv_sec--;
				xm_then.tv_usec += USEC;
			}
			break;
		}

		if (xm_debug) {
			// time_t / suseconds_t need not be long; cast for %lx
			if (xm_buftop) {
				printf("top: %8.8lx %8.8lx\n",
					static_cast<unsigned long>(xm_buftop->dl_when.tv_sec),
					static_cast<unsigned long>(xm_buftop->dl_when.tv_usec));
			}
			printf("now: %8.8lx %8.8lx\n",
				static_cast<unsigned long>(xm_now.tv_sec),
				static_cast<unsigned long>(xm_now.tv_usec));
			printf("then: %8.8lx %8.8lx\n",
				static_cast<unsigned long>(xm_then.tv_sec),
				static_cast<unsigned long>(xm_then.tv_usec));
		}

		const int rc = select(xm_tapfd+1, &xm_readfds, (fd_set *)0,
			(fd_set *)0, &xm_then);
		if (rc == -1) {
			if (errno == EINTR) {
				// interrupted by a signal: not an error, start over
				continue;
			}
			perror("Unable to Select Socket");
			exit(1);
		}

		if (FD_ISSET(xm_tapfd, &xm_readfds)) {
			do_read();
		}
	}
}

//////////////////////////////////////////////////////////////////////////
// do_time -- the delay for bp has expired: write its packet back to the
// tap device and return the buffer to the free list.
//
// A failed or short write is reported on stderr and the packet is
// dropped; the buffer is freed in every case.
void xMain::do_time(delaybuf *bp) {
	const int len = bp->dl_datalen;
	const int rc = static_cast<int>(write(xm_tapfd, bp->dl_buf, len));

	if (rc < 0) {
		// real failure: errno describes it
		fprintf(stderr, "write error rc %d, len %d, %s\n",
			rc, len, strerror(errno));
	} else if (rc != len) {
		// short write: errno was not set by write(), so do not print it
		fprintf(stderr, "write error rc %d, len %d, %s\n",
			rc, len, "short write");
	}

	buffree(bp);
}
//////////////////////////////////////////////////////////////////////////
// do_read -- read one packet from the tap device, stamp it with the time
// at which it is to be forwarded (now + xm_delay) and append it to the
// tail of the delay queue.
//
// If the read yields no packet the buffer goes straight back to the free
// list.  EAGAIN/EINTR are ignored silently because the descriptor is
// non-blocking; any other failure, or a zero-length read, is reported on
// stderr.
void xMain::do_read(void) {
	delaybuf *bp = bufget();

	const int rc = static_cast<int>(read(xm_tapfd, bp->dl_buf, DL_BUFL));

	if (rc <= 0) {
		const int err = errno;	// save before any other library call
		if (rc < 0 && (err == EAGAIN || err == EINTR)) {
			// nothing to read right now -- not an error
		} else if (rc < 0) {
			fprintf(stderr, "read error rc %d, %s\n",
				rc, strerror(err));
		} else {
			// rc == 0: read() did not set errno, so do not print it
			fprintf(stderr, "read error rc %d, %s\n",
				rc, "no data");
		}
		buffree(bp);
		return;
	}
	bp->dl_datalen = rc;

	if (rc == DL_BUFL) {
		// buffer completely filled: the packet may have been larger
		fprintf(stderr, "max len packet - might be truncated?\n");
	}

	xm_cntpkt++;

	// due time = now + delay, with usec carried into seconds
	gettimeofday(&xm_now, NULL);
	bp->dl_when.tv_sec  = xm_now.tv_sec  + xm_delay.tv_sec;
	bp->dl_when.tv_usec = xm_now.tv_usec + xm_delay.tv_usec;
	if (bp->dl_when.tv_usec >= USEC) {
		bp->dl_when.tv_usec -= USEC;
		bp->dl_when.tv_sec++;
	}

	// append at the tail; the delay is constant, so the queue stays
	// ordered by due time as long as the clock does not go backwards
	bp->dl_link = 0;
	if (xm_buftop == 0) {
		xm_buftop = bp;
	} else {
		xm_bufbot->dl_link = bp;
	}
	xm_bufbot = bp;
}
//////////////////////////////////////////////////////////////////////////
// bufget -- get buffer, always succeeds (or exits)
//
// Reuses the buffer at the head of the free list when there is one,
// otherwise allocates a new delaybuf.  The contents of the returned
// buffer are unspecified.  If allocation fails a message is printed and
// the program exits with status 42.
delaybuf *xMain::bufget(void) {
	delaybuf *bp = xm_buffree;

	if (bp != NULL) {
		xm_buffree = bp->dl_link;
		return bp;
	}

	// Plain new throws instead of returning 0, which would make the
	// check below dead code; the nothrow form reports failure as 0.
	bp = new (std::nothrow) delaybuf;
	if (bp != 0) return bp;

	// sizeof yields size_t, so print it as unsigned long rather than int
	fprintf(stderr, "bufget: new failed, len %lu\n",
		static_cast<unsigned long>(sizeof(delaybuf)));
	exit(42);
}
//////////////////////////////////////////////////////////////////////////
// buffree -- push bp onto the front of the free list so that a later
// bufget() can hand it out again.
void xMain::buffree(delaybuf *bp) {
	delaybuf *const old_head = xm_buffree;

	bp->dl_link = old_head;
	xm_buffree = bp;
}

-- END included message