more notes from Denis on dummynet -- Professor Lawrence A. Rowe Internet: Rowe@BMRC.Berkeley.EDU Computer Science Division - EECS Phone: 510-642-5117 University of California, Berkeley Fax: 510-642-5615 Berkeley, CA 94720-1776 URL: http://bmrc.berkeley.edu/~larry
-- BEGIN included message
- To: Larry Rowe <Rowe@bmrc.berkeley.edu>
- Subject: Re: Your version of "dummynet"... [from a colleague]
- From: Denis DeLaRoca <delaroca@acacia.cts.ucla.edu>
- Date: Fri, 1 Dec 2000 13:08:25 -0800 (PST)
Larry, Here's what my colleague Michael Stein, here at UCLA, did on Linux to implement a "long delay path" for testing of his Virtual Worlds Server (VWDS). I am sure with some tinkering it can be adapted for purposes of testing the RealServer problems that you are observing -- only tinkering needed might be to suitably program his "delay" routine to deliver "variable" instead of "fixed" delay. Since you have Linux widely deployed in the lab, this might prove easier than doing the dummynet thing on FreeBSD. -- Denis ---------- Forwarded message ---------- Date: Fri, 1 Dec 2000 11:02:39 -0800 From: Michael Stein <mas@ucla.edu> To: Denis DeLaRoca <delaroca@acacia.cts.ucla.edu> Subject: Re: Your version of "dummynet"... > I recall that using the ethertap driver in Linux you implemented something > like "dummynet" to vary delay and loss rates while testing your Virtual > Worlds server. Do you have your mods in any shape that you could release, > or some recipe therein... - - - Basic dataView network topology: k2 -- dataView runs here c01/c02 -- VWDS compute nodes - dataView tileselector and Geo calc run here To simulate a long path between k2 and the c01/c02 machines the routing was changed on c01 and c02 so that packets to k2 were sent to an additional "delay" machine t02 instead of directly to k2. This machine delayed these packets and after the delay sent them on to k2. This isn't a perfect simulation of a long path as the delay is not symmetric as it would be on a real long path however it does model the correct delay at the packet level below TCP (including delaying TCP ACK packets). The delay was implemented in t02 by using the Linux 2.2 kernel policy based routing to route inbound packets to the "ethertap" kernel to userspace interface while inbound packets from the "ethertap" device had a different routing (the normal routing out the ethernet). 
Once all the routing is set up, a small user level program reads and writes packets to/from the ethertap device to complete the path. This program allows specifying the delay to introduce in the path in usec. These tests were run with a delay specified of 75000 usec (75 ms). Ping times were showing around 78 ms. - - - You need a Linux kernel with the "advanced routing" turned on. A url might be: http://snafu.freedom.org/linux2.2/iproute-notes.html ---------------------------- You need routing like: #!/bin/bash # make sure tap0 device is up ifconfig tap0 192.168.5.5 ip rule add iif eth0 table 100 # ip route add table 100 dev tap0 # follow causes things to become active... ip route flush cache ip rule ip route show table 100 ip route show echo "1" >/proc/sys/net/ipv4/ip_forward ---------------------------- My notes say: ftp'ed from: tcp 0 0 t02.vwds.oac.ucla.:1024 ftp.funet.fi:ftp ESTABLISHED ftp> pwd 257 "/pub/mirrors/ftp.inr.ac.ru/ip-routing" is the current directory. ftp> - - - - - see also /usr/src/linux/Doc*/networking/ethertap.txt maketap --> mas script to issue mknod for ethertap devices built ip and tc commands; copied ip to /root/bin printed ip command manual doc/ip-cref.ps (after make in doc directory) put parm files in /etc/iproute2/* ------------------------------------------------------------------------------ The basic program looks like: (C++) /* pktdelay.cpp -- delay packets from/to ethertap */ #include <stdlib.h> //#include <stddef.h> #include <string.h> //#include <assert.h> #include <stdio.h> #include <errno.h> #include <unistd.h> #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> //#include <time.h> //#include <signal.h> #include <sys/types.h> #include <sys/time.h> //#include <memory.h> //#include <sys/socket.h> //#include <netinet/in.h> // #include <unistd.h> //#include <sys/stat.h> //#include <fcntl.h> // //#include <sys/socket.h> //#include <netinet/in.h> //#include <arpa/inet.h> #include <sys/ioctl.h> //#include <netdb.h> const 
int USEC = 1000000;     // microseconds per second

// One queued packet.  The same link field chains a buffer either into the
// delay queue (xm_buftop .. xm_bufbot) or into the free list (xm_buffree).
struct delaybuf {
    struct delaybuf *dl_link;       // next buffer in whichever list holds this one
    struct timeval dl_when;         // absolute time at which the packet is written back
    int dl_datalen;                 // number of valid bytes in dl_buf
// Parenthesized so the macro expands safely inside any expression.
#define DL_BUFL (2048-64)
    unsigned char dl_buf[DL_BUFL];  // packet bytes exactly as read from the tap device
};

// Packet delay engine: reads packets from an ethertap device, holds each one
// for a fixed delay, then writes it back to the same device.
class xMain {
    int xm_debug;                   // non-zero enables debug output
    const char *xm_fname;           // tap device path (argv string or the default literal)
    int xm_delayu;                  // requested delay in usec
    int xm_cntpkt;                  // count of packets read
    int xm_tapfd;                   // /dev/tapX file descriptor

    struct timeval xm_delay;        // delay in timeval fmt
    struct timeval xm_now;          // current time
    struct timeval xm_then;         // time next timer event

    fd_set *xm_writefdsp;           // @ writefds or 0
    fd_set xm_readfds;              // select read bits
    fd_set xm_writefds;             // select write bits

    struct delaybuf *xm_buftop;     // head of delay queue (oldest packet)
    struct delaybuf *xm_bufbot;     // tail of delay queue (only meaningful while xm_buftop != 0)
    struct delaybuf *xm_buffree;    // free list of recycled buffers

public:
    int do_args(int argc, char **argv);

private:
    void do_open(void);
    void selectloop(void);
    void do_time(delaybuf *bp);
    void do_read(void);
    delaybuf *bufget(void);
    void buffree(delaybuf *bp);
};

static char usage1[] =
    "pktdelay [-d #] [-f tap-file-name] [delay-usec]\n"
    "-d\t\t\tdebug flags\n"
    "-f <tap-file-name> - file name for ethertap device (default /dev/tap0)\n"
    "delay-usec - amount to delay packets in usec\n";

// Print the command line summary on stderr.
void usage(void)
{
    fputs(usage1, stderr);
}

int main(int argc, char **argv)
{
    xMain x;

    return (x.do_args(argc, argv));
}

// do_args -- parse the command line, then open the device and run the loop.
//
// Accepted arguments: "-d <n>" (debug level), "-f <path>" (tap device) and a
// single positional delay in usec.  Any malformed flag, a flag without its
// value, or a second positional argument prints usage and exits with 1.
// selectloop() never returns, so the final "return 0" is not reached.
int xMain::do_args(int argc, char **argv)
{
    xm_debug = 0;
    xm_fname = 0;
    xm_delayu = 0;
    xm_cntpkt = 0;

    for (int i = 1; i < argc; i++) {
        if (xm_debug)
            printf("arg %d !%s!\n", i, argv[i]);

        if (argv[i][0] == '-') {
            const char *flag = argv[i];

            // A flag is exactly '-' plus one known letter.  Testing flag[1]
            // first keeps us from reading past the NUL of a bare "-".
            if (flag[1] == 0 || flag[2] != 0 ||
                (flag[1] != 'd' && flag[1] != 'f')) {
                fprintf(stderr, "invalid flag %s\n", flag);
                usage();
                exit(1);
            }

            // Both flags consume the following argument; make sure it exists
            // instead of handing argv[argc] (a null pointer) to atoi().
            if (i + 1 >= argc) {
                fprintf(stderr, "flag %s requires a value\n", flag);
                usage();
                exit(1);
            }

            if (flag[1] == 'd')
                xm_debug = atoi(argv[++i]);
            else
                xm_fname = argv[++i];
            continue;
        }

        // Positional argument: only one (non-zero) delay may be given.
        if (xm_delayu) {
            fprintf(stderr, "invalid arg %s\n", argv[i]);
            usage();
            exit(1);
        }
        xm_delayu = atoi(argv[i]);
    }

    if (!xm_fname) {
        xm_fname = "/dev/tap0";
    }

    xm_delay.tv_sec = xm_delayu / USEC;
    xm_delay.tv_usec = xm_delayu % USEC;

    // time_t / suseconds_t are not guaranteed to be long; cast for %ld.
    fprintf(stderr, "using delay %ld sec %ld usec, ",
            static_cast<long>(xm_delay.tv_sec),
            static_cast<long>(xm_delay.tv_usec));
    fprintf(stderr, "ethertap device: %s, debug %d\n", xm_fname, xm_debug);

    do_open();
    selectloop();
    return 0;
}

//////////////////////////////////////////////////////////////////////////
// do_open -- reset the select sets and buffer lists, open the tap device
// read/write and switch it to non-blocking mode.  Exits with 1 on failure.
void xMain::do_open(void)
{
    FD_ZERO(&xm_readfds);
    FD_ZERO(&xm_writefds);
    xm_writefdsp = 0;

    xm_buftop = xm_bufbot = xm_buffree = 0;

    xm_tapfd = open(xm_fname, O_RDWR);
    if (xm_tapfd < 0) {
        fprintf(stderr, "error opening ethertap device %s %s\n",
                xm_fname, strerror(errno));
        exit(1);
    }

    // set ethertap device to non-block (does this work?)
    // (this allows noticing if writes would block...)
    int nonblock = 1;
    if (ioctl(xm_tapfd, FIONBIO, &nonblock)) {
        perror("ioctl FIONBIO failed");
        exit(1);
    }
}

//////////////////////////////////////////////////////////////////////////
// selectloop -- main event loop; never returns.
//
// Each pass writes out every queued packet whose delay has expired, computes
// how long until the next queued packet is due (10 seconds when the queue is
// empty), and waits in select() for that long or until the tap device is
// readable.
void xMain::selectloop(void)
{
    for (;;) {
        FD_SET(xm_tapfd, &xm_readfds);
        gettimeofday(&xm_now, NULL);

        xm_then.tv_sec = 10;        // in case empty buffer queue
        xm_then.tv_usec = 0;

        while (xm_buftop) {
            delaybuf *bp = xm_buftop;

            // ">=" on the usec part: a packet due exactly now is sent now
            // rather than after a zero-timeout trip through select().
            const bool due =
                xm_now.tv_sec > bp->dl_when.tv_sec ||
                (xm_now.tv_sec == bp->dl_when.tv_sec &&
                 xm_now.tv_usec >= bp->dl_when.tv_usec);

            if (due) {
                xm_buftop = bp->dl_link;
                do_time(bp);
                continue;
            }

            // Head of queue is still in the future: wait just that long.
            xm_then.tv_sec = bp->dl_when.tv_sec - xm_now.tv_sec;
            xm_then.tv_usec = bp->dl_when.tv_usec - xm_now.tv_usec;
            break;
        }

        // Borrow a second if the usec subtraction went negative.
        if (xm_then.tv_usec < 0) {
            xm_then.tv_sec--;
            xm_then.tv_usec += USEC;
        }

        if (xm_debug) {
            if (xm_buftop) {
                const delaybuf *top = xm_buftop;
                printf("top: %8.8lx %8.8lx\n",
                       static_cast<unsigned long>(top->dl_when.tv_sec),
                       static_cast<unsigned long>(top->dl_when.tv_usec));
            }
            printf("now: %8.8lx %8.8lx\n",
                   static_cast<unsigned long>(xm_now.tv_sec),
                   static_cast<unsigned long>(xm_now.tv_usec));
            printf("then: %8.8lx %8.8lx\n",
                   static_cast<unsigned long>(xm_then.tv_sec),
                   static_cast<unsigned long>(xm_then.tv_usec));
        }

        const int rc = select(xm_tapfd + 1, &xm_readfds,
                              (fd_set *)0, (fd_set *)0, &xm_then);
        if (rc == -1) {
            // A signal interrupting select() is not fatal; the next pass
            // rebuilds the read set and the timeout from scratch.
            if (errno == EINTR)
                continue;
            perror("Unable to Select Socket");
            exit(1);
        }

        if (FD_ISSET(xm_tapfd, &xm_readfds)) {
            do_read();
        }
    }
}

//////////////////////////////////////////////////////////////////////////
// do_time -- delay expired: write the packet back to the tap device and
// recycle its buffer.  A failed or short write is reported and the packet
// is dropped.
void xMain::do_time(delaybuf *bp)
{
    const int rc = write(xm_tapfd, bp->dl_buf, bp->dl_datalen);
    if (rc != bp->dl_datalen) {
        // errno is only meaningful when write() itself failed.
        fprintf(stderr, "write error rc %d, len %d, %s\n",
                rc, bp->dl_datalen,
                rc < 0 ? strerror(errno) : "short write");
    }
    buffree(bp);
}

//////////////////////////////////////////////////////////////////////////
// do_read -- read one packet from the tap device, stamp it with its release
// time (now + configured delay) and append it to the tail of the delay queue.
void xMain::do_read(void)
{
    delaybuf *bp = bufget();

    const int rc = read(xm_tapfd, bp->dl_buf, DL_BUFL);
    if (rc <= 0) {
        // errno is only meaningful when read() itself failed.
        fprintf(stderr, "read error rc %d, %s\n",
                rc, rc < 0 ? strerror(errno) : "no data");
        buffree(bp);
        return;
    }

    bp->dl_datalen = rc;

    if (rc == DL_BUFL) {
        fprintf(stderr, "max len packet - might be truncated?\n");
    }

    xm_cntpkt++;

    gettimeofday(&xm_now, NULL);

    bp->dl_when.tv_sec = xm_now.tv_sec + xm_delay.tv_sec;
    bp->dl_when.tv_usec = xm_now.tv_usec + xm_delay.tv_usec;
    if (bp->dl_when.tv_usec >= USEC) {  // carry into seconds
        bp->dl_when.tv_usec -= USEC;
        bp->dl_when.tv_sec++;
    }

    // Append at the tail.  xm_bufbot is stale whenever the queue is empty,
    // so the head pointer decides which case applies.
    bp->dl_link = 0;
    if (xm_buftop == 0) {
        xm_buftop = bp;
    } else {
        xm_bufbot->dl_link = bp;
    }
    xm_bufbot = bp;
}

//////////////////////////////////////////////////////////////////////////
// bufget -- get buffer, always succeeds (or exits)
//
// Reuses a buffer from the free list when one is available; otherwise
// allocates a new one.  malloc() is used so that allocation failure is
// reported through a null return and the exit(42) path is actually reachable.
delaybuf *xMain::bufget(void)
{
    delaybuf *bp = xm_buffree;
    if (bp != NULL) {
        xm_buffree = bp->dl_link;
        return bp;
    }

    bp = static_cast<delaybuf *>(malloc(sizeof(delaybuf)));
    if (bp != NULL)
        return bp;

    fprintf(stderr, "bufget: malloc failed, len %lu\n",
            static_cast<unsigned long>(sizeof(delaybuf)));
    exit(42);
}

//////////////////////////////////////////////////////////////////////////
// buffree -- push a buffer onto the free list for reuse by bufget().
void xMain::buffree(delaybuf *bp)
{
    bp->dl_link = xm_buffree;
    xm_buffree = bp;
}
-- END included message