#include "rx_locl.h"

RCSID("$arla: rx_pkt.c,v 1.22 2003/04/08 22:15:08 lha Exp $");

/* Most recent allocations made by rxi_MorePackets / rxi_MoreCbufs. */
struct rx_packet *rx_mallocedP = 0;
struct rx_cbuf *rx_mallocedC = 0;

/* string to send to rxdebug */
#define CML_VERSION_NUMBER_SIZE 65
static char cml_version_number[CML_VERSION_NUMBER_SIZE]= PACKAGE "-" VERSION ;

extern int (*rx_almostSent) ();

/*
 * some rules about packets:
 * 1.  When a packet is allocated, the final iov_buf contains room for
 * a security trailer, but iov_len masks that fact.  If the security
 * package wants to add the trailer, it may do so, and then extend
 * iov_len appropriately.  For this reason, packet's niovecs and
 * iov_len fields should be accurate before calling PreparePacket.
 */

/*
 * Fetch the 32-bit word stored `offset' bytes into the packet's data
 * area (wirevec[1] .. wirevec[niovecs-1]; wirevec[0] is skipped).
 * The word is returned exactly as stored, no byte swapping is done.
 * Returns 0 when offset is negative or lies beyond the data iovecs.
 *
 * Preconditions:
 *        all packet buffers (iov_base) are integral multiples of
 *        the word size.
 *        offset is an integral multiple of the word size.
 */
uint32_t
rx_SlowGetLong(struct rx_packet *packet, int offset)
{
    int i;
    long l;                     /* data bytes in the iovecs before wirevec[i] */

    /*
     * Reject negative offsets explicitly instead of relying on the
     * signed/unsigned conversion in the comparison below.
     */
    if (offset < 0)
        return 0;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
        long len = (long) packet->wirevec[i].iov_len;

        if (offset < l + len) {
            uint32_t data;

            /* memcpy avoids aliasing/alignment assumptions of a cast */
            memcpy(&data,
                   (char *) packet->wirevec[i].iov_base + (offset - l),
                   sizeof(data));
            return data;
        }
        l += len;
    }

    return 0;
}

/*
 * Store the 32-bit word `data' (as is, no byte swapping) `offset'
 * bytes into the packet's data area (wirevec[1] .. wirevec[niovecs-1]).
 * Returns 0 on success, 1 when offset is negative or lies beyond the
 * data iovecs.
 *
 * Preconditions:
 *        all packet buffers (iov_base) are integral multiples of the
 *        word size.
 *        offset is an integral multiple of the word size.
 */
int
rx_SlowPutLong(struct rx_packet *packet, int offset, uint32_t data)
{
    int i;
    long l;                     /* data bytes in the iovecs before wirevec[i] */

    if (offset < 0)
        return 1;

    for (l = 0, i = 1; i < packet->niovecs; i++) {
        long len = (long) packet->wirevec[i].iov_len;

        if (offset < l + len) {
            memcpy((char *) packet->wirevec[i].iov_base + (offset - l),
                   &data, sizeof(data));
            return 0;
        }
        l += len;
    }

    return 1;
}

/*
 * Preconditions:
 *        all packet buffers (iov_base) are integral multiples of the
 *        word size.
 *        offset is an integral multiple of the word size.
* Packet Invariants: * all buffers are contiguously arrayed in the iovec from 0..niovecs-1 */ size_t rx_SlowReadPacket(struct rx_packet *packet, int offset, int resid, void *out) { int i; unsigned char *p = out; size_t bytes; for(i = 1; (i < packet->niovecs) && (offset + (ssize_t)resid > 0); i++) { if(offset < packet->wirevec[i].iov_len) { /* at this point the intersection of this iovec and [offset, offset+resid) is non-empty, so we can copy min(base + len, base + offset + resid) - max(base, base + offset) bytes */ bytes = min(packet->wirevec[i].iov_len, offset + resid) - max(offset, 0); memcpy(p, (char *)packet->wirevec[i].iov_base + max(offset, 0), bytes); p += bytes; } offset -= packet->wirevec[i].iov_len; } return p - (unsigned char *)out; } /* * Preconditions: * all packet buffers (iov_base) are integral multiples of the * word size. * offset is an integral multiple of the word size. */ size_t rx_SlowWritePacket(struct rx_packet *packet, int offset, int resid, void *in) { int i; unsigned char *p = in; size_t bytes; for(i = 1; i < RX_MAXWVECS && offset + resid > 0; i++) { if(i >= packet->niovecs) if(rxi_AllocDataBuf(packet, resid)) break; if(offset < packet->wirevec[i].iov_len) { /* at this point the intersection of this iovec and [offset, offset+resid) is non-empty, so we can copy min(base + len, base + offset + resid) - max(base, base + offset) bytes */ bytes = min(packet->wirevec[i].iov_len, offset + resid) - max(offset, 0); memcpy((char *)(packet->wirevec[i].iov_base) + max(offset, 0), p, bytes); p += bytes; } offset -= packet->wirevec[i].iov_len; } return p - (unsigned char *)in; } static void freeCBuf(struct rx_cbuf *c) { SPLVAR; dpf(("Free cbuf %x\n", c)); NETPRI; MObtainWriteLock(&rx_freePktQ_lock); queue_Append(&rx_freeCbufQueue, c); rx_nFreeCbufs++; MReleaseWriteLock(&rx_freePktQ_lock); USERPRI; return; } static struct rx_cbuf * allocCBuf(void) { struct rx_cbuf *c; SPLVAR; NETPRI; MObtainWriteLock(&rx_freePktQ_lock); if 
(queue_IsEmpty(&rx_freeCbufQueue)) { #ifdef KERNEL c = NULL; rxi_NeedMoreCbufs = TRUE; goto done; #else /* KERNEL */ rxi_MoreCbufs(rx_Window); #endif /* KERNEL */ } rx_nFreeCbufs--; c = queue_First(&rx_freeCbufQueue, rx_cbuf); dpf(("Alloc cb %x\n", c)); queue_Remove(c); #ifdef KERNEL done: #endif MReleaseWriteLock(&rx_freePktQ_lock); USERPRI; return c; } /* Allocate more CBufs iff we need them */ /* * In kernel, can't page in memory with interrupts disabled, so we * don't use the event mechanism. */ void rx_CheckCbufs(unsigned long when) /* time when I should be called next */ { struct clock now; clock_GetTime(&now); if (rxi_NeedMoreCbufs) { rxi_MoreCbufs(rx_Window); } #ifndef KERNEL now.sec += RX_CBUF_TIME; rxevent_Post(&now, rx_CheckCbufs, (void *)now.sec, NULL); #endif } /* * this one is kind of awful. * In rxkad, the packet has been all shortened, and everything, ready for * sending. All of a sudden, we discover we need some of that space back. * This isn't terribly general, because it knows that the packets are only * rounded up to the EBS (userdata + security header). */ int rxi_RoundUpPacket(struct rx_packet *p, unsigned int nb) { int i; i = p->niovecs - 1; if (p->wirevec[i].iov_base == (caddr_t) p->localdata) { if (p->wirevec[i].iov_len <= RX_FIRSTBUFFERSIZE - nb) { p->wirevec[i].iov_len += nb; return 0; } } else { if (p->wirevec[i].iov_len <= RX_CBUFFERSIZE - nb) { p->wirevec[i].iov_len += nb; return 0; } } return 0; } /* get sufficient space to store nb bytes of data (or more), and hook * it into the supplied packet. Return nbytes<=0 if successful, otherwise * returns the number of bytes >0 which it failed to come up with. * Don't need to worry about locking on packet, since only * one thread can manipulate one at a time. 
Locking on cbufs is handled * by allocCBuf */ /* MTUXXX don't need to go throught the for loop if we can trust niovecs */ int rxi_AllocDataBuf(struct rx_packet *p, int nb) { int i; for (i = 0; nb > 0 && i < RX_MAXWVECS; i++) { if (p->wirevec[i].iov_base) continue; switch (i) { case 1: p->wirevec[i].iov_len = RX_FIRSTBUFFERSIZE; p->wirevec[i].iov_base = (caddr_t) p->localdata; nb -= RX_FIRSTBUFFERSIZE; p->length += RX_FIRSTBUFFERSIZE; break; default: { struct rx_cbuf *cb; if ((cb = allocCBuf()) != NULL) { p->wirevec[i].iov_base = (caddr_t) cb->data; p->wirevec[i].iov_len = RX_CBUFFERSIZE; nb -= RX_CBUFFERSIZE; p->length += RX_CBUFFERSIZE; p->niovecs++; } else i = RX_MAXWVECS; } break; } } return nb; } int rxi_FreeDataBufs(struct rx_packet *p, int first) { int i; if (first != 1) /* MTUXXX */ osi_Panic("FreeDataBufs 1: first must be 1"); for (i = first; i < RX_MAXWVECS; i++) { if (p->wirevec[i].iov_base) { if (p->wirevec[i].iov_base != (caddr_t) p->localdata) { freeCBuf((struct rx_cbuf *)((char *)p->wirevec[i].iov_base - sizeof(struct rx_queue))); } p->wirevec[i].iov_base = NULL; } else if (i == 1) /* MTUXXX */ osi_Panic("FreeDataBufs 4: vec 1 must not be NULL"); p->wirevec[i].iov_len = 0; } p->length = 0; return 0; } /* * add n more fragment buffers (continuation buffers) * Must be called at user priority or will crash RS/6000s */ void rxi_MoreCbufs(int n) { struct rx_cbuf *c, *e; int getme; SPLVAR; if (!n) return; getme = n * sizeof(struct rx_cbuf); c = rx_mallocedC = (struct rx_cbuf *) osi_Alloc(getme); if (!c) return; memset(c, 0, getme); PIN(c, getme); /* XXXXX */ NETPRI; MObtainWriteLock(&rx_freePktQ_lock); for (e = c + n; c < e; c++) { queue_Append(&rx_freeCbufQueue, c); } rxi_NeedMoreCbufs = FALSE; rx_nFreeCbufs += n; rx_nCbufs += n; MReleaseWriteLock(&rx_freePktQ_lock); USERPRI; return; } /* Add more packet buffers */ void rxi_MorePackets(int apackets) { struct rx_packet *p, *e; int getme; SPLVAR; getme = apackets * sizeof(struct rx_packet); p = rx_mallocedP 
= (struct rx_packet *) osi_Alloc(getme); PIN(p, getme); /* XXXXX */ memset((char *) p, 0, getme); NETPRI; MObtainWriteLock(&rx_freePktQ_lock); for (e = p + apackets; p < e; p++) { p->wirevec[0].iov_base = (char *) (p->wirehead); p->wirevec[0].iov_len = RX_HEADER_SIZE; p->wirevec[1].iov_base = (char *) (p->localdata); p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE; p->niovecs = 2; queue_Append(&rx_freePacketQueue, p); } rx_nFreePackets += apackets; MReleaseWriteLock(&rx_freePktQ_lock); USERPRI; /* * allocate enough cbufs that 1/4 of the packets will be able to hold * maximal amounts of data */ /* MTUXXX enable this -- currently disabled for testing rxi_MoreCbufs((apackets/4)*(rx_maxReceiveSize - RX_FIRSTBUFFERSIZE)/RX_CBUFFERSIZE); */ } void rxi_FreeAllPackets(void) { /* must be called at proper interrupt level, etcetera */ /* MTUXXX need to free all Cbufs */ osi_Free(rx_mallocedP, (rx_Window + 2) * sizeof(struct rx_packet)); UNPIN(rx_mallocedP, (rx_Window + 2) * sizeof(struct rx_packet)); return; } /* * In the packet freeing routine below, the assumption is that * we want all of the packets to be used equally frequently, so that we * don't get packet buffers paging out. It would be just as valid to * assume that we DO want them to page out if not many are being used. * In any event, we assume the former, and append the packets to the end * of the free list. */ /* * This explanation is bogus. The free list doesn't remain in any kind of * useful order for long: the packets in use get pretty much randomly scattered * across all the pages. In order to permit unused {packets,bufs} to page * out, they must be stored so that packets which are adjacent in memory are * adjacent in the free list. An array springs rapidly to mind. */ /* * Free the packet p. P is assumed not to be on any queue, i.e. * remove it yourself first if you call this routine. 
*/ void rxi_FreePacket(struct rx_packet *p) { SPLVAR; dpf(("Free %x\n", p)); rxi_FreeDataBufs(p, 1); /* this gets the locks below, so must * call it first */ NETPRI; MObtainWriteLock(&rx_freePktQ_lock); rx_nFreePackets++; queue_Append(&rx_freePacketQueue, p); /* Wakeup anyone waiting for packets */ rxi_PacketsUnWait(); MReleaseWriteLock(&rx_freePktQ_lock); USERPRI; } /* * rxi_AllocPacket sets up p->length so it reflects the number of * bytes in the packet at this point, **not including** the header. * The header is absolutely necessary, besides, this is the way the * length field is usually used */ struct rx_packet * rxi_AllocPacket(int class) { struct rx_packet *p; if (rxi_OverQuota(class)) { rx_stats.noPackets[class]++; return NULL; } rx_stats.packetRequests++; MObtainWriteLock(&rx_freePktQ_lock); rx_nFreePackets--; if (queue_IsEmpty(&rx_freePacketQueue)) osi_Panic("rxi_AllocPacket error"); p = queue_First(&rx_freePacketQueue, rx_packet); dpf(("Alloc %x, class %d\n", queue_First(&rx_freePacketQueue, rx_packet), class)); queue_Remove(p); MReleaseWriteLock(&rx_freePktQ_lock); /* * have to do this here because rx_FlushWrite fiddles with the iovs in * order to truncate outbound packets. In the near future, may need to * allocate bufs from a static pool here, and/or in AllocSendPacket */ p->wirevec[0].iov_base = (char *) (p->wirehead); p->wirevec[0].iov_len = RX_HEADER_SIZE; p->wirevec[1].iov_base = (char *) (p->localdata); p->wirevec[1].iov_len = RX_FIRSTBUFFERSIZE; p->niovecs = 2; p->length = RX_FIRSTBUFFERSIZE; return p; } /* * This guy comes up with as many buffers as it {takes,can get} given * the MTU for this call. It also sets the packet length before * returning. 
caution: this is often called at NETPRI */ struct rx_packet * rxi_AllocSendPacket(struct rx_call *call, int want) { struct rx_packet *p = (struct rx_packet *) 0; int mud; SPLVAR; mud = call->conn->maxPacketSize - RX_HEADER_SIZE; while (!(call->error)) { /* if an error occurred, or we get the packet we want, we're done */ if ((p = rxi_AllocPacket(RX_PACKET_CLASS_SEND)) != NULL) { want += rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) + rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call)); want = MIN(want, mud); if (want > p->length) (void) rxi_AllocDataBuf(p, (want - p->length)); if (p->length > mud) p->length = mud; p->length -= rx_GetSecurityHeaderSize(rx_ConnectionOf(call)) + rx_GetSecurityMaxTrailerSize(rx_ConnectionOf(call)); if (p->length <= 0) { rxi_FreePacket(p); p = NULL; } break; } /* * no error occurred, and we didn't get a packet, so we sleep. At * this point, we assume that packets will be returned sooner or * later, as packets are acknowledged, and so we just wait. */ NETPRI; RX_MUTEX_ENTER(&rx_waitingForPackets_lock); rx_waitingForPackets = 1; call->flags |= RX_CALL_WAIT_PACKETS; #ifdef RX_ENABLE_LOCKS cv_wait(&rx_waitingForPackets_cv, &rx_waitingForPackets_lock); #else osi_rxSleep(&rx_waitingForPackets); #endif call->flags &= ~RX_CALL_WAIT_PACKETS; RX_MUTEX_EXIT(&rx_waitingForPackets_lock); USERPRI; } return p; } #ifndef KERNEL /* count the number of used FDs */ static int CountFDs(int amax) { struct stat tstat; int i, code; int count; count = 0; for (i = 0; i < amax; i++) { code = fstat(i, &tstat); if (code == 0) count++; } return count; } /* * This function reads a single packet from the interface into the * supplied packet buffer (*p). Return 0 if the packet is bogus. The * (host,port) of the sender are stored in the supplied variables, and * the data length of the packet is stored in the packet structure. * The header is decoded. 
*/ int rxi_ReadPacket(int socket, struct rx_packet *p, uint32_t *host, uint16_t *port) { struct sockaddr_in from; int nbytes; long rlen; long tlen; long _tlen; struct msghdr msg; uint32_t dummy; /* was using rlen but had aliasing * problems */ rx_computelen(p, tlen); rx_SetDataSize(p, tlen); /* this is the size of the user data * area */ tlen += RX_HEADER_SIZE; /* now this is the size of the entire * packet */ rlen = rx_maxReceiveSize; /* this is what I am advertising. * Only check it once in order to * avoid races. */ _tlen = rlen - tlen; if (_tlen > 0) { _tlen = rxi_AllocDataBuf(p, _tlen); if (_tlen >0) { _tlen = rlen - _tlen; } else _tlen = rlen; } else _tlen = rlen; tlen=(tlen>_tlen)?tlen:_tlen; /* * set up this one iovec for padding, it's just to make sure that the * read doesn't return more data than we expect, and is done to get * around our problems caused by the lack of a length field in the rx * header. */ p->wirevec[p->niovecs].iov_base = (caddr_t) & dummy; p->wirevec[p->niovecs++].iov_len = 4; memset(&msg, 0, sizeof(msg)); msg.msg_name = (char *) &from; msg.msg_namelen = sizeof(struct sockaddr_in); msg.msg_iov = p->wirevec; msg.msg_iovlen = p->niovecs; #if 0 msg.msg_accrights = NULL; msg.msg_accrightslen = 0; #endif nbytes = recvmsg(socket, &msg, 0); /* restore the vec to its correct state */ p->wirevec[--p->niovecs].iov_base = NULL; p->wirevec[p->niovecs].iov_len = 0; if (nbytes < 0) { /* ignore error? */ return 0; } p->length = (nbytes - RX_HEADER_SIZE); if ((nbytes > tlen) || (nbytes < (int)RX_HEADER_SIZE)) { /* Bogus packet */ if (nbytes > 0) rxi_MoreCbufs(rx_Window); if (nbytes > tlen) rxi_AllocDataBuf(p, nbytes - tlen); else if (nbytes < 0 && errno == EWOULDBLOCK) rx_stats.noPacketOnRead++; else { rx_stats.bogusPacketOnRead++; rx_stats.bogusHost = from.sin_addr.s_addr; dpf(("B: bogus packet from [%x,%d] nb=%d", from.sin_addr.s_addr, from.sin_port, nbytes)); } return 0; } else { /* Extract packet header. 
*/ rxi_DecodePacketHeader(p); *host = from.sin_addr.s_addr; *port = from.sin_port; if (p->header.type > 0 && p->header.type < RX_N_PACKET_TYPES) rx_stats.packetsRead[p->header.type - 1]++; return 1; } } /* Send a udp datagram */ int osi_NetSend(osi_socket socket, char *addr, struct iovec *dvec, int nvecs, int length) { struct msghdr msg; memset(&msg, 0, sizeof(msg)); msg.msg_iov = dvec; msg.msg_iovlen = nvecs; msg.msg_name = addr; msg.msg_namelen = sizeof(struct sockaddr_in); #if 0 msg.msg_accrights = NULL; msg.msg_accrightslen = 0; #endif while (sendmsg(socket, &msg, 0) == -1) { int err; fd_set sfds; rx_stats.sendSelects++; if (errno != EWOULDBLOCK && errno != ENOBUFS && errno != ECONNREFUSED) { osi_Msg(("rx failed to send packet: %s ", strerror(errno))); return 3; } dpf(("rx_send failed with %d\n", errno)); FD_ZERO(&sfds); if (socket >= FD_SETSIZE) osi_Panic("osi_NetSend: fd too large"); FD_SET(socket, &sfds); while ((err = select(socket + 1, 0, &sfds, 0, 0)) != 1) { if (err >= 0 || errno != EINTR) osi_Panic("osi_NetSend: select error %d.%d", err, errno); } } return 0; } #else /* KERNEL */ /* * osi_NetSend is defined in afs/afs_osinet.c * message receipt is done in rxk_input or rx_put. */ #ifdef AFS_SUN5_ENV /* * Copy an mblock to the contiguous area pointed to by cp. * MTUXXX Supposed to skip bytes and copy bytes, * but it doesn't really. * Returns the number of bytes not transferred. * The message is NOT changed. */ static int cpytoc(mblk_t *mp, int off, int len, char *cp) { int n; for (; mp && len > 0; mp = mp->b_cont) { if (mp->b_datap->db_type != M_DATA) { return -1; } n = MIN(len, (mp->b_wptr - mp->b_rptr)); memcpy(cp, mp->b_rptr, n); cp += n; len -= n; mp->b_rptr += n; } return (len); } /* * MTUXXX Supposed to skip bytes and copy bytes, * but it doesn't really. * This sucks, anyway, do it like m_cpy.... 
below */ static int cpytoiovec(mblk_t *mp, int off, int len, struct iovec *iovs, int niovs) { int m, n, o, t, i; for (i = -1, t = 0; i < niovs && mp && len > 0; mp = mp->b_cont) { if (mp->b_datap->db_type != M_DATA) { return -1; } n = MIN(len, (mp->b_wptr - mp->b_rptr)); len -= n; while (n) { if (!t) { o = 0; i++; t = iovs[i].iov_len; } m = MIN(n, t); memcpy(iovs[i].iov_base + o, mp->b_rptr, m); mp->b_rptr += m; o += m; t -= m; n -= m; } } return (len); } #define m_cpytoc(a, b, c, d) cpytoc(a, b, c, d) #define m_cpytoiovec(a, b, c, d, e) cpytoiovec(a, b, c, d, e) #else static int m_cpytoiovec(struct mbuf *m, int off, int len, struct iovec iovs[], int niovs) { caddr_t p1, p2; unsigned int l1, l2, i, t; if (m == NULL || off < 0 || len < 0 || iovs == NULL) panic("m_cpytoiovec"); /* MTUXXX probably don't need this * check */ while (off && m) if (m->m_len <= off) { off -= m->m_len; m = m->m_next; continue; } else break; if (m == NULL) return len; p1 = mtod(m, caddr_t) + off; l1 = m->m_len - off; i = 0; p2 = iovs[0].iov_base; l2 = iovs[0].iov_len; while (len) { t = MIN(l1, MIN(l2, (unsigned int) len)); memcpy(p2, p1, t); p1 += t; p2 += t; l1 -= t; l2 -= t; len -= t; if (!l1) { m = m->m_next; if (!m) break; p1 = mtod(m, caddr_t); l1 = m->m_len; } if (!l2) { if (++i >= niovs) break; p2 = iovs[i].iov_base; l2 = iovs[i].iov_len; } } return len; } #endif /* AFS_SUN5_ENV */ int rx_mb_to_packet(char *amb, void (*free)(), int hdr_len, int data_len, struct rx_packet *phandle) { int code; code = m_cpytoiovec(amb, hdr_len, data_len, phandle->wirevec, phandle->niovecs); (*free) (amb); return code; } #define CountFDs(amax) amax #endif /* KERNEL */ /* send a response to a debug packet */ struct rx_packet * rxi_ReceiveDebugPacket(struct rx_packet *ap, osi_socket asocket, uint32_t ahost, uint16_t aport) { struct rx_debugIn tin; long tl; rx_packetread(ap, 0, sizeof(struct rx_debugIn), (char *) &tin); /* * all done with packet, now set length to the truth, so we can reuse * this packet */ 
rx_computelen(ap, ap->length); tin.type = ntohl(tin.type); tin.index = ntohl(tin.index); switch (tin.type) { case RX_DEBUGI_GETSTATS:{ struct rx_debugStats tstat; /* get basic stats */ memset((char *) &tstat, 0, sizeof(tstat)); /* make sure spares are * zero */ tstat.version = RX_DEBUGI_VERSION; #ifndef RX_ENABLE_LOCKS tstat.waitingForPackets = rx_waitingForPackets; #endif tstat.nFreePackets = htonl(rx_nFreePackets); tstat.callsExecuted = htonl(rxi_nCalls); tstat.packetReclaims = htonl(0); tstat.usedFDs = CountFDs(64); tstat.nWaiting = htonl(rx_nWaiting); tl = sizeof(struct rx_debugStats) - ap->length; if (tl > 0) tl = rxi_AllocDataBuf(ap, tl); if (tl <= 0) { rx_packetwrite(ap, 0, sizeof(struct rx_debugStats), (char *) &tstat); ap->length = sizeof(struct rx_debugStats); rxi_SendDebugPacket(ap, asocket, ahost, aport); rx_computelen(ap, ap->length); } break; } case RX_DEBUGI_GETALLCONN: case RX_DEBUGI_GETCONN:{ int i, j; struct rx_connection *tc; struct rx_call *tcall; struct rx_debugConn tconn; int all = (tin.type == RX_DEBUGI_GETALLCONN); tl = sizeof(struct rx_debugConn) - ap->length; if (tl > 0) tl = rxi_AllocDataBuf(ap, tl); if (tl > 0) return ap; memset((char *) &tconn, 0, sizeof(tconn)); /* make sure spares are * zero */ /* get N'th (maybe) "interesting" connection info */ for (i = 0; i < rx_hashTableSize; i++) { for (tc = rx_connHashTable[i]; tc; tc = tc->next) { if ((all || rxi_IsConnInteresting(tc)) && tin.index-- <= 0) { tconn.host = tc->peer->host; tconn.port = tc->peer->port; tconn.cid = htonl(tc->cid); tconn.epoch = htonl(tc->epoch); tconn.serial = htonl(tc->serial); for (j = 0; j < RX_MAXCALLS; j++) { tconn.callNumber[j] = htonl(tc->callNumber[j]); if ((tcall = tc->call[j]) != NULL) { tconn.callState[j] = tcall->state; tconn.callMode[j] = tcall->mode; tconn.callFlags[j] = tcall->flags; if (queue_IsNotEmpty(&tcall->rq)) tconn.callOther[j] |= RX_OTHER_IN; if (queue_IsNotEmpty(&tcall->tq)) tconn.callOther[j] |= RX_OTHER_OUT; } else tconn.callState[j] = 
RX_STATE_NOTINIT; } tconn.maxPacketSize = htonl(tc->maxPacketSize); tconn.error = htonl(tc->error); tconn.flags = tc->flags; tconn.type = tc->type; tconn.securityIndex = tc->securityIndex; if (tc->securityObject) { RXS_GetStats(tc->securityObject, tc, &tconn.secStats); #define DOHTONL(a) (tconn.secStats.a = htonl(tconn.secStats.a)) #define DOHTONS(a) (tconn.secStats.a = htons(tconn.secStats.a)) DOHTONL(flags); DOHTONL(expires); DOHTONL(packetsReceived); DOHTONL(packetsSent); DOHTONL(bytesReceived); DOHTONL(bytesSent); for (i = 0; i < sizeof(tconn.secStats.spares) / sizeof(int16_t); i++) DOHTONS(spares[i]); for (i = 0; i < sizeof(tconn.secStats.sparel) / 4; i++) DOHTONL(sparel[i]); } rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), (char *) &tconn); tl = ap->length; ap->length = sizeof(struct rx_debugConn); rxi_SendDebugPacket(ap, asocket, ahost, aport); ap->length = tl; return ap; } } } /* if we make it here, there are no interesting packets */ tconn.cid = htonl(0xffffffff); /* means end */ rx_packetwrite(ap, 0, sizeof(struct rx_debugConn), &tconn); tl = ap->length; ap->length = sizeof(struct rx_debugConn); rxi_SendDebugPacket(ap, asocket, ahost, aport); ap->length = tl; break; } case RX_DEBUGI_RXSTATS:{ int i; uint32_t *s; tl = sizeof(rx_stats) - ap->length; if (tl > 0) tl = rxi_AllocDataBuf(ap, tl); if (tl > 0) return ap; /* Since its all longs convert to network order with a loop. 
*/ s = (uint32_t *) &rx_stats; for (i = 0; i < sizeof(rx_stats) / 4; i++, s++) rx_SlowPutLong(ap, i * 4, htonl(*s)); tl = ap->length; ap->length = sizeof(rx_stats); rxi_SendDebugPacket(ap, asocket, ahost, aport); ap->length = tl; break; } default: /* error response packet */ tin.type = htonl(RX_DEBUGI_BADTYPE); tin.index = tin.type; rx_packetwrite(ap, 0, sizeof(struct rx_debugIn), &tin); tl = ap->length; ap->length = sizeof(struct rx_debugIn); rxi_SendDebugPacket(ap, asocket, ahost, aport); ap->length = tl; break; } return ap; } struct rx_packet * rxi_ReceiveVersionPacket(struct rx_packet *ap, osi_socket asocket, uint32_t ahost, uint16_t aport) { long tl; rx_packetwrite(ap, 0, CML_VERSION_NUMBER_SIZE, cml_version_number); tl = ap->length; ap->length = 65; rxi_SendDebugPacket(ap, asocket, ahost, aport); ap->length = tl; return ap; } /* send a debug packet back to the sender */ void rxi_SendDebugPacket(struct rx_packet *apacket, osi_socket asocket, uint32_t ahost, uint16_t aport) { struct sockaddr_in taddr; int i = 0; int savelen = 0; int saven = 0; int nbytes; taddr.sin_family = AF_INET; taddr.sin_port = aport; taddr.sin_addr.s_addr = ahost; nbytes = apacket->length; for (i = 1; i < apacket->niovecs; i++) { if (nbytes <= apacket->wirevec[i].iov_len) { savelen = apacket->wirevec[i].iov_len; saven = apacket->niovecs; apacket->wirevec[i].iov_len = nbytes; apacket->niovecs = i + 1; /* so condition fails because i == niovecs */ } else nbytes -= apacket->wirevec[i].iov_len; } GLOBAL_UNLOCK(); /* debug packets are not reliably delivered, hence the cast below. */ /* MTUXXX need to adjust lengths as in sendSpecial */ (void) osi_NetSend(asocket, (char *)&taddr, apacket->wirevec, apacket->niovecs, apacket->length + RX_HEADER_SIZE); GLOBAL_LOCK(); if (saven) { apacket->wirevec[i - 1].iov_len = savelen; apacket->niovecs = saven; } } /* * Send the packet to appropriate destination for the specified * connection. The header is first encoded and placed in the packet. 
*/ void rxi_SendPacket(struct rx_connection *conn, struct rx_packet *p) { struct sockaddr_in addr; struct rx_peer *peer = conn->peer; osi_socket socket; #ifdef RXDEBUG char deliveryType = 'S'; #endif memset(&addr, 0, sizeof(addr)); /* The address we're sending the packet to */ addr.sin_family = AF_INET; addr.sin_port = peer->port; addr.sin_addr.s_addr = peer->host; /* * This stuff should be revamped, I think, so that most, if not all, of * the header stuff is always added here. We could probably do away with * the encode/decode routines. XXXXX */ /* * Stamp each packet with a unique serial number. The serial number is * maintained on a connection basis because some types of security may be * based on the serial number of the packet, and security is handled on a * per authenticated-connection basis. */ /* * Pre-increment, to guarantee no zero serial number; a zero serial * number means the packet was never sent. */ p->header.serial = ++conn->serial; /* * This is so we can adjust retransmit time-outs better in the face of * rapidly changing round-trip times. RTO estimation is not a la Karn. */ if (p->firstSerial == 0) { p->firstSerial = p->header.serial; } #ifdef RXDEBUG /* * If an output tracer function is defined, call it with the packet and * network address. Note this function may modify its arguments. */ if (rx_almostSent) { int drop = (*rx_almostSent) (p, &addr); /* drop packet if return value is non-zero? */ if (drop) deliveryType = 'D'; /* Drop the packet */ } #endif /* Get network byte order header */ rxi_EncodePacketHeader(p); /* XXX in the event of rexmit, etc, * don't need to touch ALL the fields */ /* * Send the packet out on the same socket that related packets are being * received on */ socket = (conn->type == RX_CLIENT_CONNECTION ? 
rx_socket : conn->service->socket); #ifdef RXDEBUG /* Possibly drop this packet, for testing purposes */ if ((deliveryType == 'D') || ((rx_intentionallyDroppedPacketsPer100 > 0) && (random() % 100 < rx_intentionallyDroppedPacketsPer100))) { deliveryType = 'D'; /* Drop the packet */ } else { deliveryType = 'S'; /* Send the packet */ #endif /* RXDEBUG */ /* * Loop until the packet is sent. We'd prefer just to use a blocking * socket, but unfortunately the interface doesn't allow us to have * the socket block in send mode, and not block in receive mode */ GLOBAL_UNLOCK(); if (osi_NetSend(socket, (char *)&addr, p->wirevec, p->niovecs, p->length + RX_HEADER_SIZE)) { /* send failed, so let's hurry up the resend, eh? */ rx_stats.netSendFailures++; clock_Zero(&p->retryTime); p->header.serial = 0; /* Another way of saying never * transmitted... */ } GLOBAL_LOCK(); #ifdef RXDEBUG } dpf(("%c %d %s: %x.%u.%u.%u.%u.%u.%u flags %d, packet %x resend %d.%0.3d", deliveryType, p->header.serial, rx_packetTypes[p->header.type - 1], peer->host, peer->port, p->header.serial, p->header.epoch, p->header.cid, p->header.callNumber, p->header.seq, p->header.flags, p, p->retryTime.sec, p->retryTime.usec / 1000)); #endif rx_stats.packetsSent[p->header.type - 1]++; } /* * Send a "special" packet to the peer connection. If call is * specified, then the packet is directed to a specific call channel * associated with the connection, otherwise it is directed to the * connection only. Uses optionalPacket if it is supplied, rather than * allocating a new packet buffer. Nbytes is the length of the data * portion of the packet. If data is non-null, nbytes of data are * copied into the packet. Type is the type of the packet, as defined * in rx.h. Bug: there's a lot of duplication between this and other * routines. This needs to be cleaned up. 
*/ struct rx_packet * rxi_SendSpecial(struct rx_call *call, struct rx_connection *conn, struct rx_packet *optionalPacket, int type, char *data, int nbytes) { /* * Some of the following stuff should be common code for all packet sends * (it's repeated elsewhere) */ struct rx_packet *p; int i = 0; int savelen = 0; int saven = 0; int channel, callNumber; if (call) { channel = call->channel; callNumber = *call->callNumber; } else { channel = 0; callNumber = 0; } p = optionalPacket; if (!p) { p = rxi_AllocPacket(RX_PACKET_CLASS_SPECIAL); if (!p) osi_Panic("rxi_SendSpecial failure"); } if (nbytes != -1) p->length = nbytes; else nbytes = p->length; p->header.serviceId = conn->serviceId; p->header.securityIndex = conn->securityIndex; p->header.cid = (conn->cid | channel); p->header.callNumber = callNumber; p->header.seq = 0; p->header.epoch = conn->epoch; p->header.type = type; p->header.flags = 0; if (conn->type == RX_CLIENT_CONNECTION) p->header.flags |= RX_CLIENT_INITIATED; if (data) rx_packetwrite(p, 0, nbytes, data); for (i = 1; i < p->niovecs; i++) { if (nbytes <= p->wirevec[i].iov_len) { savelen = p->wirevec[i].iov_len; saven = p->niovecs; p->wirevec[i].iov_len = nbytes; p->niovecs = i + 1; /* so condition fails because i == * niovecs */ } else nbytes -= p->wirevec[i].iov_len; } if (call) rxi_Send(call, p); else rxi_SendPacket(conn, p); if (saven) { /* means we truncated the packet * above. We probably don't */ /* really need to do this, but it seems safer this way, given that */ /* sneaky optionalPacket... 
*/ p->wirevec[i - 1].iov_len = savelen; p->niovecs = saven; } if (!optionalPacket) rxi_FreePacket(p); return optionalPacket; } static void put32 (unsigned char **p, uint32_t u) { (*p)[0] = (u >> 24) & 0xFF; (*p)[1] = (u >> 16) & 0xFF; (*p)[2] = (u >> 8) & 0xFF; (*p)[3] = (u >> 0) & 0xFF; (*p) += 4; } static uint32_t get32 (unsigned char **p) { uint32_t u; u = ((*p)[0] << 24) | ((*p)[1] << 16) | ((*p)[2] << 8) | (*p)[3]; (*p) += 4; return u; } /* Encode the packet's header (from the struct header in the packet to * the net byte order representation in the wire representation of the * packet, which is what is actually sent out on the wire) */ void rxi_EncodePacketHeader(struct rx_packet *p) { unsigned char *buf = (unsigned char *)p->wirevec[0].iov_base; memset(buf, 0, RX_HEADER_SIZE); put32(&buf, p->header.epoch); put32(&buf, p->header.cid); put32(&buf, p->header.callNumber); put32(&buf, p->header.seq); put32(&buf, p->header.serial); put32(&buf, ((((unsigned long) p->header.type) << 24) | (((unsigned long) p->header.flags) << 16) | (p->header.userStatus << 8) | p->header.securityIndex)); /* Note: top 16 bits of this next word were reserved */ put32(&buf, ((p->header.spare << 16) | (p->header.serviceId & 0xffff))); } /* Decode the packet's header (from net byte order to a struct header) */ void rxi_DecodePacketHeader(struct rx_packet *p) { unsigned char *buf = (unsigned char *)p->wirevec[0].iov_base; uint32_t temp; p->header.epoch = get32(&buf); p->header.cid = get32(&buf); p->header.callNumber = get32(&buf); p->header.seq = get32(&buf); p->header.serial = get32(&buf); temp = get32(&buf); /* C will truncate byte fields to bytes for me */ p->header.type = temp >> 24; p->header.flags = temp >> 16; p->header.userStatus = temp >> 8; p->header.securityIndex = temp >> 0; temp = get32(&buf); p->header.serviceId = (temp & 0xffff); p->header.spare = temp >> 16; /* Note: top 16 bits of this last word are the security checksum */ } void rxi_PrepareSendPacket(struct rx_call 
*call, struct rx_packet *p, int last) { struct rx_connection *conn = call->conn; int len, i; p->acked = 0; p->header.cid = (conn->cid | call->channel); p->header.serviceId = conn->serviceId; p->header.securityIndex = conn->securityIndex; p->header.callNumber = *call->callNumber; p->header.seq = call->tnext++; p->header.epoch = conn->epoch; p->header.type = RX_PACKET_TYPE_DATA; p->header.flags = 0; p->header.spare = 0; if (conn->type == RX_CLIENT_CONNECTION) p->header.flags |= RX_CLIENT_INITIATED; if (last) p->header.flags |= RX_LAST_PACKET; clock_Zero(&p->retryTime); /* Never yet transmitted */ p->header.serial = 0; /* Another way of saying never * transmitted... */ p->backoff = 0; /* * Now that we're sure this is the last data on the call, make sure that * the "length" and the sum of the iov_lens matches. */ len = p->length + call->conn->securityHeaderSize; for (i = 1; i < p->niovecs && len > 0; i++) { len -= p->wirevec[i].iov_len; } if (len > 0) { osi_Panic("PrepareSendPacket 1\n"); /* MTUXXX */ } else { p->niovecs = i; p->wirevec[i - 1].iov_len += len; } RXS_PreparePacket(conn->securityObject, call, p); }