/* ip_nat_mangle.c - generic support functions for NAT helpers
 *
 * (C) 2000-2002 by Harald Welte
 *
 * distributed under the terms of GNU GPL
 *
 * 14 Jan 2002 Harald Welte:
 *	- add support for SACK adjustment
 * 14 Mar 2002 Harald Welte:
 *	- merge SACK support into newnat API
 */
#include <linux/version.h>
#include <linux/config.h>
#include <linux/module.h>
#include <linux/kmod.h>
#include <linux/types.h>
#include <linux/timer.h>
#include <linux/skbuff.h>
#include <linux/netfilter_ipv4.h>
#include <linux/brlock.h>
#include <net/checksum.h>
#include <net/icmp.h>
#include <net/ip.h>
#include <net/tcp.h>

#define ASSERT_READ_LOCK(x) MUST_BE_READ_LOCKED(&ip_nat_lock)
#define ASSERT_WRITE_LOCK(x) MUST_BE_WRITE_LOCKED(&ip_nat_lock)

#include <linux/netfilter_ipv4/ip_conntrack.h>
#include <linux/netfilter_ipv4/ip_conntrack_helper.h>
#include <linux/netfilter_ipv4/ip_nat.h>
#include <linux/netfilter_ipv4/ip_nat_protocol.h>
#include <linux/netfilter_ipv4/ip_nat_core.h>
#include <linux/netfilter_ipv4/ip_nat_helper.h>
#include <linux/netfilter_ipv4/listhelp.h>

#if 0
#define DEBUGP printk
#define DUMP_OFFSET(x)	printk("offset_before=%d, offset_after=%d, correction_pos=%u\n", x->offset_before, x->offset_after, x->correction_pos);
#else
#define DEBUGP(format, args...)
#define DUMP_OFFSET(x)
#endif

DECLARE_LOCK(ip_nat_seqofs_lock);

static inline int
ip_nat_resize_packet(struct sk_buff **skb,
		     struct ip_conntrack *ct,
		     enum ip_conntrack_info ctinfo,
		     int new_size)
{
	struct iphdr *iph;
	struct tcphdr *tcph;
	void *data;
	int dir;
	struct ip_nat_seq *this_way, *other_way;

	DEBUGP("ip_nat_resize_packet: old_size = %u, new_size = %u\n",
	       (*skb)->len, new_size);

	iph = (*skb)->nh.iph;
	tcph = (void *)iph + iph->ihl*4;
	data = (void *)tcph + tcph->doff*4;

	dir = CTINFO2DIR(ctinfo);

	this_way = &ct->nat.info.seq[dir];
	other_way = &ct->nat.info.seq[!dir];

	if (new_size > (*skb)->len + skb_tailroom(*skb)) {
		struct sk_buff *newskb;
		newskb = skb_copy_expand(*skb, skb_headroom(*skb),
					 new_size - (*skb)->len,
					 GFP_ATOMIC);

		if (!newskb) {
			printk("ip_nat_resize_packet: oom\n");
			return 0;
		} else {
			kfree_skb(*skb);
			*skb = newskb;
		}
	}

	iph = (*skb)->nh.iph;
	tcph = (void *)iph + iph->ihl*4;
	data = (void *)tcph + tcph->doff*4;

	DEBUGP("ip_nat_resize_packet: Seq_offset before: ");
	DUMP_OFFSET(this_way);

	LOCK_BH(&ip_nat_seqofs_lock);

	/* SYN adjust.  If it's uninitialized, or this is after the last
	 * correction, record it: we don't handle more than one
	 * adjustment in the window, but do deal with the common case of
	 * a retransmit */
	if (this_way->offset_before == this_way->offset_after
	    || before(this_way->correction_pos, ntohl(tcph->seq))) {
		this_way->correction_pos = ntohl(tcph->seq);
		this_way->offset_before = this_way->offset_after;
		this_way->offset_after = (int32_t)
			this_way->offset_before + new_size - (*skb)->len;
	}

	UNLOCK_BH(&ip_nat_seqofs_lock);

	DEBUGP("ip_nat_resize_packet: Seq_offset after: ");
	DUMP_OFFSET(this_way);

	return 1;
}


/* Generic function for mangling variable-length address changes inside
 * NATed connections (like the PORT XXX,XXX,XXX,XXX,XXX,XXX command in FTP).
 *
 * Takes care of all the nasty sequence number changes, checksumming,
 * skb enlargement, ...
 */
int
ip_nat_mangle_tcp_packet(struct sk_buff **skb,
			 struct ip_conntrack *ct,
			 enum ip_conntrack_info ctinfo,
			 unsigned int match_offset,
			 unsigned int match_len,
			 char *rep_buffer,
			 unsigned int rep_len)
{
	struct iphdr *iph = (*skb)->nh.iph;
	struct tcphdr *tcph;
	unsigned char *data;
	u_int32_t tcplen, newlen, newtcplen;

	tcplen = (*skb)->len - iph->ihl*4;
	newtcplen = tcplen - match_len + rep_len;
	newlen = iph->ihl*4 + newtcplen;

	if (newlen > 65535) {
		if (net_ratelimit())
			printk("ip_nat_mangle_tcp_packet: nat'ed packet "
			       "exceeds maximum packet size\n");
		return 0;
	}

	if ((*skb)->len != newlen) {
		if (!ip_nat_resize_packet(skb, ct, ctinfo, newlen)) {
			printk("resize_packet failed!!\n");
			return 0;
		}
	}

	/* Alexey says: if a hook changes _data_ ... it can break
	   original packet sitting in tcp queue and this is fatal */
	if (skb_cloned(*skb)) {
		struct sk_buff *nskb = skb_copy(*skb, GFP_ATOMIC);
		if (!nskb) {
			if (net_ratelimit())
				printk("Out of memory cloning TCP packet\n");
			return 0;
		}
		/* Rest of kernel will get very unhappy if we pass it
		   a suddenly-orphaned skbuff */
		if ((*skb)->sk)
			skb_set_owner_w(nskb, (*skb)->sk);
		kfree_skb(*skb);
		*skb = nskb;
	}

	/* skb may be copied !! */
	iph = (*skb)->nh.iph;
	tcph = (void *)iph + iph->ihl*4;
	data = (void *)tcph + tcph->doff*4;

	/* move post-replacement */
	memmove(data + match_offset + rep_len,
		data + match_offset + match_len,
		((*skb)->data + (*skb)->len)
			- (data + match_offset + match_len));

	/* insert data from buffer */
	memcpy(data + match_offset, rep_buffer, rep_len);

	/* update skb info */
	if (newlen > (*skb)->len) {
		DEBUGP("ip_nat_mangle_tcp_packet: Extending packet by "
		       "%u to %u bytes\n", newlen - (*skb)->len, newlen);
		skb_put(*skb, newlen - (*skb)->len);
	} else {
		DEBUGP("ip_nat_mangle_tcp_packet: Shrinking packet from "
		       "%u to %u bytes\n", (*skb)->len, newlen);
		skb_trim(*skb, newlen);
	}

	/* fix checksum information */
	iph->tot_len = htons(newlen);
	(*skb)->csum = csum_partial((char *)tcph + tcph->doff*4,
				    newtcplen - tcph->doff*4, 0);

	tcph->check = 0;
	tcph->check = tcp_v4_check(tcph, newtcplen, iph->saddr,
				   iph->daddr,
				   csum_partial((char *)tcph,
						tcph->doff*4, (*skb)->csum));
	ip_send_check(iph);

	return 1;
}
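
/* A minimal usage sketch, not part of this file: roughly how a NAT
 * helper such as ip_nat_ftp would rewrite an FTP PORT payload with
 * ip_nat_mangle_tcp_packet().  matchoff/matchlen locate the original
 * "a,b,c,d,p1,p2" string inside the TCP payload; the function and
 * parameter names here are illustrative only. */
#if 0
static int example_mangle_port(struct sk_buff **skb,
			       struct ip_conntrack *ct,
			       enum ip_conntrack_info ctinfo,
			       u_int32_t newip, u_int16_t port,
			       unsigned int matchoff,
			       unsigned int matchlen)
{
	char buffer[sizeof("nnn,nnn,nnn,nnn,nnn,nnn")];

	/* build the replacement address/port in PORT command format */
	sprintf(buffer, "%u,%u,%u,%u,%u,%u",
		NIPQUAD(newip), port >> 8, port & 0xFF);

	/* splices buffer over the matched region; sequence numbers,
	   checksums and skb size are all fixed up for us */
	return ip_nat_mangle_tcp_packet(skb, ct, ctinfo, matchoff,
					matchlen, buffer, strlen(buffer));
}
#endif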

/* Adjust one found SACK option including checksum correction */
static void
sack_adjust(struct tcphdr *tcph,
	    unsigned char *ptr,
	    struct ip_nat_seq *natseq)
{
	struct tcp_sack_block *sp = (struct tcp_sack_block *)(ptr+2);
	int num_sacks = (ptr[1] - TCPOLEN_SACK_BASE)>>3;
	int i;

	for (i = 0; i < num_sacks; i++, sp++) {
		u_int32_t new_start_seq, new_end_seq;

		if (after(ntohl(sp->start_seq) - natseq->offset_before,
			  natseq->correction_pos))
			new_start_seq = ntohl(sp->start_seq)
					- natseq->offset_after;
		else
			new_start_seq = ntohl(sp->start_seq)
					- natseq->offset_before;
		new_start_seq = htonl(new_start_seq);

		if (after(ntohl(sp->end_seq) - natseq->offset_before,
			  natseq->correction_pos))
			new_end_seq = ntohl(sp->end_seq)
				      - natseq->offset_after;
		else
			new_end_seq = ntohl(sp->end_seq)
				      - natseq->offset_before;
		new_end_seq = htonl(new_end_seq);

		DEBUGP("sack_adjust: start_seq: %d->%d, end_seq: %d->%d\n",
			ntohl(sp->start_seq), new_start_seq,
			ntohl(sp->end_seq), new_end_seq);

		tcph->check =
			ip_nat_cheat_check(~sp->start_seq, new_start_seq,
					   ip_nat_cheat_check(~sp->end_seq,
							      new_end_seq,
							      tcph->check));

		sp->start_seq = new_start_seq;
		sp->end_seq = new_end_seq;
	}
}

/* TCP SACK sequence number adjustment, return 0 if sack found and adjusted */
static inline int
ip_nat_sack_adjust(struct sk_buff *skb,
		   struct ip_conntrack *ct,
		   enum ip_conntrack_info ctinfo)
{
	struct iphdr *iph;
	struct tcphdr *tcph;
	unsigned char *ptr;
	int length, dir, sack_adjusted = 0;

	iph = skb->nh.iph;
	tcph = (void *)iph + iph->ihl*4;
	length = (tcph->doff*4) - sizeof(struct tcphdr);
	ptr = (unsigned char *)(tcph+1);

	dir = CTINFO2DIR(ctinfo);

	while (length > 0) {
		int opcode = *ptr++;
		int opsize;

		switch (opcode) {
		case TCPOPT_EOL:
			return !sack_adjusted;
		case TCPOPT_NOP:
			length--;
			continue;
		default:
			opsize = *ptr++;
			if (opsize < 2)		/* malformed option: don't
						   loop on a zero length */
				return !sack_adjusted;
			if (opsize > length)	/* no partial opts */
				return !sack_adjusted;
			if (opcode == TCPOPT_SACK) {
				/* found SACK */
				if ((opsize >= (TCPOLEN_SACK_BASE
						+ TCPOLEN_SACK_PERBLOCK))
				    && !((opsize - TCPOLEN_SACK_BASE)
					 % TCPOLEN_SACK_PERBLOCK))
					sack_adjust(tcph, ptr-2,
						    &ct->nat.info.seq[!dir]);

				sack_adjusted = 1;
			}
			ptr += opsize-2;
			length -= opsize;
		}
	}
	return !sack_adjusted;
}
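
/* Worked example of the offset bookkeeping used above and below
 * (illustrative numbers, not taken from the code): suppose a helper grew
 * a payload by 4 bytes in the segment starting at sequence number 1000.
 * ip_nat_resize_packet() then records correction_pos = 1000,
 * offset_before = 0, offset_after = +4.  A later outgoing segment with
 * seq <= 1000 is shifted by offset_before (0), one with seq > 1000 by
 * offset_after (+4); ACKs and SACK blocks arriving from the peer are
 * shifted back down by the other direction's offsets in the same way. */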

/* TCP sequence number adjustment */
int
ip_nat_seq_adjust(struct sk_buff *skb,
		  struct ip_conntrack *ct,
		  enum ip_conntrack_info ctinfo)
{
	struct iphdr *iph;
	struct tcphdr *tcph;
	int dir, newseq, newack;
	struct ip_nat_seq *this_way, *other_way;

	iph = skb->nh.iph;
	tcph = (void *)iph + iph->ihl*4;

	dir = CTINFO2DIR(ctinfo);

	this_way = &ct->nat.info.seq[dir];
	other_way = &ct->nat.info.seq[!dir];

	if (after(ntohl(tcph->seq), this_way->correction_pos))
		newseq = ntohl(tcph->seq) + this_way->offset_after;
	else
		newseq = ntohl(tcph->seq) + this_way->offset_before;
	newseq = htonl(newseq);

	if (after(ntohl(tcph->ack_seq) - other_way->offset_before,
		  other_way->correction_pos))
		newack = ntohl(tcph->ack_seq) - other_way->offset_after;
	else
		newack = ntohl(tcph->ack_seq) - other_way->offset_before;
	newack = htonl(newack);

	tcph->check = ip_nat_cheat_check(~tcph->seq, newseq,
					 ip_nat_cheat_check(~tcph->ack_seq,
							    newack,
							    tcph->check));

	DEBUGP("Adjusting sequence number from %u->%u, ack from %u->%u\n",
		ntohl(tcph->seq), ntohl(newseq), ntohl(tcph->ack_seq),
		ntohl(newack));

	tcph->seq = newseq;
	tcph->ack_seq = newack;

	ip_nat_sack_adjust(skb, ct, ctinfo);

	ip_conntrack_tcp_update(ct, dir, iph, skb->len, tcph);

	return 0;
}

static inline int
helper_cmp(const struct ip_nat_helper *helper,
	   const struct ip_conntrack_tuple *tuple)
{
	return ip_ct_tuple_mask_cmp(tuple, &helper->tuple, &helper->mask);
}

#define MODULE_MAX_NAMELEN	32

int ip_nat_helper_register(struct ip_nat_helper *me)
{
	int ret = 0;

	if (me->me && !(me->flags & IP_NAT_HELPER_F_STANDALONE)) {
		struct ip_conntrack_helper *ct_helper;

		if ((ct_helper = ip_ct_find_helper(&me->tuple))
		    && ct_helper->me) {
			__MOD_INC_USE_COUNT(ct_helper->me);
		} else {

			/* We are a NAT helper for protocol X.  If we
			 * need the respective conntrack helper for
			 * protocol X, compute the conntrack helper name
			 * and try to load that module */
			char name[MODULE_MAX_NAMELEN];
			const char *tmp = me->me->name;

			if (strlen(tmp) + 6 > MODULE_MAX_NAMELEN) {
				printk(__FUNCTION__ ": unable to "
				       "compute conntrack helper name "
				       "from %s\n", tmp);
				return -EBUSY;
			}
			tmp += 6;
			sprintf(name, "ip_conntrack%s", tmp);
#ifdef CONFIG_KMOD
			if (!request_module(name)
			    && (ct_helper = ip_ct_find_helper(&me->tuple))
			    && ct_helper->me) {
				__MOD_INC_USE_COUNT(ct_helper->me);
			} else {
				printk("unable to load module %s\n", name);
				return -EBUSY;
			}
#else
			printk("unable to load module %s automatically "
			       "because kernel was compiled without kernel "
			       "module loader support\n", name);
			return -EBUSY;
#endif
		}
	}
	WRITE_LOCK(&ip_nat_lock);
	if (LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *, &me->tuple))
		ret = -EBUSY;
	else {
		list_prepend(&helpers, me);
		MOD_INC_USE_COUNT;
	}
	WRITE_UNLOCK(&ip_nat_lock);

	return ret;
}

static int
kill_helper(const struct ip_conntrack *i, void *helper)
{
	int ret;

	READ_LOCK(&ip_nat_lock);
	ret = (i->nat.info.helper == helper);
	READ_UNLOCK(&ip_nat_lock);

	return ret;
}

void ip_nat_helper_unregister(struct ip_nat_helper *me)
{
	int found = 0;

	WRITE_LOCK(&ip_nat_lock);
	/* Autoloading conntrack helper might have failed */
	if (LIST_FIND(&helpers, helper_cmp, struct ip_nat_helper *, &me->tuple)) {
		LIST_DELETE(&helpers, me);
		found = 1;
	}
	WRITE_UNLOCK(&ip_nat_lock);

	/* Someone could be still looking at the helper in a bh. */
	br_write_lock_bh(BR_NETPROTO_LOCK);
	br_write_unlock_bh(BR_NETPROTO_LOCK);

	/* Find anything using it, and umm, kill them.  We can't turn
	   them into normal connections: if we've adjusted SYNs, then
	   they'll ackstorm.  So we just drop it.  We used to just
	   bump module count when a connection existed, but that
	   forces admins to gen fake RSTs or bounce box, either of
	   which is just a long-winded way of making things
	   worse. --RR */
	ip_ct_selective_cleanup(kill_helper, me);

	if (found)
		MOD_DEC_USE_COUNT;

	/* If we are not a standalone NAT helper, we need to decrement
	 * the usage count on our conntrack helper */
	if (me->me && !(me->flags & IP_NAT_HELPER_F_STANDALONE)) {
		struct ip_conntrack_helper *ct_helper;

		if ((ct_helper = ip_ct_find_helper(&me->tuple))
		    && ct_helper->me) {
			__MOD_DEC_USE_COUNT(ct_helper->me);
		} else
			printk(__FUNCTION__ ": unable to decrement usage count"
			       " of conntrack helper %s\n", me->me->name);
	}
}
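
/* A minimal registration sketch, not part of this file (modeled on
 * helpers like ip_nat_ftp): fill in the tuple/mask describing the
 * traffic to help, point help/expect at the module's callbacks, and
 * register at module init.  EXAMPLE_PORT, example_help and
 * example_nat_expected are hypothetical names. */
#if 0
static struct ip_nat_helper example_nat;

static int __init init(void)
{
	/* match TCP traffic to our port; mask out everything else */
	example_nat.tuple.dst.protonum = IPPROTO_TCP;
	example_nat.tuple.src.u.tcp.port = htons(EXAMPLE_PORT);
	example_nat.mask.dst.protonum = 0xFFFF;
	example_nat.mask.src.u.tcp.port = 0xFFFF;
	example_nat.help = example_help;	/* payload mangling callback */
	example_nat.expect = example_nat_expected; /* NAT setup for expected conns */
	example_nat.me = THIS_MODULE;
	example_nat.flags = 0;
	example_nat.name = "example";

	return ip_nat_helper_register(&example_nat);
}

static void __exit fini(void)
{
	ip_nat_helper_unregister(&example_nat);
}
#endif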