/* * xfrm_output.c - Common IPsec encapsulation code. * * Copyright (c) 2007 Herbert Xu * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ #include #include #include #include #include #include #include #include #include #include static struct avm_pa_dev_info xfrm_out_dev_info __read_mostly; static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb); static int xfrm_skb_check_space(struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); int nhead = dst->header_len + LL_RESERVED_SPACE(dst->dev) - skb_headroom(skb); int ntail = dst->dev->needed_tailroom - skb_tailroom(skb); if (nhead <= 0) { if (ntail <= 0) return 0; nhead = 0; } else if (ntail < 0) ntail = 0; return pskb_expand_head(skb, nhead, ntail, GFP_ATOMIC); } /* Children define the path of the packet through the * Linux networking. Thus, destinations are stackable. */ static struct dst_entry *skb_dst_pop(struct sk_buff *skb) { struct dst_entry *child = dst_clone(skb_dst(skb)->child); skb_dst_drop(skb); return child; } static int xfrm_output_one(struct sk_buff *skb, int err) { struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; struct net *net = xs_net(x); if (err <= 0) goto resume; do { err = xfrm_skb_check_space(skb); if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); goto error_nolock; } err = x->outer_mode->output(x, skb); if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEMODEERROR); goto error_nolock; } spin_lock_bh(&x->lock); if (unlikely(x->km.state != XFRM_STATE_VALID)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEINVALID); err = -EINVAL; goto error; } err = xfrm_state_check_expire(x); if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEEXPIRED); goto error; } err = x->repl->overflow(x, skb); if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATESEQERROR); goto error; } x->curlft.bytes += skb->len; x->curlft.packets++; spin_unlock_bh(&x->lock); skb_dst_force(skb); err = x->type->output(x, skb); if (err == -EINPROGRESS) goto out; resume: if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTSTATEPROTOERROR); goto error_nolock; } dst = skb_dst_pop(skb); if (!dst) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); err = -EHOSTUNREACH; goto error_nolock; } skb_dst_set(skb, dst); x = dst->xfrm; } while (x && !(x->outer_mode->flags & XFRM_MODE_FLAG_TUNNEL)); return 0; error: spin_unlock_bh(&x->lock); error_nolock: kfree_skb(skb); out: return err; } int xfrm_output_resume(struct sk_buff *skb, int err) { struct net *net = xs_net(skb_dst(skb)->xfrm); while (likely((err = xfrm_output_one(skb, err)) == 0)) { nf_reset_no_generic_ct(skb); if (!skb_dst(skb)->xfrm) { /* Now encrypted, try to accelerate the other half. * The skb must be reset to allow for a secondary session. */ avm_pa_reset_skb(skb); } /* We want to invert the likely indication because avm_pa * very likely accelerates the packet. We take the opportunity * and do a direct call optimization as well. */ if (skb_dst(skb)->ops->local_out == __ip_local_out) err = __ip_local_out(net, skb->sk, skb); else if (skb_dst(skb)->ops->local_out == __ip6_local_out) err = __ip6_local_out(net, skb->sk, skb); else err = skb_dst(skb)->ops->local_out(net, skb->sk, skb); if (likely(err != 1)) /* likely accelerated by avm_pa */ goto out; if (!skb_dst(skb)->xfrm) return dst_output(net, skb->sk, skb); err = nf_hook(skb_dst(skb)->ops->family, NF_INET_POST_ROUTING, net, skb->sk, skb, NULL, skb_dst(skb)->dev, xfrm_output2); if (unlikely(err != 1)) goto out; } if (err == -EINPROGRESS) err = 0; out: return err; } EXPORT_SYMBOL_GPL(xfrm_output_resume); static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb) { return xfrm_output_resume(skb, 1); } static int xfrm_output_gso(struct net *net, struct sock *sk, struct sk_buff *skb) { struct sk_buff *segs; BUILD_BUG_ON(sizeof(*IPCB(skb)) > SKB_SGO_CB_OFFSET); BUILD_BUG_ON(sizeof(*IP6CB(skb)) > SKB_SGO_CB_OFFSET); segs = skb_gso_segment(skb, 0); kfree_skb(skb); if (IS_ERR(segs)) return PTR_ERR(segs); if (segs == NULL) return -EINVAL; do { struct sk_buff *nskb = segs->next; int err; segs->next = NULL; err = xfrm_output2(net, sk, segs); if (unlikely(err)) { kfree_skb_list(nskb); return err; } segs = nskb; } while (segs); return 0; } int xfrm_output(struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb_dst(skb)->dev); int err; avm_pa_add_xfrm_session(&xfrm_out_dev_info, skb, skb_dst(skb)->xfrm); if (skb_is_gso(skb)) return xfrm_output_gso(net, sk, skb); if (skb->ip_summed == CHECKSUM_PARTIAL) { err = skb_checksum_help(skb); if (err) { XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTERROR); kfree_skb(skb); return err; } } return xfrm_output2(net, sk, skb); } EXPORT_SYMBOL_GPL(xfrm_output); int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb) { struct xfrm_mode *inner_mode; if (x->sel.family == AF_UNSPEC) inner_mode = xfrm_ip2inner_mode(x, xfrm_af2proto(skb_dst(skb)->ops->family)); else inner_mode = x->inner_mode; if (inner_mode == NULL) return -EAFNOSUPPORT; return inner_mode->afinfo->extract_output(x, skb); } EXPORT_SYMBOL_GPL(xfrm_inner_extract_output); void xfrm_local_error(struct sk_buff *skb, int mtu) { unsigned int proto; struct xfrm_state_afinfo *afinfo; if (skb->protocol == htons(ETH_P_IP)) proto = AF_INET; else if (skb->protocol == htons(ETH_P_IPV6) && skb->sk->sk_family == AF_INET6) proto = AF_INET6; else return; afinfo = xfrm_state_get_afinfo(proto); if (!afinfo) return; afinfo->local_error(skb, mtu); xfrm_state_put_afinfo(afinfo); } EXPORT_SYMBOL_GPL(xfrm_local_error); static void xfrm_out_transmit(void *arg, struct sk_buff *skb) { int ret; ret = xfrm_output(NULL, skb); if (unlikely(ret < 0)) net_err_ratelimited("%s failed: %d\n", __func__, ret); } static void __init xfrm_output_avm_pa_register(void) { struct avm_pa_pid_cfg cfg = {0}; int ret; snprintf(cfg.name, sizeof(cfg.name), "xfrm_out"); cfg.framing = avm_pa_framing_ipdev; cfg.default_mtu = 0xffff; cfg.tx_func = xfrm_out_transmit; ret = avm_pa_dev_pid_register(&xfrm_out_dev_info, &cfg); if (ret < 0) pr_err("%s: failed to register avm_pa pid %s: %d\n", __func__, cfg.name, ret); } void __init xfrm_output_init(void) { xfrm_output_avm_pa_register(); }