--- zzzz-none-000/linux-3.10.107/net/xfrm/xfrm_policy.c 2017-06-27 09:49:32.000000000 +0000 +++ scorpion-7490-727/linux-3.10.107/net/xfrm/xfrm_policy.c 2021-02-04 17:41:59.000000000 +0000 @@ -39,12 +39,10 @@ #define XFRM_QUEUE_TMO_MAX ((unsigned)(60*HZ)) #define XFRM_MAX_QUEUE_LEN 100 -DEFINE_MUTEX(xfrm_cfg_mutex); -EXPORT_SYMBOL(xfrm_cfg_mutex); - -static DEFINE_SPINLOCK(xfrm_policy_sk_bundle_lock); -static struct dst_entry *xfrm_policy_sk_bundles; -static DEFINE_RWLOCK(xfrm_policy_lock); +struct xfrm_flo { + struct dst_entry *dst_orig; + u8 flags; +}; static DEFINE_SPINLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo __rcu *xfrm_policy_afinfo[NPROTO] @@ -57,6 +55,7 @@ static int xfrm_bundle_ok(struct xfrm_dst *xdst); static void xfrm_policy_queue_process(unsigned long arg); +static void __xfrm_policy_link(struct xfrm_policy *pol, int dir); static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int dir); @@ -116,7 +115,8 @@ rcu_read_unlock(); } -static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, int tos, +static inline struct dst_entry *__xfrm_dst_lookup(struct net *net, + int tos, int oif, const xfrm_address_t *saddr, const xfrm_address_t *daddr, int family) @@ -128,14 +128,15 @@ if (unlikely(afinfo == NULL)) return ERR_PTR(-EAFNOSUPPORT); - dst = afinfo->dst_lookup(net, tos, saddr, daddr); + dst = afinfo->dst_lookup(net, tos, oif, saddr, daddr); xfrm_policy_put_afinfo(afinfo); return dst; } -static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, int tos, +static inline struct dst_entry *xfrm_dst_lookup(struct xfrm_state *x, + int tos, int oif, xfrm_address_t *prev_saddr, xfrm_address_t *prev_daddr, int family) @@ -154,7 +155,7 @@ daddr = x->coaddr; } - dst = __xfrm_dst_lookup(net, tos, saddr, daddr, family); + dst = __xfrm_dst_lookup(net, tos, oif, saddr, daddr, family); if (!IS_ERR(dst)) { if (prev_saddr != saddr) @@ -176,7 +177,7 @@ static void xfrm_policy_timer(unsigned long data) { - struct xfrm_policy *xp = (struct xfrm_policy*)data; + struct xfrm_policy *xp = (struct xfrm_policy *)data; unsigned long now = get_seconds(); long next = LONG_MAX; int warn = 0; @@ -302,30 +303,27 @@ } EXPORT_SYMBOL(xfrm_policy_alloc); +static void xfrm_policy_destroy_rcu(struct rcu_head *head) +{ + struct xfrm_policy *policy = container_of(head, struct xfrm_policy, rcu); + + security_xfrm_policy_free(policy->security); + kfree(policy); +} + /* Destroy xfrm_policy: descendant resources must be released to this moment. */ void xfrm_policy_destroy(struct xfrm_policy *policy) { BUG_ON(!policy->walk.dead); - if (del_timer(&policy->timer)) + if (del_timer(&policy->timer) || del_timer(&policy->polq.hold_timer)) BUG(); - security_xfrm_policy_free(policy->security); - kfree(policy); + call_rcu(&policy->rcu, xfrm_policy_destroy_rcu); } EXPORT_SYMBOL(xfrm_policy_destroy); -static void xfrm_queue_purge(struct sk_buff_head *list) -{ - struct sk_buff *skb; - - while ((skb = skb_dequeue(list)) != NULL) { - dev_put(skb->dev); - kfree_skb(skb); - } -} - /* Rule must be locked. Release descentant resources, announce * entry dead. The rule must be unlinked from lists to the moment. */ @@ -336,8 +334,9 @@ atomic_inc(&policy->genid); - del_timer(&policy->polq.hold_timer); - xfrm_queue_purge(&policy->polq.hold_queue); + if (del_timer(&policy->polq.hold_timer)) + xfrm_pol_put(policy); + skb_queue_purge(&policy->polq.hold_queue); if (del_timer(&policy->timer)) xfrm_pol_put(policy); @@ -352,12 +351,39 @@ return __idx_hash(index, net->xfrm.policy_idx_hmask); } +/* calculate policy hash thresholds */ +static void __get_hash_thresh(struct net *net, + unsigned short family, int dir, + u8 *dbits, u8 *sbits) +{ + switch (family) { + case AF_INET: + *dbits = net->xfrm.policy_bydst[dir].dbits4; + *sbits = net->xfrm.policy_bydst[dir].sbits4; + break; + + case AF_INET6: + *dbits = net->xfrm.policy_bydst[dir].dbits6; + *sbits = net->xfrm.policy_bydst[dir].sbits6; + break; + + default: + *dbits = 0; + *sbits = 0; + } +} + static struct hlist_head *policy_hash_bysel(struct net *net, const struct xfrm_selector *sel, unsigned short family, int dir) { unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; - unsigned int hash = __sel_hash(sel, family, hmask); + unsigned int hash; + u8 dbits; + u8 sbits; + + __get_hash_thresh(net, family, dir, &dbits, &sbits); + hash = __sel_hash(sel, family, hmask, dbits, sbits); return (hash == hmask + 1 ? &net->xfrm.policy_inexact[dir] : @@ -370,25 +396,35 @@ unsigned short family, int dir) { unsigned int hmask = net->xfrm.policy_bydst[dir].hmask; - unsigned int hash = __addr_hash(daddr, saddr, family, hmask); + unsigned int hash; + u8 dbits; + u8 sbits; + + __get_hash_thresh(net, family, dir, &dbits, &sbits); + hash = __addr_hash(daddr, saddr, family, hmask, dbits, sbits); return net->xfrm.policy_bydst[dir].table + hash; } -static void xfrm_dst_hash_transfer(struct hlist_head *list, +static void xfrm_dst_hash_transfer(struct net *net, + struct hlist_head *list, struct hlist_head *ndsttable, - unsigned int nhashmask) + unsigned int nhashmask, + int dir) { struct hlist_node *tmp, *entry0 = NULL; struct xfrm_policy *pol; unsigned int h0 = 0; + u8 dbits; + u8 sbits; redo: hlist_for_each_entry_safe(pol, tmp, list, bydst) { unsigned int h; + __get_hash_thresh(net, pol->family, dir, &dbits, &sbits); h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr, - pol->family, nhashmask); + pol->family, nhashmask, dbits, sbits); if (!entry0) { hlist_del(&pol->bydst); hlist_add_head(&pol->bydst, ndsttable+h); @@ -397,7 +433,7 @@ if (h != h0) continue; hlist_del(&pol->bydst); - hlist_add_after(entry0, &pol->bydst); + hlist_add_behind(&pol->bydst, entry0); } entry0 = &pol->bydst; } @@ -439,15 +475,15 @@ if (!ndst) return; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); for (i = hmask; i >= 0; i--) - xfrm_dst_hash_transfer(odst + i, ndst, nhashmask); + xfrm_dst_hash_transfer(net, odst + i, ndst, nhashmask, dir); net->xfrm.policy_bydst[dir].table = ndst; net->xfrm.policy_bydst[dir].hmask = nhashmask; - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); xfrm_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); } @@ -464,7 +500,7 @@ if (!nidx) return; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); for (i = hmask; i >= 0; i--) xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); @@ -472,7 +508,7 @@ net->xfrm.policy_byidx = nidx; net->xfrm.policy_idx_hmask = nhashmask; - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); xfrm_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); } @@ -505,7 +541,7 @@ void xfrm_spd_getinfo(struct net *net, struct xfrmk_spdinfo *si) { - read_lock_bh(&xfrm_policy_lock); + read_lock_bh(&net->xfrm.xfrm_policy_lock); si->incnt = net->xfrm.policy_count[XFRM_POLICY_IN]; si->outcnt = net->xfrm.policy_count[XFRM_POLICY_OUT]; si->fwdcnt = net->xfrm.policy_count[XFRM_POLICY_FWD]; @@ -514,7 +550,7 @@ si->fwdscnt = net->xfrm.policy_count[XFRM_POLICY_FWD+XFRM_POLICY_MAX]; si->spdhcnt = net->xfrm.policy_idx_hmask; si->spdhmcnt = xfrm_policy_hashmax; - read_unlock_bh(&xfrm_policy_lock); + read_unlock_bh(&net->xfrm.xfrm_policy_lock); } EXPORT_SYMBOL(xfrm_spd_getinfo); @@ -527,7 +563,7 @@ mutex_lock(&hash_resize_mutex); total = 0; - for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { + for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { if (xfrm_bydst_should_resize(net, dir, &total)) xfrm_bydst_resize(net, dir); } @@ -537,9 +573,89 @@ mutex_unlock(&hash_resize_mutex); } +static void xfrm_hash_rebuild(struct work_struct *work) +{ + struct net *net = container_of(work, struct net, + xfrm.policy_hthresh.work); + unsigned int hmask; + struct xfrm_policy *pol; + struct xfrm_policy *policy; + struct hlist_head *chain; + struct hlist_head *odst; + struct hlist_node *newpos; + int i; + int dir; + unsigned seq; + u8 lbits4, rbits4, lbits6, rbits6; + + mutex_lock(&hash_resize_mutex); + + /* read selector prefixlen thresholds */ + do { + seq = read_seqbegin(&net->xfrm.policy_hthresh.lock); + + lbits4 = net->xfrm.policy_hthresh.lbits4; + rbits4 = net->xfrm.policy_hthresh.rbits4; + lbits6 = net->xfrm.policy_hthresh.lbits6; + rbits6 = net->xfrm.policy_hthresh.rbits6; + } while (read_seqretry(&net->xfrm.policy_hthresh.lock, seq)); + + write_lock_bh(&net->xfrm.xfrm_policy_lock); + + /* reset the bydst and inexact table in all directions */ + for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { + INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]); + hmask = net->xfrm.policy_bydst[dir].hmask; + odst = net->xfrm.policy_bydst[dir].table; + for (i = hmask; i >= 0; i--) + INIT_HLIST_HEAD(odst + i); + if ((dir & XFRM_POLICY_MASK) == XFRM_POLICY_OUT) { + /* dir out => dst = remote, src = local */ + net->xfrm.policy_bydst[dir].dbits4 = rbits4; + net->xfrm.policy_bydst[dir].sbits4 = lbits4; + net->xfrm.policy_bydst[dir].dbits6 = rbits6; + net->xfrm.policy_bydst[dir].sbits6 = lbits6; + } else { + /* dir in/fwd => dst = local, src = remote */ + net->xfrm.policy_bydst[dir].dbits4 = lbits4; + net->xfrm.policy_bydst[dir].sbits4 = rbits4; + net->xfrm.policy_bydst[dir].dbits6 = lbits6; + net->xfrm.policy_bydst[dir].sbits6 = rbits6; + } + } + + /* re-insert all policies by order of creation */ + list_for_each_entry_reverse(policy, &net->xfrm.policy_all, walk.all) { + newpos = NULL; + chain = policy_hash_bysel(net, &policy->selector, + policy->family, + xfrm_policy_id2dir(policy->index)); + hlist_for_each_entry(pol, chain, bydst) { + if (policy->priority >= pol->priority) + newpos = &pol->bydst; + else + break; + } + if (newpos) + hlist_add_behind(&policy->bydst, newpos); + else + hlist_add_head(&policy->bydst, chain); + } + + write_unlock_bh(&net->xfrm.xfrm_policy_lock); + + mutex_unlock(&hash_resize_mutex); +} + +void xfrm_policy_hash_rebuild(struct net *net) +{ + schedule_work(&net->xfrm.policy_hthresh.work); +} +EXPORT_SYMBOL(xfrm_policy_hash_rebuild); + /* Generate new index... KAME seems to generate them ordered by cost * of an absolute inpredictability of ordering of rules. This will not pass. */ -static u32 xfrm_gen_index(struct net *net, int dir) +static u32 xfrm_gen_index(struct net *net, int dir, u32 index) { static u32 idx_generator; @@ -549,8 +665,14 @@ u32 idx; int found; - idx = (idx_generator | dir); - idx_generator += 8; + if (!index) { + idx = (idx_generator | dir); + idx_generator += 8; + } else { + idx = index; + index = 0; + } + if (idx == 0) idx = 8; list = net->xfrm.policy_byidx + idx_hash(net, idx); @@ -587,22 +709,24 @@ struct xfrm_policy_queue *pq = &old->polq; struct sk_buff_head list; + if (skb_queue_empty(&pq->hold_queue)) + return; + __skb_queue_head_init(&list); spin_lock_bh(&pq->hold_queue.lock); skb_queue_splice_init(&pq->hold_queue, &list); - del_timer(&pq->hold_timer); + if (del_timer(&pq->hold_timer)) + xfrm_pol_put(old); spin_unlock_bh(&pq->hold_queue.lock); - if (skb_queue_empty(&list)) - return; - pq = &new->polq; spin_lock_bh(&pq->hold_queue.lock); skb_queue_splice(&list, &pq->hold_queue); pq->timeout = XFRM_QUEUE_TMO_MIN; - mod_timer(&pq->hold_timer, jiffies); + if (!mod_timer(&pq->hold_timer, jiffies)) + xfrm_pol_hold(new); spin_unlock_bh(&pq->hold_queue.lock); } @@ -629,7 +753,7 @@ struct hlist_head *chain; struct hlist_node *newpos; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_bysel(net, &policy->selector, policy->family, dir); delpol = NULL; newpos = NULL; @@ -640,7 +764,7 @@ xfrm_sec_ctx_match(pol->security, policy->security) && !WARN_ON(delpol)) { if (excl) { - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); return -EEXIST; } delpol = pol; @@ -654,25 +778,29 @@ break; } if (newpos) - hlist_add_after(newpos, &policy->bydst); + hlist_add_behind(&policy->bydst, newpos); else hlist_add_head(&policy->bydst, chain); - xfrm_pol_hold(policy); - net->xfrm.policy_count[dir]++; - atomic_inc(&flow_cache_genid); - rt_genid_bump(net); + __xfrm_policy_link(policy, dir); + atomic_inc(&net->xfrm.flow_cache_genid); + + /* After previous checking, family can either be AF_INET or AF_INET6 */ + if (policy->family == AF_INET) + rt_genid_bump_ipv4(net); + else + rt_genid_bump_ipv6(net); + if (delpol) { xfrm_policy_requeue(delpol, policy); __xfrm_policy_unlink(delpol, dir); } - policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir); + policy->index = delpol ? delpol->index : xfrm_gen_index(net, dir, policy->index); hlist_add_head(&policy->byidx, net->xfrm.policy_byidx+idx_hash(net, policy->index)); policy->curlft.add_time = get_seconds(); policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) xfrm_pol_hold(policy); - list_add(&policy->walk.all, &net->xfrm.policy_all); - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); if (delpol) xfrm_policy_kill(delpol); @@ -692,7 +820,7 @@ struct hlist_head *chain; *err = 0; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_bysel(net, sel, sel->family, dir); ret = NULL; hlist_for_each_entry(pol, chain, bydst) { @@ -705,7 +833,7 @@ *err = security_xfrm_policy_delete( pol->security); if (*err) { - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); return pol; } __xfrm_policy_unlink(pol, dir); @@ -714,7 +842,7 @@ break; } } - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); if (ret && delete) xfrm_policy_kill(ret); @@ -733,7 +861,7 @@ return NULL; *err = 0; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); chain = net->xfrm.policy_byidx + idx_hash(net, id); ret = NULL; hlist_for_each_entry(pol, chain, byidx) { @@ -744,7 +872,7 @@ *err = security_xfrm_policy_delete( pol->security); if (*err) { - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); return pol; } __xfrm_policy_unlink(pol, dir); @@ -753,7 +881,7 @@ break; } } - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); if (ret && delete) xfrm_policy_kill(ret); @@ -763,7 +891,7 @@ #ifdef CONFIG_SECURITY_NETWORK_XFRM static inline int -xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info) +xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid) { int dir, err = 0; @@ -777,10 +905,7 @@ continue; err = security_xfrm_policy_delete(pol->security); if (err) { - xfrm_audit_policy_delete(pol, 0, - audit_info->loginuid, - audit_info->sessionid, - audit_info->secid); + xfrm_audit_policy_delete(pol, 0, task_valid); return err; } } @@ -794,9 +919,7 @@ pol->security); if (err) { xfrm_audit_policy_delete(pol, 0, - audit_info->loginuid, - audit_info->sessionid, - audit_info->secid); + task_valid); return err; } } @@ -806,19 +929,19 @@ } #else static inline int -xfrm_policy_flush_secctx_check(struct net *net, u8 type, struct xfrm_audit *audit_info) +xfrm_policy_flush_secctx_check(struct net *net, u8 type, bool task_valid) { return 0; } #endif -int xfrm_policy_flush(struct net *net, u8 type, struct xfrm_audit *audit_info) +int xfrm_policy_flush(struct net *net, u8 type, bool task_valid) { int dir, err = 0, cnt = 0; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); - err = xfrm_policy_flush_secctx_check(net, type, audit_info); + err = xfrm_policy_flush_secctx_check(net, type, task_valid); if (err) goto out; @@ -832,16 +955,14 @@ if (pol->type != type) continue; __xfrm_policy_unlink(pol, dir); - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); cnt++; - xfrm_audit_policy_delete(pol, 1, audit_info->loginuid, - audit_info->sessionid, - audit_info->secid); + xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); goto again1; } @@ -853,16 +974,13 @@ if (pol->type != type) continue; __xfrm_policy_unlink(pol, dir); - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); cnt++; - xfrm_audit_policy_delete(pol, 1, - audit_info->loginuid, - audit_info->sessionid, - audit_info->secid); + xfrm_audit_policy_delete(pol, 1, task_valid); xfrm_policy_kill(pol); - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); goto again2; } } @@ -871,7 +989,7 @@ if (!cnt) err = -ESRCH; out: - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); return err; } EXPORT_SYMBOL(xfrm_policy_flush); @@ -891,11 +1009,13 @@ if (list_empty(&walk->walk.all) && walk->seq != 0) return 0; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); if (list_empty(&walk->walk.all)) x = list_first_entry(&net->xfrm.policy_all, struct xfrm_policy_walk_entry, all); else - x = list_entry(&walk->walk.all, struct xfrm_policy_walk_entry, all); + x = list_first_entry(&walk->walk.all, + struct xfrm_policy_walk_entry, all); + list_for_each_entry_from(x, &net->xfrm.policy_all, all) { if (x->dead) continue; @@ -917,7 +1037,7 @@ } list_del_init(&walk->walk.all); out: - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); return error; } EXPORT_SYMBOL(xfrm_policy_walk); @@ -931,14 +1051,14 @@ } EXPORT_SYMBOL(xfrm_policy_walk_init); -void xfrm_policy_walk_done(struct xfrm_policy_walk *walk) +void xfrm_policy_walk_done(struct xfrm_policy_walk *walk, struct net *net) { if (list_empty(&walk->walk.all)) return; - write_lock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME where is net? */ list_del(&walk->walk.all); - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); } EXPORT_SYMBOL(xfrm_policy_walk_done); @@ -983,7 +1103,7 @@ if (unlikely(!daddr || !saddr)) return NULL; - read_lock_bh(&xfrm_policy_lock); + read_lock_bh(&net->xfrm.xfrm_policy_lock); chain = policy_hash_direct(net, daddr, saddr, family, dir); ret = NULL; hlist_for_each_entry(pol, chain, bydst) { @@ -1003,6 +1123,9 @@ } chain = &net->xfrm.policy_inexact[dir]; hlist_for_each_entry(pol, chain, bydst) { + if ((pol->priority >= priority) && ret) + break; + err = xfrm_policy_match(pol, fl, type, family, dir); if (err) { if (err == -ESRCH) @@ -1011,15 +1134,15 @@ ret = ERR_PTR(err); goto fail; } - } else if (pol->priority < priority) { + } else { ret = pol; break; } } - if (ret) - xfrm_pol_hold(ret); + + xfrm_pol_hold(ret); fail: - read_unlock_bh(&xfrm_policy_lock); + read_unlock_bh(&net->xfrm.xfrm_policy_lock); return ret; } @@ -1092,17 +1215,25 @@ } } -static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, - const struct flowi *fl) +static struct xfrm_policy *xfrm_sk_policy_lookup(const struct sock *sk, int dir, + const struct flowi *fl, u16 family) { struct xfrm_policy *pol; + struct net *net = sock_net(sk); - read_lock_bh(&xfrm_policy_lock); - if ((pol = sk->sk_policy[dir]) != NULL) { - bool match = xfrm_selector_match(&pol->selector, fl, - sk->sk_family); + rcu_read_lock(); + read_lock_bh(&net->xfrm.xfrm_policy_lock); + pol = rcu_dereference(sk->sk_policy[dir]); + if (pol != NULL) { + bool match; int err = 0; + if (pol->family != family) { + pol = NULL; + goto out; + } + + match = xfrm_selector_match(&pol->selector, fl, family); if (match) { if ((sk->sk_mark & pol->mark.m) != pol->mark.v) { pol = NULL; @@ -1121,24 +1252,18 @@ pol = NULL; } out: - read_unlock_bh(&xfrm_policy_lock); + read_unlock_bh(&net->xfrm.xfrm_policy_lock); + rcu_read_unlock(); return pol; } static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) { struct net *net = xp_net(pol); - struct hlist_head *chain = policy_hash_bysel(net, &pol->selector, - pol->family, dir); list_add(&pol->walk.all, &net->xfrm.policy_all); - hlist_add_head(&pol->bydst, chain); - hlist_add_head(&pol->byidx, net->xfrm.policy_byidx+idx_hash(net, pol->index)); net->xfrm.policy_count[dir]++; xfrm_pol_hold(pol); - - if (xfrm_bydst_should_resize(net, dir, NULL)) - schedule_work(&net->xfrm.policy_hash_work); } static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, @@ -1146,22 +1271,38 @@ { struct net *net = xp_net(pol); - if (hlist_unhashed(&pol->bydst)) + if (list_empty(&pol->walk.all)) return NULL; - hlist_del(&pol->bydst); - hlist_del(&pol->byidx); - list_del(&pol->walk.all); + /* Socket policies are not hashed. */ + if (!hlist_unhashed(&pol->bydst)) { + hlist_del(&pol->bydst); + hlist_del(&pol->byidx); + } + + list_del_init(&pol->walk.all); net->xfrm.policy_count[dir]--; return pol; } +static void xfrm_sk_policy_link(struct xfrm_policy *pol, int dir) +{ + __xfrm_policy_link(pol, XFRM_POLICY_MAX + dir); +} + +static void xfrm_sk_policy_unlink(struct xfrm_policy *pol, int dir) +{ + __xfrm_policy_unlink(pol, XFRM_POLICY_MAX + dir); +} + int xfrm_policy_delete(struct xfrm_policy *pol, int dir) { - write_lock_bh(&xfrm_policy_lock); + struct net *net = xp_net(pol); + + write_lock_bh(&net->xfrm.xfrm_policy_lock); pol = __xfrm_policy_unlink(pol, dir); - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); if (pol) { xfrm_policy_kill(pol); return 0; @@ -1180,14 +1321,15 @@ return -EINVAL; #endif - write_lock_bh(&xfrm_policy_lock); - old_pol = sk->sk_policy[dir]; - sk->sk_policy[dir] = pol; + write_lock_bh(&net->xfrm.xfrm_policy_lock); + old_pol = rcu_dereference_protected(sk->sk_policy[dir], + lockdep_is_held(&net->xfrm.xfrm_policy_lock)); if (pol) { pol->curlft.add_time = get_seconds(); - pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir); - __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); + pol->index = xfrm_gen_index(net, XFRM_POLICY_MAX+dir, 0); + xfrm_sk_policy_link(pol, dir); } + rcu_assign_pointer(sk->sk_policy[dir], pol); if (old_pol) { if (pol) xfrm_policy_requeue(old_pol, pol); @@ -1195,9 +1337,9 @@ /* Unlinking succeeds always. This is the only function * allowed to delete or replace socket policy. */ - __xfrm_policy_unlink(old_pol, XFRM_POLICY_MAX+dir); + xfrm_sk_policy_unlink(old_pol, dir); } - write_unlock_bh(&xfrm_policy_lock); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); if (old_pol) { xfrm_policy_kill(old_pol); @@ -1208,6 +1350,7 @@ static struct xfrm_policy *clone_policy(const struct xfrm_policy *old, int dir) { struct xfrm_policy *newp = xfrm_policy_alloc(xp_net(old), GFP_ATOMIC); + struct net *net = xp_net(old); if (newp) { newp->selector = old->selector; @@ -1226,37 +1369,46 @@ newp->type = old->type; memcpy(newp->xfrm_vec, old->xfrm_vec, newp->xfrm_nr*sizeof(struct xfrm_tmpl)); - write_lock_bh(&xfrm_policy_lock); - __xfrm_policy_link(newp, XFRM_POLICY_MAX+dir); - write_unlock_bh(&xfrm_policy_lock); + write_lock_bh(&net->xfrm.xfrm_policy_lock); + xfrm_sk_policy_link(newp, dir); + write_unlock_bh(&net->xfrm.xfrm_policy_lock); xfrm_pol_put(newp); } return newp; } -int __xfrm_sk_clone_policy(struct sock *sk) +int __xfrm_sk_clone_policy(struct sock *sk, const struct sock *osk) { - struct xfrm_policy *p0 = sk->sk_policy[0], - *p1 = sk->sk_policy[1]; + const struct xfrm_policy *p; + struct xfrm_policy *np; + int i, ret = 0; - sk->sk_policy[0] = sk->sk_policy[1] = NULL; - if (p0 && (sk->sk_policy[0] = clone_policy(p0, 0)) == NULL) - return -ENOMEM; - if (p1 && (sk->sk_policy[1] = clone_policy(p1, 1)) == NULL) - return -ENOMEM; - return 0; + rcu_read_lock(); + for (i = 0; i < 2; i++) { + p = rcu_dereference(osk->sk_policy[i]); + if (p) { + np = clone_policy(p, i); + if (unlikely(!np)) { + ret = -ENOMEM; + break; + } + rcu_assign_pointer(sk->sk_policy[i], np); + } + } + rcu_read_unlock(); + return ret; } static int -xfrm_get_saddr(struct net *net, xfrm_address_t *local, xfrm_address_t *remote, - unsigned short family) +xfrm_get_saddr(struct net *net, int oif, xfrm_address_t *local, + xfrm_address_t *remote, unsigned short family) { int err; struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); if (unlikely(afinfo == NULL)) return -EINVAL; - err = afinfo->get_saddr(net, local, remote); + err = afinfo->get_saddr(net, oif, local, remote); xfrm_policy_put_afinfo(afinfo); return err; } @@ -1274,7 +1426,7 @@ xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family); xfrm_address_t tmp; - for (nx=0, i = 0; i < policy->xfrm_nr; i++) { + for (nx = 0, i = 0; i < policy->xfrm_nr; i++) { struct xfrm_state *x; xfrm_address_t *remote = daddr; xfrm_address_t *local = saddr; @@ -1285,7 +1437,9 @@ remote = &tmpl->id.daddr; local = &tmpl->saddr; if (xfrm_addr_any(local, tmpl->encap_family)) { - error = xfrm_get_saddr(net, &tmp, remote, tmpl->encap_family); + error = xfrm_get_saddr(net, fl->flowi_oif, + &tmp, remote, + tmpl->encap_family); if (error) goto fail; local = &tmp; @@ -1304,9 +1458,9 @@ error = (x->km.state == XFRM_STATE_ERROR ? -EINVAL : -EAGAIN); xfrm_state_put(x); - } - else if (error == -ESRCH) + } else if (error == -ESRCH) { error = -EAGAIN; + } if (!tmpl->optional) goto fail; @@ -1314,7 +1468,7 @@ return nx; fail: - for (nx--; nx>=0; nx--) + for (nx--; nx >= 0; nx--) xfrm_state_put(xfrm[nx]); return error; } @@ -1351,7 +1505,7 @@ return cnx; fail: - for (cnx--; cnx>=0; cnx--) + for (cnx--; cnx >= 0; cnx--) xfrm_state_put(tpp[cnx]); return error; @@ -1454,8 +1608,6 @@ memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst)); xdst->flo.ops = &xfrm_bundle_fc_ops; - if (afinfo->init_dst) - afinfo->init_dst(net, xdst); } else xdst = ERR_PTR(-ENOBUFS); @@ -1565,8 +1717,8 @@ if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { family = xfrm[i]->props.family; - dst = xfrm_dst_lookup(xfrm[i], tos, &saddr, &daddr, - family); + dst = xfrm_dst_lookup(xfrm[i], tos, fl->flowi_oif, + &saddr, &daddr, family); err = PTR_ERR(dst); if (IS_ERR(dst)) goto put_states; @@ -1629,20 +1781,22 @@ goto out; } -static int inline -xfrm_dst_alloc_copy(void **target, const void *src, int size) +#ifdef CONFIG_XFRM_SUB_POLICY +static int xfrm_dst_alloc_copy(void **target, const void *src, int size) { if (!*target) { *target = kmalloc(size, GFP_ATOMIC); if (!*target) return -ENOMEM; } + memcpy(*target, src, size); return 0; } +#endif -static int inline -xfrm_dst_update_parent(struct dst_entry *dst, const struct xfrm_selector *sel) +static int xfrm_dst_update_parent(struct dst_entry *dst, + const struct xfrm_selector *sel) { #ifdef CONFIG_XFRM_SUB_POLICY struct xfrm_dst *xdst = (struct xfrm_dst *)dst; @@ -1653,8 +1807,8 @@ #endif } -static int inline -xfrm_dst_update_origin(struct dst_entry *dst, const struct flowi *fl) +static int xfrm_dst_update_origin(struct dst_entry *dst, + const struct flowi *fl) { #ifdef CONFIG_XFRM_SUB_POLICY struct xfrm_dst *xdst = (struct xfrm_dst *)dst; @@ -1692,7 +1846,7 @@ xfrm_pols_put(pols, *num_pols); return PTR_ERR(pols[1]); } - (*num_pols) ++; + (*num_pols)++; (*num_xfrms) += pols[1]->xfrm_nr; } } @@ -1746,7 +1900,7 @@ } xdst->num_pols = num_pols; - memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols); + memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); xdst->policy_genid = atomic_read(&pols[0]->genid); return xdst; @@ -1754,26 +1908,28 @@ static void xfrm_policy_queue_process(unsigned long arg) { - int err = 0; struct sk_buff *skb; struct sock *sk; struct dst_entry *dst; - struct net_device *dev; struct xfrm_policy *pol = (struct xfrm_policy *)arg; + struct net *net = xp_net(pol); struct xfrm_policy_queue *pq = &pol->polq; struct flowi fl; struct sk_buff_head list; spin_lock(&pq->hold_queue.lock); skb = skb_peek(&pq->hold_queue); + if (!skb) { + spin_unlock(&pq->hold_queue.lock); + goto out; + } dst = skb_dst(skb); sk = skb->sk; xfrm_decode_session(skb, &fl, dst->ops->family); spin_unlock(&pq->hold_queue.lock); dst_hold(dst->path); - dst = xfrm_lookup(xp_net(pol), dst->path, &fl, - sk, 0); + dst = xfrm_lookup(net, dst->path, &fl, sk, 0); if (IS_ERR(dst)) goto purge_queue; @@ -1784,8 +1940,9 @@ goto purge_queue; pq->timeout = pq->timeout << 1; - mod_timer(&pq->hold_timer, jiffies + pq->timeout); - return; + if (!mod_timer(&pq->hold_timer, jiffies + pq->timeout)) + xfrm_pol_hold(pol); + goto out; } dst_release(dst); @@ -1802,36 +1959,41 @@ xfrm_decode_session(skb, &fl, skb_dst(skb)->ops->family); dst_hold(skb_dst(skb)->path); - dst = xfrm_lookup(xp_net(pol), skb_dst(skb)->path, - &fl, skb->sk, 0); + dst = xfrm_lookup(net, skb_dst(skb)->path, &fl, skb->sk, 0); if (IS_ERR(dst)) { - dev_put(skb->dev); kfree_skb(skb); continue; } - nf_reset(skb); + nf_reset_no_generic_ct(skb); skb_dst_drop(skb); skb_dst_set(skb, dst); - dev = skb->dev; - err = dst_output(skb); - dev_put(dev); + dst_output(net, skb->sk, skb); } +out: + xfrm_pol_put(pol); return; purge_queue: pq->timeout = 0; - xfrm_queue_purge(&pq->hold_queue); + skb_queue_purge(&pq->hold_queue); + xfrm_pol_put(pol); } -static int xdst_queue_output(struct sk_buff *skb) +static int xdst_queue_output(struct net *net, struct sock *sk, struct sk_buff *skb) { unsigned long sched_next; struct dst_entry *dst = skb_dst(skb); struct xfrm_dst *xdst = (struct xfrm_dst *) dst; - struct xfrm_policy_queue *pq = &xdst->pols[0]->polq; + struct xfrm_policy *pol = xdst->pols[0]; + struct xfrm_policy_queue *pq = &pol->polq; + + if (unlikely(skb_fclone_busy(sk, skb))) { + kfree_skb(skb); + return 0; + } if (pq->hold_queue.qlen > XFRM_MAX_QUEUE_LEN) { kfree_skb(skb); @@ -1839,7 +2001,6 @@ } skb_dst_force(skb); - dev_hold(skb->dev); spin_lock_bh(&pq->hold_queue.lock); @@ -1851,10 +2012,12 @@ if (del_timer(&pq->hold_timer)) { if (time_before(pq->hold_timer.expires, sched_next)) sched_next = pq->hold_timer.expires; + xfrm_pol_put(pol); } __skb_queue_tail(&pq->hold_queue, skb); - mod_timer(&pq->hold_timer, sched_next); + if (!mod_timer(&pq->hold_timer, sched_next)) + xfrm_pol_hold(pol); spin_unlock_bh(&pq->hold_queue.lock); @@ -1862,13 +2025,14 @@ } static struct xfrm_dst *xfrm_create_dummy_bundle(struct net *net, - struct dst_entry *dst, + struct xfrm_flo *xflo, const struct flowi *fl, int num_xfrms, u16 family) { int err; struct net_device *dev; + struct dst_entry *dst; struct dst_entry *dst1; struct xfrm_dst *xdst; @@ -1876,10 +2040,12 @@ if (IS_ERR(xdst)) return xdst; - if (net->xfrm.sysctl_larval_drop || num_xfrms <= 0 || - (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP)) + if (!(xflo->flags & XFRM_LOOKUP_QUEUE) || + net->xfrm.sysctl_larval_drop || + num_xfrms <= 0) return xdst; + dst = xflo->dst_orig; dst1 = &xdst->u.dst; dst_hold(dst); xdst->route = dst; @@ -1921,7 +2087,7 @@ xfrm_bundle_lookup(struct net *net, const struct flowi *fl, u16 family, u8 dir, struct flow_cache_object *oldflo, void *ctx) { - struct dst_entry *dst_orig = (struct dst_entry *)ctx; + struct xfrm_flo *xflo = (struct xfrm_flo *)ctx; struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; struct xfrm_dst *xdst, *new_xdst; int num_pols = 0, num_xfrms = 0, i, err, pol_dead; @@ -1962,7 +2128,8 @@ goto make_dummy_bundle; } - new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, dst_orig); + new_xdst = xfrm_resolve_and_create_bundle(pols, num_pols, fl, family, + xflo->dst_orig); if (IS_ERR(new_xdst)) { err = PTR_ERR(new_xdst); if (err != -EAGAIN) @@ -1996,14 +2163,14 @@ /* We found policies, but there's no bundles to instantiate: * either because the policy blocks, has no transformations or * we could not build template (no xfrm_states).*/ - xdst = xfrm_create_dummy_bundle(net, dst_orig, fl, num_xfrms, family); + xdst = xfrm_create_dummy_bundle(net, xflo, fl, num_xfrms, family); if (IS_ERR(xdst)) { xfrm_pols_put(pols, num_pols); return ERR_CAST(xdst); } xdst->num_pols = num_pols; xdst->num_xfrms = num_xfrms; - memcpy(xdst->pols, pols, sizeof(struct xfrm_policy*) * num_pols); + memcpy(xdst->pols, pols, sizeof(struct xfrm_policy *) * num_pols); dst_hold(&xdst->u.dst); return &xdst->flo; @@ -2042,7 +2209,7 @@ */ struct dst_entry *xfrm_lookup(struct net *net, struct dst_entry *dst_orig, const struct flowi *fl, - struct sock *sk, int flags) + const struct sock *sk, int flags) { struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; struct flow_cache_object *flo; @@ -2052,14 +2219,14 @@ u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); int i, err, num_pols, num_xfrms = 0, drop_pols = 0; -restart: dst = NULL; xdst = NULL; route = NULL; + sk = sk_const_to_full_sk(sk); if (sk && sk->sk_policy[XFRM_POLICY_OUT]) { num_pols = 1; - pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); + pols[0] = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, family); err = xfrm_expand_policies(fl, family, pols, &num_pols, &num_xfrms); if (err < 0) @@ -2085,24 +2252,24 @@ } dst_hold(&xdst->u.dst); - - spin_lock_bh(&xfrm_policy_sk_bundle_lock); - xdst->u.dst.next = xfrm_policy_sk_bundles; - xfrm_policy_sk_bundles = &xdst->u.dst; - spin_unlock_bh(&xfrm_policy_sk_bundle_lock); - + xdst->u.dst.flags |= DST_NOCACHE; route = xdst->route; } } if (xdst == NULL) { + struct xfrm_flo xflo; + + xflo.dst_orig = dst_orig; + xflo.flags = flags; + /* To accelerate a bit... */ if ((dst_orig->flags & DST_NOXFRM) || !net->xfrm.policy_count[XFRM_POLICY_OUT]) goto nopol; flo = flow_cache_lookup(net, fl, family, dir, - xfrm_bundle_lookup, dst_orig); + xfrm_bundle_lookup, &xflo); if (flo == NULL) goto nopol; if (IS_ERR(flo)) { @@ -2113,7 +2280,7 @@ num_pols = xdst->num_pols; num_xfrms = xdst->num_xfrms; - memcpy(pols, xdst->pols, sizeof(struct xfrm_policy*) * num_pols); + memcpy(pols, xdst->pols, sizeof(struct xfrm_policy *) * num_pols); route = xdst->route; } @@ -2126,31 +2293,12 @@ * have the xfrm_state's. We need to wait for KM to * negotiate new SA's or bail out with error.*/ if (net->xfrm.sysctl_larval_drop) { - /* EREMOTE tells the caller to generate - * a one-shot blackhole route. */ - dst_release(dst); - xfrm_pols_put(pols, drop_pols); XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); - - return make_blackhole(net, family, dst_orig); + err = -EREMOTE; + goto error; } - if (fl->flowi_flags & FLOWI_FLAG_CAN_SLEEP) { - DECLARE_WAITQUEUE(wait, current); - add_wait_queue(&net->xfrm.km_waitq, &wait); - set_current_state(TASK_INTERRUPTIBLE); - schedule(); - set_current_state(TASK_RUNNING); - remove_wait_queue(&net->xfrm.km_waitq, &wait); - - if (!signal_pending(current)) { - dst_release(dst); - goto restart; - } - - err = -ERESTART; - } else - err = -EAGAIN; + err = -EAGAIN; XFRM_INC_STATS(net, LINUX_MIB_XFRMOUTNOSTATES); goto error; @@ -2198,12 +2346,31 @@ error: dst_release(dst); dropdst: - dst_release(dst_orig); + if (!(flags & XFRM_LOOKUP_KEEP_DST_REF)) + dst_release(dst_orig); xfrm_pols_put(pols, drop_pols); return ERR_PTR(err); } EXPORT_SYMBOL(xfrm_lookup); +/* Callers of xfrm_lookup_route() must ensure a call to dst_output(). + * Otherwise we may send out blackholed packets. + */ +struct dst_entry *xfrm_lookup_route(struct net *net, struct dst_entry *dst_orig, + const struct flowi *fl, + const struct sock *sk, int flags) +{ + struct dst_entry *dst = xfrm_lookup(net, dst_orig, fl, sk, + flags | XFRM_LOOKUP_QUEUE | + XFRM_LOOKUP_KEEP_DST_REF); + + if (IS_ERR(dst) && PTR_ERR(dst) == -EREMOTE) + return make_blackhole(net, dst_orig->ops->family, dst_orig); + + return dst; +} +EXPORT_SYMBOL(xfrm_lookup_route); + static inline int xfrm_secpath_reject(int idx, struct sk_buff *skb, const struct flowi *fl) { @@ -2326,7 +2493,7 @@ if (skb->sp) { int i; - for (i=skb->sp->len-1; i>=0; i--) { + for (i = skb->sp->len-1; i >= 0; i--) { struct xfrm_state *x = skb->sp->xvec[i]; if (!xfrm_selector_match(&x->sel, &fl, family)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINSTATEMISMATCH); @@ -2336,8 +2503,9 @@ } pol = NULL; + sk = sk_to_full_sk(sk); if (sk && sk->sk_policy[dir]) { - pol = xfrm_sk_policy_lookup(sk, dir, &fl); + pol = xfrm_sk_policy_lookup(sk, dir, &fl, family); if (IS_ERR(pol)) { XFRM_INC_STATS(net, LINUX_MIB_XFRMINPOLERROR); return 0; @@ -2372,7 +2540,7 @@ pol->curlft.use_time = get_seconds(); pols[0] = pol; - npols ++; + npols++; #ifdef CONFIG_XFRM_SUB_POLICY if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { pols[1] = xfrm_policy_lookup_bytype(net, XFRM_POLICY_TYPE_MAIN, @@ -2384,7 +2552,7 @@ return 0; } pols[1]->curlft.use_time = get_seconds(); - npols ++; + npols++; } } #endif @@ -2416,7 +2584,7 @@ } xfrm_nr = ti; if (npols > 1) { - xfrm_tmpl_sort(stp, tpp, xfrm_nr, family); + xfrm_tmpl_sort(stp, tpp, xfrm_nr, family, net); tpp = stp; } @@ -2469,7 +2637,7 @@ skb_dst_force(skb); - dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, 0); + dst = xfrm_lookup(net, skb_dst(skb), &fl, NULL, XFRM_LOOKUP_QUEUE); if (IS_ERR(dst)) { res = 0; dst = NULL; @@ -2541,33 +2709,15 @@ return dst; } -static void __xfrm_garbage_collect(struct net *net) -{ - struct dst_entry *head, *next; - - spin_lock_bh(&xfrm_policy_sk_bundle_lock); - head = xfrm_policy_sk_bundles; - xfrm_policy_sk_bundles = NULL; - spin_unlock_bh(&xfrm_policy_sk_bundle_lock); - - while (head) { - next = head->next; - dst_free(head); - head = next; - } -} - void xfrm_garbage_collect(struct net *net) { - flow_cache_flush(); - __xfrm_garbage_collect(net); + flow_cache_flush(net); } EXPORT_SYMBOL(xfrm_garbage_collect); static void xfrm_garbage_collect_deferred(struct net *net) { - flow_cache_flush_deferred(); - __xfrm_garbage_collect(net); + flow_cache_flush_deferred(net); } static void xfrm_init_pmtu(struct dst_entry *dst) @@ -2681,7 +2831,6 @@ int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { - struct net *net; int err = 0; if (unlikely(afinfo == NULL)) return -EINVAL; @@ -2689,7 +2838,7 @@ return -EAFNOSUPPORT; spin_lock(&xfrm_policy_afinfo_lock); if (unlikely(xfrm_policy_afinfo[afinfo->family] != NULL)) - err = -ENOBUFS; + err = -EEXIST; else { struct dst_ops *dst_ops = afinfo->dst_ops; if (likely(dst_ops->kmem_cachep == NULL)) @@ -2712,26 +2861,6 @@ } spin_unlock(&xfrm_policy_afinfo_lock); - rtnl_lock(); - for_each_net(net) { - struct dst_ops *xfrm_dst_ops; - - switch (afinfo->family) { - case AF_INET: - xfrm_dst_ops = &net->xfrm.xfrm4_dst_ops; - break; -#if IS_ENABLED(CONFIG_IPV6) - case AF_INET6: - xfrm_dst_ops = &net->xfrm.xfrm6_dst_ops; - break; -#endif - default: - BUG(); - } - *xfrm_dst_ops = *afinfo->dst_ops; - } - rtnl_unlock(); - return err; } EXPORT_SYMBOL(xfrm_policy_register_afinfo); @@ -2767,25 +2896,9 @@ } EXPORT_SYMBOL(xfrm_policy_unregister_afinfo); -static void __net_init xfrm_dst_ops_init(struct net *net) -{ - struct xfrm_policy_afinfo *afinfo; - - rcu_read_lock(); - afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET]); - if (afinfo) - net->xfrm.xfrm4_dst_ops = *afinfo->dst_ops; -#if IS_ENABLED(CONFIG_IPV6) - afinfo = rcu_dereference(xfrm_policy_afinfo[AF_INET6]); - if (afinfo) - net->xfrm.xfrm6_dst_ops = *afinfo->dst_ops; -#endif - rcu_read_unlock(); -} - static int xfrm_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { - struct net_device *dev = ptr; + struct net_device *dev = netdev_notifier_info_to_dev(ptr); switch (event) { case NETDEV_DOWN: @@ -2802,21 +2915,19 @@ static int __net_init xfrm_statistics_init(struct net *net) { int rv; - - if (snmp_mib_init((void __percpu **)net->mib.xfrm_statistics, - sizeof(struct linux_xfrm_mib), - __alignof__(struct linux_xfrm_mib)) < 0) + net->mib.xfrm_statistics = alloc_percpu(struct linux_xfrm_mib); + if (!net->mib.xfrm_statistics) return -ENOMEM; rv = xfrm_proc_init(net); if (rv < 0) - snmp_mib_free((void __percpu **)net->mib.xfrm_statistics); + free_percpu(net->mib.xfrm_statistics); return rv; } static void xfrm_statistics_fini(struct net *net) { xfrm_proc_fini(net); - snmp_mib_free((void __percpu **)net->mib.xfrm_statistics); + free_percpu(net->mib.xfrm_statistics); } #else static int __net_init xfrm_statistics_init(struct net *net) @@ -2848,10 +2959,11 @@ goto out_byidx; net->xfrm.policy_idx_hmask = hmask; - for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { + for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { struct xfrm_policy_hash *htab; net->xfrm.policy_count[dir] = 0; + net->xfrm.policy_count[XFRM_POLICY_MAX + dir] = 0; INIT_HLIST_HEAD(&net->xfrm.policy_inexact[dir]); htab = &net->xfrm.policy_bydst[dir]; @@ -2859,10 +2971,21 @@ if (!htab->table) goto out_bydst; htab->hmask = hmask; - } + htab->dbits4 = 32; + htab->sbits4 = 32; + htab->dbits6 = 128; + htab->sbits6 = 128; + } + net->xfrm.policy_hthresh.lbits4 = 32; + net->xfrm.policy_hthresh.rbits4 = 32; + net->xfrm.policy_hthresh.lbits6 = 128; + net->xfrm.policy_hthresh.rbits6 = 128; + + seqlock_init(&net->xfrm.policy_hthresh.lock); INIT_LIST_HEAD(&net->xfrm.policy_all); INIT_WORK(&net->xfrm.policy_hash_work, xfrm_hash_resize); + INIT_WORK(&net->xfrm.policy_hthresh.work, xfrm_hash_rebuild); if (net_eq(net, &init_net)) register_netdevice_notifier(&xfrm_dev_notifier); return 0; @@ -2881,25 +3004,18 @@ static void xfrm_policy_fini(struct net *net) { - struct xfrm_audit audit_info; unsigned int sz; int dir; flush_work(&net->xfrm.policy_hash_work); #ifdef CONFIG_XFRM_SUB_POLICY - audit_info.loginuid = INVALID_UID; - audit_info.sessionid = -1; - audit_info.secid = 0; - xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, &audit_info); + xfrm_policy_flush(net, XFRM_POLICY_TYPE_SUB, false); #endif - audit_info.loginuid = INVALID_UID; - audit_info.sessionid = -1; - audit_info.secid = 0; - xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, &audit_info); + xfrm_policy_flush(net, XFRM_POLICY_TYPE_MAIN, false); WARN_ON(!list_empty(&net->xfrm.policy_all)); - for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { + for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { struct xfrm_policy_hash *htab; WARN_ON(!hlist_empty(&net->xfrm.policy_inexact[dir])); @@ -2919,6 +3035,11 @@ { int rv; + /* Initialize the per-net locks here */ + spin_lock_init(&net->xfrm.xfrm_state_lock); + rwlock_init(&net->xfrm.xfrm_policy_lock); + mutex_init(&net->xfrm.xfrm_cfg_mutex); + rv = xfrm_statistics_init(net); if (rv < 0) goto out_statistics; @@ -2928,12 +3049,17 @@ rv = xfrm_policy_init(net); if (rv < 0) goto out_policy; - xfrm_dst_ops_init(net); rv = xfrm_sysctl_init(net); if (rv < 0) goto out_sysctl; + rv = flow_cache_init(net); + if (rv < 0) + goto out; + return 0; +out: + xfrm_sysctl_fini(net); out_sysctl: xfrm_policy_fini(net); out_policy: @@ -2946,6 +3072,7 @@ static void __net_exit xfrm_net_exit(struct net *net) { + flow_cache_fini(net); xfrm_sysctl_fini(net); xfrm_policy_fini(net); xfrm_state_fini(net); @@ -2961,6 +3088,7 @@ { register_pernet_subsys(&xfrm_net_ops); xfrm_input_init(); + xfrm_output_init(); } #ifdef CONFIG_AUDITSYSCALL @@ -2974,7 +3102,7 @@ audit_log_format(audit_buf, " sec_alg=%u sec_doi=%u sec_obj=%s", ctx->ctx_alg, ctx->ctx_doi, ctx->ctx_str); - switch(sel->family) { + switch (sel->family) { case AF_INET: audit_log_format(audit_buf, " src=%pI4", &sel->saddr.a4); if (sel->prefixlen_s != 32) @@ -2998,15 +3126,14 @@ } } -void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, - kuid_t auid, u32 sessionid, u32 secid) +void xfrm_audit_policy_add(struct xfrm_policy *xp, int result, bool task_valid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SPD-add"); if (audit_buf == NULL) return; - xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf); + xfrm_audit_helper_usrinfo(task_valid, audit_buf); audit_log_format(audit_buf, " res=%u", result); xfrm_audit_common_policyinfo(xp, audit_buf); audit_log_end(audit_buf); @@ -3014,14 +3141,14 @@ EXPORT_SYMBOL_GPL(xfrm_audit_policy_add); void xfrm_audit_policy_delete(struct xfrm_policy *xp, int result, - kuid_t auid, u32 sessionid, u32 secid) + bool task_valid) { struct audit_buffer *audit_buf; audit_buf = xfrm_audit_start("SPD-delete"); if (audit_buf == NULL) return; - xfrm_audit_helper_usrinfo(auid, sessionid, secid, audit_buf); + xfrm_audit_helper_usrinfo(task_valid, audit_buf); audit_log_format(audit_buf, " res=%u", result); xfrm_audit_common_policyinfo(xp, audit_buf); audit_log_end(audit_buf); @@ -3051,15 +3178,15 @@ return false; } -static struct xfrm_policy * xfrm_migrate_policy_find(const struct xfrm_selector *sel, - u8 dir, u8 type) +static struct xfrm_policy *xfrm_migrate_policy_find(const struct xfrm_selector *sel, + u8 dir, u8 type, struct net *net) { struct xfrm_policy *pol, *ret = NULL; struct hlist_head *chain; u32 priority = ~0U; - read_lock_bh(&xfrm_policy_lock); - chain = policy_hash_direct(&init_net, &sel->daddr, &sel->saddr, sel->family, dir); + read_lock_bh(&net->xfrm.xfrm_policy_lock); /*FIXME*/ + chain = policy_hash_direct(net, &sel->daddr, &sel->saddr, sel->family, dir); hlist_for_each_entry(pol, chain, bydst) { if (xfrm_migrate_selector_match(sel, &pol->selector) && pol->type == type) { @@ -3068,20 +3195,21 @@ break; } } - chain = &init_net.xfrm.policy_inexact[dir]; + chain = &net->xfrm.policy_inexact[dir]; hlist_for_each_entry(pol, chain, bydst) { + if ((pol->priority >= priority) && ret) + break; + if (xfrm_migrate_selector_match(sel, &pol->selector) && - pol->type == type && - pol->priority < priority) { + pol->type == type) { ret = pol; break; } } - if (ret) - xfrm_pol_hold(ret); + xfrm_pol_hold(ret); - read_unlock_bh(&xfrm_policy_lock); + read_unlock_bh(&net->xfrm.xfrm_policy_lock); return ret; } @@ -3192,7 +3320,7 @@ int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, struct xfrm_migrate *m, int num_migrate, - struct xfrm_kmaddress *k) + struct xfrm_kmaddress *k, struct net *net) { int i, err, nx_cur = 0, nx_new = 0; struct xfrm_policy *pol = NULL; @@ -3205,14 +3333,14 @@ goto out; /* Stage 1 - find policy */ - if ((pol = xfrm_migrate_policy_find(sel, dir, type)) == NULL) { + if ((pol = xfrm_migrate_policy_find(sel, dir, type, net)) == NULL) { err = -ENOENT; goto out; } /* Stage 2 - find and update state(s) */ for (i = 0, mp = m; i < num_migrate; i++, mp++) { - if ((x = xfrm_migrate_state_find(mp))) { + if ((x = xfrm_migrate_state_find(mp, net))) { x_cur[nx_cur] = x; nx_cur++; if ((xc = xfrm_state_migrate(x, mp))) {