#if defined(CONFIG_BCM_MPTCP) && defined(CONFIG_BCM_KF_MPTCP)
/* MPTCP Scheduler module selector. Highly inspired by tcp_cong.c */

#include <linux/module.h>
#include <net/mptcp.h>

static unsigned char num_segments __read_mostly = 1;
module_param(num_segments, byte, 0644);
MODULE_PARM_DESC(num_segments, "The number of consecutive segments that are part of a burst");

static bool cwnd_limited __read_mostly = 1;
module_param(cwnd_limited, bool, 0644);
MODULE_PARM_DESC(cwnd_limited, "if set to 1, the scheduler tries to fill the congestion-window on all subflows");

struct rrsched_priv {
	unsigned char quota;
};

static struct rrsched_priv *rrsched_get_priv(const struct tcp_sock *tp)
{
	return (struct rrsched_priv *)&tp->mptcp->mptcp_sched[0];
}

/* Is the sub-socket sk available to send the skb? */
static bool mptcp_rr_is_available(const struct sock *sk, const struct sk_buff *skb,
				  bool zero_wnd_test, bool cwnd_test)
{
	const struct tcp_sock *tp = tcp_sk(sk);
	unsigned int space, in_flight;

	/* Set of states for which we are allowed to send data */
	if (!mptcp_sk_can_send(sk))
		return false;

	/* We do not send data on this subflow unless it is
	 * fully established, i.e. the 4th ack has been received.
	 */
	if (tp->mptcp->pre_established)
		return false;

	if (tp->pf)
		return false;

	if (inet_csk(sk)->icsk_ca_state == TCP_CA_Loss) {
		/* If SACK is disabled, and we got a loss, TCP does not exit
		 * the loss-state until something above high_seq has been
		 * acked. (see tcp_try_undo_recovery)
		 *
		 * high_seq is the snd_nxt at the moment of the RTO. As soon
		 * as we have an RTO, we won't push data on the subflow.
		 * Thus, snd_una can never go beyond high_seq.
		 */
		if (!tcp_is_reno(tp))
			return false;
		else if (tp->snd_una != tp->high_seq)
			return false;
	}

	if (!tp->mptcp->fully_established) {
		/* Make sure that we send in-order data */
		if (skb && tp->mptcp->second_packet &&
		    tp->mptcp->last_end_data_seq != TCP_SKB_CB(skb)->seq)
			return false;
	}

	if (!cwnd_test)
		goto zero_wnd_test;

	in_flight = tcp_packets_in_flight(tp);
	/* Not even a single spot in the cwnd */
	if (in_flight >= tp->snd_cwnd)
		return false;

	/* Now, check if what is queued in the subflow's send-queue
	 * already fills the cwnd.
	 */
	space = (tp->snd_cwnd - in_flight) * tp->mss_cache;

	if (tp->write_seq - tp->snd_nxt > space)
		return false;

zero_wnd_test:
	if (zero_wnd_test && !before(tp->write_seq, tcp_wnd_end(tp)))
		return false;

	return true;
}

/* Are we not allowed to reinject this skb on tp? */
static int mptcp_rr_dont_reinject_skb(const struct tcp_sock *tp, const struct sk_buff *skb)
{
	/* If the skb has already been enqueued in this sk, try to find
	 * another one.
	 */
	return skb &&
		/* Has the skb already been enqueued into this subsocket? */
		mptcp_pi_to_flag(tp->mptcp->path_index) & TCP_SKB_CB(skb)->path_mask;
}
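/* Subflow selection policy, as implemented below:
 *  - a DATA_FIN is answered on the subflow that received it
 *    (mpcb->dfin_path_index), if that subflow is available;
 *  - otherwise, any available subflow that has not yet carried this skb
 *    is chosen;
 *  - if every available subflow has already carried it, the skb's
 *    path_mask is reset and one of them is reused as a backup.
 */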
/* We just look for any subflow that is available */
static struct sock *rr_get_available_subflow(struct sock *meta_sk,
					     struct sk_buff *skb,
					     bool zero_wnd_test)
{
	const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
	struct sock *sk, *bestsk = NULL, *backupsk = NULL;

	/* Answer data_fin on same subflow!!! */
	if (meta_sk->sk_shutdown & RCV_SHUTDOWN &&
	    skb && mptcp_is_data_fin(skb)) {
		mptcp_for_each_sk(mpcb, sk) {
			if (tcp_sk(sk)->mptcp->path_index == mpcb->dfin_path_index &&
			    mptcp_rr_is_available(sk, skb, zero_wnd_test, true))
				return sk;
		}
	}

	/* First, find the best subflow */
	mptcp_for_each_sk(mpcb, sk) {
		struct tcp_sock *tp = tcp_sk(sk);

		if (!mptcp_rr_is_available(sk, skb, zero_wnd_test, true))
			continue;

		if (mptcp_rr_dont_reinject_skb(tp, skb)) {
			backupsk = sk;
			continue;
		}

		bestsk = sk;
	}

	if (bestsk) {
		sk = bestsk;
	} else if (backupsk) {
		/* It has been sent on all subflows once - let's give it a
		 * chance again by restarting its pathmask.
		 */
		if (skb)
			TCP_SKB_CB(skb)->path_mask = 0;
		sk = backupsk;
	}

	return sk;
}

/* Returns the next segment to be sent from the mptcp meta-queue.
 * (chooses the reinject queue if any segment is waiting in it, otherwise,
 * chooses the normal write queue).
 * Sets *@reinject to 1 if the returned segment comes from the
 * reinject queue. Sets it to 0 if it is the regular send-head of the meta-sk,
 * and sets it to -1 if it is a meta-level retransmission to optimize the
 * receive-buffer.
 */
static struct sk_buff *__mptcp_rr_next_segment(const struct sock *meta_sk, int *reinject)
{
	const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
	struct sk_buff *skb = NULL;

	*reinject = 0;

	/* If we are in fallback-mode, just take from the meta-send-queue */
	if (mpcb->infinite_mapping_snd || mpcb->send_infinite_mapping)
		return tcp_send_head(meta_sk);

	skb = skb_peek(&mpcb->reinject_queue);

	if (skb)
		*reinject = 1;
	else
		skb = tcp_send_head(meta_sk);

	return skb;
}
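/* Quota-based round robin, as implemented below: each available subflow may
 * carry up to num_segments consecutive segments (tracked in rsp->quota)
 * before the scheduler moves on; once every available subflow has exhausted
 * its quota, all quotas are reset and a new round starts.
 *
 * Illustrative example (numbers are hypothetical): with num_segments = 3 and
 * mss_now = 1400, a subflow that has already sent one segment of its burst
 * gets split = 3 - 1 = 2, so *limit = 2 * 1400 = 2800 bytes on this pass.
 */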
static struct sk_buff *mptcp_rr_next_segment(struct sock *meta_sk,
					     int *reinject,
					     struct sock **subsk,
					     unsigned int *limit)
{
	const struct mptcp_cb *mpcb = tcp_sk(meta_sk)->mpcb;
	struct sock *sk_it, *choose_sk = NULL;
	struct sk_buff *skb = __mptcp_rr_next_segment(meta_sk, reinject);
	unsigned char split = num_segments;
	unsigned char iter = 0, full_subs = 0;

	/* As we set it, we have to reset it as well. */
	*limit = 0;

	if (!skb)
		return NULL;

	if (*reinject) {
		*subsk = rr_get_available_subflow(meta_sk, skb, false);
		if (!*subsk)
			return NULL;

		return skb;
	}

retry:

	/* First, we look for a subflow that is currently being used */
	mptcp_for_each_sk(mpcb, sk_it) {
		struct tcp_sock *tp_it = tcp_sk(sk_it);
		struct rrsched_priv *rsp = rrsched_get_priv(tp_it);

		if (!mptcp_rr_is_available(sk_it, skb, false, cwnd_limited))
			continue;

		iter++;

		/* Is this subflow currently being used? */
		if (rsp->quota > 0 && rsp->quota < num_segments) {
			split = num_segments - rsp->quota;
			choose_sk = sk_it;
			goto found;
		}

		/* Or, it's totally unused */
		if (!rsp->quota) {
			split = num_segments;
			choose_sk = sk_it;
		}

		/* Or, it must then be fully used */
		if (rsp->quota >= num_segments)
			full_subs++;
	}

	/* All considered subflows have a full quota, and we considered at
	 * least one.
	 */
	if (iter && iter == full_subs) {
		/* So, we restart this round by setting quota to 0 and retry
		 * to find a subflow.
		 */
		mptcp_for_each_sk(mpcb, sk_it) {
			struct tcp_sock *tp_it = tcp_sk(sk_it);
			struct rrsched_priv *rsp = rrsched_get_priv(tp_it);

			if (!mptcp_rr_is_available(sk_it, skb, false, cwnd_limited))
				continue;

			rsp->quota = 0;
		}

		goto retry;
	}

found:
	if (choose_sk) {
		unsigned int mss_now;
		struct tcp_sock *choose_tp = tcp_sk(choose_sk);
		struct rrsched_priv *rsp = rrsched_get_priv(choose_tp);

		if (!mptcp_rr_is_available(choose_sk, skb, false, true))
			return NULL;

		*subsk = choose_sk;
		mss_now = tcp_current_mss(*subsk);
		*limit = split * mss_now;

		if (skb->len > mss_now)
			rsp->quota += DIV_ROUND_UP(skb->len, mss_now);
		else
			rsp->quota++;

		return skb;
	}

	return NULL;
}

static struct mptcp_sched_ops mptcp_sched_rr = {
	.get_subflow = rr_get_available_subflow,
	.next_segment = mptcp_rr_next_segment,
	.name = "roundrobin",
	.owner = THIS_MODULE,
};

static int __init rr_register(void)
{
	BUILD_BUG_ON(sizeof(struct rrsched_priv) > MPTCP_SCHED_SIZE);

	if (mptcp_register_scheduler(&mptcp_sched_rr))
		return -1;

	return 0;
}

static void rr_unregister(void)
{
	mptcp_unregister_scheduler(&mptcp_sched_rr);
}

module_init(rr_register);
module_exit(rr_unregister);

MODULE_AUTHOR("Christoph Paasch");
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("ROUNDROBIN MPTCP");
MODULE_VERSION("0.89");
#endif
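
/* Usage sketch (not part of the module; assumes the out-of-tree
 * multipath-tcp.org MPTCP stack, where schedulers are selected through the
 * net.mptcp.mptcp_scheduler sysctl and this file builds as mptcp_rr.ko):
 *
 *   modprobe mptcp_rr num_segments=2 cwnd_limited=0
 *   sysctl -w net.mptcp.mptcp_scheduler=roundrobin
 *
 * Because the parameters are registered with mode 0644, they can also be
 * changed at runtime under /sys/module/mptcp_rr/parameters/.
 */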