From 4e6cbfd09c66893e5134c9896e9af353c2322b66 Mon Sep 17 00:00:00 2001 From: "John W. Linville" Date: Thu, 29 Jul 2010 16:14:13 -0400 Subject: mac80211: support use of NAPI for bottom-half processing This patch implement basic infrastructure to support use of NAPI by mac80211-based hardware drivers. Because mac80211 devices can support multiple netdevs, a dummy netdev is used for interfacing with the NAPI code in the core of the network stack. That structure is hidden from the hardware drivers, but the actual napi_struct is exposed in the ieee80211_hw structure so that the poll routines in drivers can retrieve that structure. Hardware drivers can also specify their own weight value for NAPI polling. Signed-off-by: John W. Linville --- include/net/mac80211.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b0787a1dea9..3f1e03b521e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1102,6 +1102,10 @@ enum ieee80211_hw_flags { * * @max_rates: maximum number of alternate rate retry stages * @max_rate_tries: maximum number of tries for each stage + * + * @napi_weight: weight used for NAPI polling. You must specify an + * appropriate value here if a napi_poll operation is provided + * by your driver. */ struct ieee80211_hw { struct ieee80211_conf conf; @@ -1113,6 +1117,7 @@ struct ieee80211_hw { int channel_change_time; int vif_data_size; int sta_data_size; + int napi_weight; u16 queues; u16 max_listen_interval; s8 max_signal; @@ -1687,6 +1692,8 @@ enum ieee80211_ampdu_mlme_action { * switch operation for CSAs received from the AP may implement this * callback. They must then call ieee80211_chswitch_done() to indicate * completion of the channel switch. + * + * @napi_poll: Poll Rx queue for incoming data frames. */ struct ieee80211_ops { int (*tx)(struct ieee80211_hw *hw, struct sk_buff *skb); @@ -1752,6 +1759,7 @@ struct ieee80211_ops { void (*flush)(struct ieee80211_hw *hw, bool drop); void (*channel_switch)(struct ieee80211_hw *hw, struct ieee80211_channel_switch *ch_switch); + int (*napi_poll)(struct ieee80211_hw *hw, int budget); }; /** @@ -1897,6 +1905,22 @@ void ieee80211_free_hw(struct ieee80211_hw *hw); */ void ieee80211_restart_hw(struct ieee80211_hw *hw); +/** ieee80211_napi_schedule - schedule NAPI poll + * + * Use this function to schedule NAPI polling on a device. + * + * @hw: the hardware to start polling + */ +void ieee80211_napi_schedule(struct ieee80211_hw *hw); + +/** ieee80211_napi_complete - complete NAPI polling + * + * Use this function to finish NAPI polling on a device. + * + * @hw: the hardware to stop polling + */ +void ieee80211_napi_complete(struct ieee80211_hw *hw); + /** * ieee80211_rx - receive frame * -- cgit v1.2.3 From 7da7cc1d42d8ce02cca16df8c021e6d657f1f8fd Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Aug 2010 17:02:38 +0200 Subject: mac80211: per interface idle notification Sometimes we don't just need to know whether or not the device is idle, but also per interface. This adds that reporting capability to mac80211. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 6 ++++++ net/mac80211/ibss.c | 8 +++++-- net/mac80211/ieee80211_i.h | 3 +++ net/mac80211/iface.c | 53 +++++++++++++++++++++++++++++++++++++++------- net/mac80211/mlme.c | 17 +++++++++++++-- net/mac80211/scan.c | 2 ++ net/mac80211/work.c | 8 +++---- 7 files changed, 81 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 3f1e03b521e..3a3c26f647b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -149,6 +149,7 @@ struct ieee80211_low_level_stats { * @BSS_CHANGED_ARP_FILTER: Hardware ARP filter address list or state changed. * @BSS_CHANGED_QOS: QoS for this association was enabled/disabled. Note * that it is only ever disabled for station mode. + * @BSS_CHANGED_IDLE: Idle changed for this BSS/interface. */ enum ieee80211_bss_change { BSS_CHANGED_ASSOC = 1<<0, @@ -165,6 +166,7 @@ enum ieee80211_bss_change { BSS_CHANGED_IBSS = 1<<11, BSS_CHANGED_ARP_FILTER = 1<<12, BSS_CHANGED_QOS = 1<<13, + BSS_CHANGED_IDLE = 1<<14, /* when adding here, make sure to change ieee80211_reconfig */ }; @@ -223,6 +225,9 @@ enum ieee80211_bss_change { * hardware must not perform any ARP filtering. Note, that the filter will * be enabled also in promiscuous mode. * @qos: This is a QoS-enabled BSS. + * @idle: This interface is idle. There's also a global idle flag in the + * hardware config which may be more appropriate depending on what + * your driver/device needs to do. */ struct ieee80211_bss_conf { const u8 *bssid; @@ -247,6 +252,7 @@ struct ieee80211_bss_conf { u8 arp_addr_cnt; bool arp_filter_enabled; bool qos; + bool idle; }; /** diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c index c691780725a..32af9710842 100644 --- a/net/mac80211/ibss.c +++ b/net/mac80211/ibss.c @@ -920,12 +920,14 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata, memcpy(sdata->u.ibss.ssid, params->ssid, IEEE80211_MAX_SSID_LEN); sdata->u.ibss.ssid_len = params->ssid_len; + mutex_unlock(&sdata->u.ibss.mtx); + + mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); + mutex_unlock(&sdata->local->mtx); ieee80211_queue_work(&sdata->local->hw, &sdata->work); - mutex_unlock(&sdata->u.ibss.mtx); - return 0; } @@ -980,7 +982,9 @@ int ieee80211_ibss_leave(struct ieee80211_sub_if_data *sdata) mutex_unlock(&sdata->u.ibss.mtx); + mutex_lock(&local->mtx); ieee80211_recalc_idle(sdata->local); + mutex_unlock(&local->mtx); return 0; } diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index b44e03a02da..98e783c6a36 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -497,6 +497,9 @@ struct ieee80211_sub_if_data { */ bool ht_opmode_valid; + /* to detect idle changes */ + bool old_idle; + /* Fragment table for host-based reassembly */ struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX]; unsigned int fragment_next; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index c1008a9d7bf..9459aeee0dd 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -309,7 +309,9 @@ static int ieee80211_open(struct net_device *dev) if (sdata->flags & IEEE80211_SDATA_PROMISC) atomic_inc(&local->iff_promiscs); + mutex_lock(&local->mtx); hw_reconf_flags |= __ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); local->open_count++; if (hw_reconf_flags) { @@ -516,7 +518,9 @@ static int ieee80211_stop(struct net_device *dev) sdata->bss = NULL; + mutex_lock(&local->mtx); hw_reconf_flags |= __ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); ieee80211_recalc_ps(local, -1); @@ -1199,28 +1203,61 @@ u32 __ieee80211_recalc_idle(struct ieee80211_local *local) { struct ieee80211_sub_if_data *sdata; int count = 0; + bool working = false, scanning = false; + struct ieee80211_work *wk; - if (!list_empty(&local->work_list)) - return ieee80211_idle_off(local, "working"); - - if (local->scanning) - return ieee80211_idle_off(local, "scanning"); +#ifdef CONFIG_PROVE_LOCKING + WARN_ON(debug_locks && !lockdep_rtnl_is_held() && + !lockdep_is_held(&local->iflist_mtx)); +#endif + lockdep_assert_held(&local->mtx); list_for_each_entry(sdata, &local->interfaces, list) { - if (!ieee80211_sdata_running(sdata)) + if (!ieee80211_sdata_running(sdata)) { + sdata->vif.bss_conf.idle = true; continue; + } + + sdata->old_idle = sdata->vif.bss_conf.idle; + /* do not count disabled managed interfaces */ if (sdata->vif.type == NL80211_IFTYPE_STATION && - !sdata->u.mgd.associated) + !sdata->u.mgd.associated) { + sdata->vif.bss_conf.idle = true; continue; + } /* do not count unused IBSS interfaces */ if (sdata->vif.type == NL80211_IFTYPE_ADHOC && - !sdata->u.ibss.ssid_len) + !sdata->u.ibss.ssid_len) { + sdata->vif.bss_conf.idle = true; continue; + } /* count everything else */ count++; } + list_for_each_entry(wk, &local->work_list, list) { + working = true; + wk->sdata->vif.bss_conf.idle = false; + } + + if (local->scan_sdata) { + scanning = true; + local->scan_sdata->vif.bss_conf.idle = false; + } + + list_for_each_entry(sdata, &local->interfaces, list) { + if (sdata->old_idle == sdata->vif.bss_conf.idle) + continue; + if (!ieee80211_sdata_running(sdata)) + continue; + ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_IDLE); + } + + if (working) + return ieee80211_idle_off(local, "working"); + if (scanning) + return ieee80211_idle_off(local, "scanning"); if (!count) return ieee80211_idle_on(local); else diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 17e9257a61d..82e7cec5179 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1103,8 +1103,11 @@ static void __ieee80211_connection_loss(struct ieee80211_sub_if_data *sdata) printk(KERN_DEBUG "Connection to AP %pM lost.\n", bssid); ieee80211_set_disassoc(sdata, true); - ieee80211_recalc_idle(local); mutex_unlock(&ifmgd->mtx); + + mutex_lock(&local->mtx); + ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); /* * must be outside lock due to cfg80211, * but that's not a problem. @@ -1173,7 +1176,9 @@ ieee80211_rx_mgmt_deauth(struct ieee80211_sub_if_data *sdata, sdata->name, bssid, reason_code); ieee80211_set_disassoc(sdata, true); + mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); + mutex_unlock(&sdata->local->mtx); return RX_MGMT_CFG80211_DEAUTH; } @@ -1203,7 +1208,9 @@ ieee80211_rx_mgmt_disassoc(struct ieee80211_sub_if_data *sdata, sdata->name, mgmt->sa, reason_code); ieee80211_set_disassoc(sdata, true); + mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); + mutex_unlock(&sdata->local->mtx); return RX_MGMT_CFG80211_DISASSOC; } @@ -1840,8 +1847,10 @@ void ieee80211_sta_work(struct ieee80211_sub_if_data *sdata) " after %dms, disconnecting.\n", bssid, (1000 * IEEE80211_PROBE_WAIT)/HZ); ieee80211_set_disassoc(sdata, true); - ieee80211_recalc_idle(local); mutex_unlock(&ifmgd->mtx); + mutex_lock(&local->mtx); + ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); /* * must be outside lock due to cfg80211, * but that's not a problem. @@ -2319,7 +2328,9 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata, if (assoc_bss) sta_info_destroy_addr(sdata, bssid); + mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); + mutex_unlock(&sdata->local->mtx); return 0; } @@ -2357,7 +2368,9 @@ int ieee80211_mgd_disassoc(struct ieee80211_sub_if_data *sdata, cookie, !req->local_state_change); sta_info_destroy_addr(sdata, bssid); + mutex_lock(&sdata->local->mtx); ieee80211_recalc_idle(sdata->local); + mutex_unlock(&sdata->local->mtx); return 0; } diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index f31f549733b..31f233f7f51 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -304,7 +304,9 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) ieee80211_offchannel_return(local, true); done: + mutex_lock(&local->mtx); ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); ieee80211_mlme_notify_scan_completed(local); ieee80211_ibss_notify_scan_completed(local); ieee80211_mesh_notify_scan_completed(local); diff --git a/net/mac80211/work.c b/net/mac80211/work.c index b98af64f586..ae344d1ba05 100644 --- a/net/mac80211/work.c +++ b/net/mac80211/work.c @@ -888,10 +888,10 @@ static void ieee80211_work_work(struct work_struct *work) while ((skb = skb_dequeue(&local->work_skb_queue))) ieee80211_work_rx_queued_mgmt(local, skb); - ieee80211_recalc_idle(local); - mutex_lock(&local->mtx); + ieee80211_recalc_idle(local); + list_for_each_entry_safe(wk, tmp, &local->work_list, list) { bool started = wk->started; @@ -1001,10 +1001,10 @@ static void ieee80211_work_work(struct work_struct *work) &local->scan_work, round_jiffies_relative(0)); - mutex_unlock(&local->mtx); - ieee80211_recalc_idle(local); + mutex_unlock(&local->mtx); + list_for_each_entry_safe(wk, tmp, &free_work, list) { wk->done(wk, NULL); list_del(&wk->list); -- cgit v1.2.3 From d1f5b7a34aa5ff703c4966ea2652d4212ac75940 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 5 Aug 2010 17:05:55 +0200 Subject: mac80211: allow drivers to request SM PS mode change Sometimes drivers have more information than the stack about how their antennas/chains are used, and may require that the SM PS mode be changed. This could happen, for example, when detecting that the user disconnected an antenna. Thus this patch introduces API to allow drivers to request SM PS mode changes. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 12 ++++++++++++ net/mac80211/ht.c | 28 ++++++++++++++++++++++++++++ net/mac80211/ieee80211_i.h | 6 +++++- net/mac80211/mlme.c | 3 +++ 4 files changed, 48 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 3a3c26f647b..871ed1de736 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2548,6 +2548,18 @@ void ieee80211_cqm_rssi_notify(struct ieee80211_vif *vif, */ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success); +/** + * ieee80211_request_smps - request SM PS transition + * @vif: &struct ieee80211_vif pointer from the add_interface callback. + * @mode: new SM PS mode + * + * This allows the driver to request an SM PS transition in managed + * mode. This is useful when the driver has more information than + * the stack about possible interference, for example by bluetooth. + */ +void ieee80211_request_smps(struct ieee80211_vif *vif, + enum ieee80211_smps_mode smps_mode); + /* Rate control API */ /** diff --git a/net/mac80211/ht.c b/net/mac80211/ht.c index 9d101fb3386..11f74f5f7b2 100644 --- a/net/mac80211/ht.c +++ b/net/mac80211/ht.c @@ -265,3 +265,31 @@ int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, return 0; } + +void ieee80211_request_smps_work(struct work_struct *work) +{ + struct ieee80211_sub_if_data *sdata = + container_of(work, struct ieee80211_sub_if_data, + u.mgd.request_smps_work); + + mutex_lock(&sdata->u.mgd.mtx); + __ieee80211_request_smps(sdata, sdata->u.mgd.driver_smps_mode); + mutex_unlock(&sdata->u.mgd.mtx); +} + +void ieee80211_request_smps(struct ieee80211_vif *vif, + enum ieee80211_smps_mode smps_mode) +{ + struct ieee80211_sub_if_data *sdata = vif_to_sdata(vif); + + if (WARN_ON(vif->type != NL80211_IFTYPE_STATION)) + return; + + if (WARN_ON(smps_mode == IEEE80211_SMPS_OFF)) + smps_mode = IEEE80211_SMPS_AUTOMATIC; + + ieee80211_queue_work(&sdata->local->hw, + &sdata->u.mgd.request_smps_work); +} +/* this might change ... don't want non-open drivers using it */ +EXPORT_SYMBOL_GPL(ieee80211_request_smps); diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 98e783c6a36..1bf05bfd149 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -343,7 +343,10 @@ struct ieee80211_if_managed { unsigned long timers_running; /* used for quiesce/restart */ bool powersave; /* powersave requested for this iface */ enum ieee80211_smps_mode req_smps, /* requested smps mode */ - ap_smps; /* smps mode AP thinks we're in */ + ap_smps, /* smps mode AP thinks we're in */ + driver_smps_mode; /* smps mode request */ + + struct work_struct request_smps_work; unsigned int flags; @@ -1113,6 +1116,7 @@ void ieee80211_send_delba(struct ieee80211_sub_if_data *sdata, int ieee80211_send_smps_action(struct ieee80211_sub_if_data *sdata, enum ieee80211_smps_mode smps, const u8 *da, const u8 *bssid); +void ieee80211_request_smps_work(struct work_struct *work); void ___ieee80211_stop_rx_ba_session(struct sta_info *sta, u16 tid, u16 initiator, u16 reason); diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c index 82e7cec5179..38996a44aa8 100644 --- a/net/mac80211/mlme.c +++ b/net/mac80211/mlme.c @@ -1926,6 +1926,8 @@ void ieee80211_sta_quiesce(struct ieee80211_sub_if_data *sdata) * time -- the code here is properly synchronised. */ + cancel_work_sync(&ifmgd->request_smps_work); + cancel_work_sync(&ifmgd->beacon_connection_loss_work); if (del_timer_sync(&ifmgd->timer)) set_bit(TMR_RUNNING_TIMER, &ifmgd->timers_running); @@ -1961,6 +1963,7 @@ void ieee80211_sta_setup_sdata(struct ieee80211_sub_if_data *sdata) INIT_WORK(&ifmgd->chswitch_work, ieee80211_chswitch_work); INIT_WORK(&ifmgd->beacon_connection_loss_work, ieee80211_beacon_connection_loss_work); + INIT_WORK(&ifmgd->request_smps_work, ieee80211_request_smps_work); setup_timer(&ifmgd->timer, ieee80211_sta_timer, (unsigned long) sdata); setup_timer(&ifmgd->bcn_mon_timer, ieee80211_sta_bcn_mon_timer, -- cgit v1.2.3 From 97359d1235eaf634fe706c9faa6e40181cc95fb8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 10 Aug 2010 09:46:38 +0200 Subject: mac80211: use cipher suite selectors Currently, mac80211 translates the cfg80211 cipher suite selectors into ALG_* values. That isn't all too useful, and some drivers benefit from the distinction between WEP40 and WEP104 as well. Therefore, convert it all to use the cipher suite selectors. Signed-off-by: Johannes Berg Acked-by: Gertjan van Wingerde Signed-off-by: John W. Linville --- drivers/net/wireless/at76c50x-usb.c | 7 ++-- drivers/net/wireless/ath/ar9170/main.c | 31 ++++++++------- drivers/net/wireless/ath/ath5k/base.c | 9 +++-- drivers/net/wireless/ath/ath5k/pcu.c | 19 ++++----- drivers/net/wireless/ath/ath9k/common.c | 36 ++++++++++-------- drivers/net/wireless/ath/ath9k/htc_drv_main.c | 5 ++- drivers/net/wireless/ath/ath9k/main.c | 5 ++- drivers/net/wireless/b43/main.c | 16 ++++---- drivers/net/wireless/iwlwifi/iwl-agn-tx.c | 16 ++++---- drivers/net/wireless/iwlwifi/iwl-agn.c | 4 +- drivers/net/wireless/iwlwifi/iwl-dev.h | 2 +- drivers/net/wireless/iwlwifi/iwl-sta.c | 24 ++++++------ drivers/net/wireless/iwlwifi/iwl3945-base.c | 43 ++++++++++++--------- drivers/net/wireless/p54/main.c | 9 +++-- drivers/net/wireless/p54/txrx.c | 17 +++++---- drivers/net/wireless/rt2x00/rt2500usb.c | 4 +- drivers/net/wireless/rt2x00/rt2x00crypto.c | 17 ++++----- drivers/net/wireless/wl12xx/wl1251_main.c | 13 ++++--- drivers/net/wireless/wl12xx/wl1251_tx.c | 4 +- drivers/net/wireless/wl12xx/wl1271_main.c | 13 ++++--- drivers/net/wireless/wl12xx/wl1271_tx.c | 4 +- include/net/mac80211.h | 18 +-------- net/mac80211/cfg.c | 44 ++++++--------------- net/mac80211/debugfs_key.c | 55 +++++++++++---------------- net/mac80211/driver-trace.h | 4 +- net/mac80211/key.c | 25 ++++++------ net/mac80211/key.h | 4 +- net/mac80211/rx.c | 18 +++++---- net/mac80211/tx.c | 22 ++++++----- net/mac80211/wep.c | 2 +- net/mac80211/wpa.c | 6 +-- 31 files changed, 236 insertions(+), 260 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/at76c50x-usb.c b/drivers/net/wireless/at76c50x-usb.c index d5140a87f07..a267bf55574 100644 --- a/drivers/net/wireless/at76c50x-usb.c +++ b/drivers/net/wireless/at76c50x-usb.c @@ -2061,11 +2061,12 @@ static int at76_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, int i; - at76_dbg(DBG_MAC80211, "%s(): cmd %d key->alg %d key->keyidx %d " + at76_dbg(DBG_MAC80211, "%s(): cmd %d key->cipher %d key->keyidx %d " "key->keylen %d", - __func__, cmd, key->alg, key->keyidx, key->keylen); + __func__, cmd, key->cipher, key->keyidx, key->keylen); - if (key->alg != ALG_WEP) + if ((key->cipher != WLAN_CIPHER_SUITE_WEP40) && + (key->cipher != WLAN_CIPHER_SUITE_WEP104)) return -EOPNOTSUPP; key->hw_key_idx = key->keyidx; diff --git a/drivers/net/wireless/ath/ar9170/main.c b/drivers/net/wireless/ath/ar9170/main.c index c67b05f3bcb..29d2253ad7e 100644 --- a/drivers/net/wireless/ath/ar9170/main.c +++ b/drivers/net/wireless/ath/ar9170/main.c @@ -1190,14 +1190,13 @@ static int ar9170_tx_prepare(struct ar9170 *ar, struct sk_buff *skb) if (info->control.hw_key) { icv = info->control.hw_key->icv_len; - switch (info->control.hw_key->alg) { - case ALG_WEP: + switch (info->control.hw_key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: + case WLAN_CIPHER_SUITE_TKIP: keytype = AR9170_TX_MAC_ENCR_RC4; break; - case ALG_TKIP: - keytype = AR9170_TX_MAC_ENCR_RC4; - break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: keytype = AR9170_TX_MAC_ENCR_AES; break; default: @@ -1778,17 +1777,17 @@ static int ar9170_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, if ((!ar->vif) || (ar->disable_offload)) return -EOPNOTSUPP; - switch (key->alg) { - case ALG_WEP: - if (key->keylen == WLAN_KEY_LEN_WEP40) - ktype = AR9170_ENC_ALG_WEP64; - else - ktype = AR9170_ENC_ALG_WEP128; + switch (key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + ktype = AR9170_ENC_ALG_WEP64; + break; + case WLAN_CIPHER_SUITE_WEP104: + ktype = AR9170_ENC_ALG_WEP128; break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: ktype = AR9170_ENC_ALG_TKIP; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: ktype = AR9170_ENC_ALG_AESCCMP; break; default: @@ -1827,7 +1826,7 @@ static int ar9170_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, if (err) goto out; - if (key->alg == ALG_TKIP) { + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) { err = ar9170_upload_key(ar, i, sta ? sta->addr : NULL, ktype, 1, key->key + 16, 16); if (err) @@ -1864,7 +1863,7 @@ static int ar9170_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, if (err) goto out; - if (key->alg == ALG_TKIP) { + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) { err = ar9170_upload_key(ar, key->hw_key_idx, NULL, AR9170_ENC_ALG_NONE, 1, diff --git a/drivers/net/wireless/ath/ath5k/base.c b/drivers/net/wireless/ath/ath5k/base.c index 373dcfec689..a729b877467 100644 --- a/drivers/net/wireless/ath/ath5k/base.c +++ b/drivers/net/wireless/ath/ath5k/base.c @@ -3297,11 +3297,12 @@ ath5k_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, if (sc->opmode == NL80211_IFTYPE_AP) return -EOPNOTSUPP; - switch (key->alg) { - case ALG_WEP: - case ALG_TKIP: + switch (key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: + case WLAN_CIPHER_SUITE_TKIP: break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: if (sc->ah->ah_aes_support) break; diff --git a/drivers/net/wireless/ath/ath5k/pcu.c b/drivers/net/wireless/ath/ath5k/pcu.c index 86fdb6ddfaa..af273358c7c 100644 --- a/drivers/net/wireless/ath/ath5k/pcu.c +++ b/drivers/net/wireless/ath/ath5k/pcu.c @@ -695,21 +695,18 @@ int ath5k_hw_reset_key(struct ath5k_hw *ah, u16 entry) static int ath5k_keycache_type(const struct ieee80211_key_conf *key) { - switch (key->alg) { - case ALG_TKIP: + switch (key->cipher) { + case WLAN_CIPHER_SUITE_TKIP: return AR5K_KEYTABLE_TYPE_TKIP; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: return AR5K_KEYTABLE_TYPE_CCM; - case ALG_WEP: - if (key->keylen == WLAN_KEY_LEN_WEP40) - return AR5K_KEYTABLE_TYPE_40; - else if (key->keylen == WLAN_KEY_LEN_WEP104) - return AR5K_KEYTABLE_TYPE_104; - return -EINVAL; + case WLAN_CIPHER_SUITE_WEP40: + return AR5K_KEYTABLE_TYPE_40; + case WLAN_CIPHER_SUITE_WEP104: + return AR5K_KEYTABLE_TYPE_104; default: return -EINVAL; } - return -EINVAL; } /* @@ -728,7 +725,7 @@ int ath5k_hw_set_key(struct ath5k_hw *ah, u16 entry, bool is_tkip; const u8 *key_ptr; - is_tkip = (key->alg == ALG_TKIP); + is_tkip = (key->cipher == WLAN_CIPHER_SUITE_TKIP); /* * key->keylen comes in from mac80211 in bytes. diff --git a/drivers/net/wireless/ath/ath9k/common.c b/drivers/net/wireless/ath/ath9k/common.c index c86f7d3593a..3100c87a4fc 100644 --- a/drivers/net/wireless/ath/ath9k/common.c +++ b/drivers/net/wireless/ath/ath9k/common.c @@ -46,12 +46,17 @@ int ath9k_cmn_get_hw_crypto_keytype(struct sk_buff *skb) struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb); if (tx_info->control.hw_key) { - if (tx_info->control.hw_key->alg == ALG_WEP) + switch (tx_info->control.hw_key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: return ATH9K_KEY_TYPE_WEP; - else if (tx_info->control.hw_key->alg == ALG_TKIP) + case WLAN_CIPHER_SUITE_TKIP: return ATH9K_KEY_TYPE_TKIP; - else if (tx_info->control.hw_key->alg == ALG_CCMP) + case WLAN_CIPHER_SUITE_CCMP: return ATH9K_KEY_TYPE_AES; + default: + break; + } } return ATH9K_KEY_TYPE_CLEAR; @@ -212,11 +217,11 @@ static int ath_reserve_key_cache_slot_tkip(struct ath_common *common) } static int ath_reserve_key_cache_slot(struct ath_common *common, - enum ieee80211_key_alg alg) + u32 cipher) { int i; - if (alg == ALG_TKIP) + if (cipher == WLAN_CIPHER_SUITE_TKIP) return ath_reserve_key_cache_slot_tkip(common); /* First, try to find slots that would not be available for TKIP. */ @@ -293,14 +298,15 @@ int ath9k_cmn_key_config(struct ath_common *common, memset(&hk, 0, sizeof(hk)); - switch (key->alg) { - case ALG_WEP: + switch (key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: hk.kv_type = ATH9K_CIPHER_WEP; break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: hk.kv_type = ATH9K_CIPHER_TKIP; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: hk.kv_type = ATH9K_CIPHER_AES_CCM; break; default: @@ -316,7 +322,7 @@ int ath9k_cmn_key_config(struct ath_common *common, memcpy(gmac, vif->addr, ETH_ALEN); gmac[0] |= 0x01; mac = gmac; - idx = ath_reserve_key_cache_slot(common, key->alg); + idx = ath_reserve_key_cache_slot(common, key->cipher); break; case NL80211_IFTYPE_ADHOC: if (!sta) { @@ -326,7 +332,7 @@ int ath9k_cmn_key_config(struct ath_common *common, memcpy(gmac, sta->addr, ETH_ALEN); gmac[0] |= 0x01; mac = gmac; - idx = ath_reserve_key_cache_slot(common, key->alg); + idx = ath_reserve_key_cache_slot(common, key->cipher); break; default: idx = key->keyidx; @@ -348,13 +354,13 @@ int ath9k_cmn_key_config(struct ath_common *common, return -EOPNOTSUPP; mac = sta->addr; - idx = ath_reserve_key_cache_slot(common, key->alg); + idx = ath_reserve_key_cache_slot(common, key->cipher); } if (idx < 0) return -ENOSPC; /* no free key cache entries */ - if (key->alg == ALG_TKIP) + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) ret = ath_setkey_tkip(common, idx, key->key, &hk, mac, vif->type == NL80211_IFTYPE_AP); else @@ -364,7 +370,7 @@ int ath9k_cmn_key_config(struct ath_common *common, return -EIO; set_bit(idx, common->keymap); - if (key->alg == ALG_TKIP) { + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) { set_bit(idx + 64, common->keymap); if (common->splitmic) { set_bit(idx + 32, common->keymap); @@ -389,7 +395,7 @@ void ath9k_cmn_key_delete(struct ath_common *common, return; clear_bit(key->hw_key_idx, common->keymap); - if (key->alg != ALG_TKIP) + if (key->cipher != WLAN_CIPHER_SUITE_TKIP) return; clear_bit(key->hw_key_idx + 64, common->keymap); diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c index 7d09b4b17bb..4e345be6243 100644 --- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c +++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c @@ -1585,9 +1585,10 @@ static int ath9k_htc_set_key(struct ieee80211_hw *hw, key->hw_key_idx = ret; /* push IV and Michael MIC generation to stack */ key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; - if (key->alg == ALG_TKIP) + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) key->flags |= IEEE80211_KEY_FLAG_GENERATE_MMIC; - if (priv->ah->sw_mgmt_crypto && key->alg == ALG_CCMP) + if (priv->ah->sw_mgmt_crypto && + key->cipher == WLAN_CIPHER_SUITE_CCMP) key->flags |= IEEE80211_KEY_FLAG_SW_MGMT; ret = 0; } diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c index a3b0ea90439..1165f909ef0 100644 --- a/drivers/net/wireless/ath/ath9k/main.c +++ b/drivers/net/wireless/ath/ath9k/main.c @@ -1782,9 +1782,10 @@ static int ath9k_set_key(struct ieee80211_hw *hw, key->hw_key_idx = ret; /* push IV and Michael MIC generation to stack */ key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; - if (key->alg == ALG_TKIP) + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) key->flags |= IEEE80211_KEY_FLAG_GENERATE_MMIC; - if (sc->sc_ah->sw_mgmt_crypto && key->alg == ALG_CCMP) + if (sc->sc_ah->sw_mgmt_crypto && + key->cipher == WLAN_CIPHER_SUITE_CCMP) key->flags |= IEEE80211_KEY_FLAG_SW_MGMT; ret = 0; } diff --git a/drivers/net/wireless/b43/main.c b/drivers/net/wireless/b43/main.c index a3e2f2bfe3a..a1186525c70 100644 --- a/drivers/net/wireless/b43/main.c +++ b/drivers/net/wireless/b43/main.c @@ -3759,17 +3759,17 @@ static int b43_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, } err = -EINVAL; - switch (key->alg) { - case ALG_WEP: - if (key->keylen == WLAN_KEY_LEN_WEP40) - algorithm = B43_SEC_ALGO_WEP40; - else - algorithm = B43_SEC_ALGO_WEP104; + switch (key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + algorithm = B43_SEC_ALGO_WEP40; + break; + case WLAN_CIPHER_SUITE_WEP104: + algorithm = B43_SEC_ALGO_WEP104; break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: algorithm = B43_SEC_ALGO_TKIP; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: algorithm = B43_SEC_ALGO_AES; break; default: diff --git a/drivers/net/wireless/iwlwifi/iwl-agn-tx.c b/drivers/net/wireless/iwlwifi/iwl-agn-tx.c index 69155aa448f..3fc982e8792 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn-tx.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn-tx.c @@ -470,8 +470,8 @@ static void iwlagn_tx_cmd_build_hwcrypto(struct iwl_priv *priv, { struct ieee80211_key_conf *keyconf = info->control.hw_key; - switch (keyconf->alg) { - case ALG_CCMP: + switch (keyconf->cipher) { + case WLAN_CIPHER_SUITE_CCMP: tx_cmd->sec_ctl = TX_CMD_SEC_CCM; memcpy(tx_cmd->key, keyconf->key, keyconf->keylen); if (info->flags & IEEE80211_TX_CTL_AMPDU) @@ -479,20 +479,20 @@ static void iwlagn_tx_cmd_build_hwcrypto(struct iwl_priv *priv, IWL_DEBUG_TX(priv, "tx_cmd with AES hwcrypto\n"); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: tx_cmd->sec_ctl = TX_CMD_SEC_TKIP; ieee80211_get_tkip_key(keyconf, skb_frag, IEEE80211_TKIP_P2_KEY, tx_cmd->key); IWL_DEBUG_TX(priv, "tx_cmd with tkip hwcrypto\n"); break; - case ALG_WEP: + case WLAN_CIPHER_SUITE_WEP104: + tx_cmd->sec_ctl |= TX_CMD_SEC_KEY128; + /* fall through */ + case WLAN_CIPHER_SUITE_WEP40: tx_cmd->sec_ctl |= (TX_CMD_SEC_WEP | (keyconf->keyidx & TX_CMD_SEC_MSK) << TX_CMD_SEC_SHIFT); - if (keyconf->keylen == WEP_KEY_LEN_128) - tx_cmd->sec_ctl |= TX_CMD_SEC_KEY128; - memcpy(&tx_cmd->key[3], keyconf->key, keyconf->keylen); IWL_DEBUG_TX(priv, "Configuring packet for WEP encryption " @@ -500,7 +500,7 @@ static void iwlagn_tx_cmd_build_hwcrypto(struct iwl_priv *priv, break; default: - IWL_ERR(priv, "Unknown encode alg %d\n", keyconf->alg); + IWL_ERR(priv, "Unknown encode cipher %x\n", keyconf->cipher); break; } } diff --git a/drivers/net/wireless/iwlwifi/iwl-agn.c b/drivers/net/wireless/iwlwifi/iwl-agn.c index 43e078b8737..21a52ae05c0 100644 --- a/drivers/net/wireless/iwlwifi/iwl-agn.c +++ b/drivers/net/wireless/iwlwifi/iwl-agn.c @@ -3389,7 +3389,9 @@ static int iwl_mac_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, * in 1X mode. * In legacy wep mode, we use another host command to the uCode. */ - if (key->alg == ALG_WEP && !sta && vif->type != NL80211_IFTYPE_AP) { + if ((key->cipher == WLAN_CIPHER_SUITE_WEP40 || + key->cipher == WLAN_CIPHER_SUITE_WEP104) && + !sta && vif->type != NL80211_IFTYPE_AP) { if (cmd == SET_KEY) is_default_wep_key = !priv->key_mapping_key; else diff --git a/drivers/net/wireless/iwlwifi/iwl-dev.h b/drivers/net/wireless/iwlwifi/iwl-dev.h index 59963554702..149619fb1c0 100644 --- a/drivers/net/wireless/iwlwifi/iwl-dev.h +++ b/drivers/net/wireless/iwlwifi/iwl-dev.h @@ -420,7 +420,7 @@ struct iwl_tid_data { }; struct iwl_hw_key { - enum ieee80211_key_alg alg; + u32 cipher; int keylen; u8 keyidx; u8 key[32]; diff --git a/drivers/net/wireless/iwlwifi/iwl-sta.c b/drivers/net/wireless/iwlwifi/iwl-sta.c index 7e0829be5e7..d5e8db37b86 100644 --- a/drivers/net/wireless/iwlwifi/iwl-sta.c +++ b/drivers/net/wireless/iwlwifi/iwl-sta.c @@ -818,7 +818,7 @@ int iwl_set_default_wep_key(struct iwl_priv *priv, keyconf->flags &= ~IEEE80211_KEY_FLAG_GENERATE_IV; keyconf->hw_key_idx = HW_KEY_DEFAULT; - priv->stations[IWL_AP_ID].keyinfo.alg = ALG_WEP; + priv->stations[IWL_AP_ID].keyinfo.cipher = keyconf->cipher; priv->wep_keys[keyconf->keyidx].key_size = keyconf->keylen; memcpy(&priv->wep_keys[keyconf->keyidx].key, &keyconf->key, @@ -856,7 +856,7 @@ static int iwl_set_wep_dynamic_key_info(struct iwl_priv *priv, spin_lock_irqsave(&priv->sta_lock, flags); - priv->stations[sta_id].keyinfo.alg = keyconf->alg; + priv->stations[sta_id].keyinfo.cipher = keyconf->cipher; priv->stations[sta_id].keyinfo.keylen = keyconf->keylen; priv->stations[sta_id].keyinfo.keyidx = keyconf->keyidx; @@ -906,7 +906,7 @@ static int iwl_set_ccmp_dynamic_key_info(struct iwl_priv *priv, keyconf->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; spin_lock_irqsave(&priv->sta_lock, flags); - priv->stations[sta_id].keyinfo.alg = keyconf->alg; + priv->stations[sta_id].keyinfo.cipher = keyconf->cipher; priv->stations[sta_id].keyinfo.keylen = keyconf->keylen; memcpy(priv->stations[sta_id].keyinfo.key, keyconf->key, @@ -955,7 +955,7 @@ static int iwl_set_tkip_dynamic_key_info(struct iwl_priv *priv, spin_lock_irqsave(&priv->sta_lock, flags); - priv->stations[sta_id].keyinfo.alg = keyconf->alg; + priv->stations[sta_id].keyinfo.cipher = keyconf->cipher; priv->stations[sta_id].keyinfo.keylen = 16; if ((priv->stations[sta_id].sta.key.key_flags & STA_KEY_FLG_ENCRYPT_MSK) @@ -1090,24 +1090,26 @@ int iwl_set_dynamic_key(struct iwl_priv *priv, priv->key_mapping_key++; keyconf->hw_key_idx = HW_KEY_DYNAMIC; - switch (keyconf->alg) { - case ALG_CCMP: + switch (keyconf->cipher) { + case WLAN_CIPHER_SUITE_CCMP: ret = iwl_set_ccmp_dynamic_key_info(priv, keyconf, sta_id); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: ret = iwl_set_tkip_dynamic_key_info(priv, keyconf, sta_id); break; - case ALG_WEP: + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: ret = iwl_set_wep_dynamic_key_info(priv, keyconf, sta_id); break; default: IWL_ERR(priv, - "Unknown alg: %s alg = %d\n", __func__, keyconf->alg); + "Unknown alg: %s cipher = %x\n", __func__, + keyconf->cipher); ret = -EINVAL; } - IWL_DEBUG_WEP(priv, "Set dynamic key: alg= %d len=%d idx=%d sta=%d ret=%d\n", - keyconf->alg, keyconf->keylen, keyconf->keyidx, + IWL_DEBUG_WEP(priv, "Set dynamic key: cipher=%x len=%d idx=%d sta=%d ret=%d\n", + keyconf->cipher, keyconf->keylen, keyconf->keyidx, sta_id, ret); return ret; diff --git a/drivers/net/wireless/iwlwifi/iwl3945-base.c b/drivers/net/wireless/iwlwifi/iwl3945-base.c index b14eaf91c7c..faa2e0037e1 100644 --- a/drivers/net/wireless/iwlwifi/iwl3945-base.c +++ b/drivers/net/wireless/iwlwifi/iwl3945-base.c @@ -152,7 +152,7 @@ static int iwl3945_set_ccmp_dynamic_key_info(struct iwl_priv *priv, key_flags &= ~STA_KEY_FLG_INVALID; spin_lock_irqsave(&priv->sta_lock, flags); - priv->stations[sta_id].keyinfo.alg = keyconf->alg; + priv->stations[sta_id].keyinfo.cipher = keyconf->cipher; priv->stations[sta_id].keyinfo.keylen = keyconf->keylen; memcpy(priv->stations[sta_id].keyinfo.key, keyconf->key, keyconf->keylen); @@ -223,23 +223,25 @@ static int iwl3945_set_dynamic_key(struct iwl_priv *priv, keyconf->hw_key_idx = HW_KEY_DYNAMIC; - switch (keyconf->alg) { - case ALG_CCMP: + switch (keyconf->cipher) { + case WLAN_CIPHER_SUITE_CCMP: ret = iwl3945_set_ccmp_dynamic_key_info(priv, keyconf, sta_id); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: ret = iwl3945_set_tkip_dynamic_key_info(priv, keyconf, sta_id); break; - case ALG_WEP: + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: ret = iwl3945_set_wep_dynamic_key_info(priv, keyconf, sta_id); break; default: - IWL_ERR(priv, "Unknown alg: %s alg = %d\n", __func__, keyconf->alg); + IWL_ERR(priv, "Unknown alg: %s alg=%x\n", __func__, + keyconf->cipher); ret = -EINVAL; } - IWL_DEBUG_WEP(priv, "Set dynamic key: alg= %d len=%d idx=%d sta=%d ret=%d\n", - keyconf->alg, keyconf->keylen, keyconf->keyidx, + IWL_DEBUG_WEP(priv, "Set dynamic key: alg=%x len=%d idx=%d sta=%d ret=%d\n", + keyconf->cipher, keyconf->keylen, keyconf->keyidx, sta_id, ret); return ret; @@ -255,10 +257,11 @@ static int iwl3945_remove_static_key(struct iwl_priv *priv) static int iwl3945_set_static_key(struct iwl_priv *priv, struct ieee80211_key_conf *key) { - if (key->alg == ALG_WEP) + if (key->cipher == WLAN_CIPHER_SUITE_WEP40 || + key->cipher == WLAN_CIPHER_SUITE_WEP104) return -EOPNOTSUPP; - IWL_ERR(priv, "Static key invalid: alg %d\n", key->alg); + IWL_ERR(priv, "Static key invalid: cipher %x\n", key->cipher); return -EINVAL; } @@ -370,23 +373,25 @@ static void iwl3945_build_tx_cmd_hwcrypto(struct iwl_priv *priv, struct iwl3945_tx_cmd *tx_cmd = (struct iwl3945_tx_cmd *)cmd->cmd.payload; struct iwl_hw_key *keyinfo = &priv->stations[sta_id].keyinfo; - switch (keyinfo->alg) { - case ALG_CCMP: + tx_cmd->sec_ctl = 0; + + switch (keyinfo->cipher) { + case WLAN_CIPHER_SUITE_CCMP: tx_cmd->sec_ctl = TX_CMD_SEC_CCM; memcpy(tx_cmd->key, keyinfo->key, keyinfo->keylen); IWL_DEBUG_TX(priv, "tx_cmd with AES hwcrypto\n"); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: break; - case ALG_WEP: - tx_cmd->sec_ctl = TX_CMD_SEC_WEP | + case WLAN_CIPHER_SUITE_WEP104: + tx_cmd->sec_ctl |= TX_CMD_SEC_KEY128; + /* fall through */ + case WLAN_CIPHER_SUITE_WEP40: + tx_cmd->sec_ctl |= TX_CMD_SEC_WEP | (info->control.hw_key->hw_key_idx & TX_CMD_SEC_MSK) << TX_CMD_SEC_SHIFT; - if (keyinfo->keylen == 13) - tx_cmd->sec_ctl |= TX_CMD_SEC_KEY128; - memcpy(&tx_cmd->key[3], keyinfo->key, keyinfo->keylen); IWL_DEBUG_TX(priv, "Configuring packet for WEP encryption " @@ -394,7 +399,7 @@ static void iwl3945_build_tx_cmd_hwcrypto(struct iwl_priv *priv, break; default: - IWL_ERR(priv, "Unknown encode alg %d\n", keyinfo->alg); + IWL_ERR(priv, "Unknown encode cipher %x\n", keyinfo->cipher); break; } } diff --git a/drivers/net/wireless/p54/main.c b/drivers/net/wireless/p54/main.c index 47db439b63b..622d27b6d8f 100644 --- a/drivers/net/wireless/p54/main.c +++ b/drivers/net/wireless/p54/main.c @@ -429,8 +429,8 @@ static int p54_set_key(struct ieee80211_hw *dev, enum set_key_cmd cmd, mutex_lock(&priv->conf_mutex); if (cmd == SET_KEY) { - switch (key->alg) { - case ALG_TKIP: + switch (key->cipher) { + case WLAN_CIPHER_SUITE_TKIP: if (!(priv->privacy_caps & (BR_DESC_PRIV_CAP_MICHAEL | BR_DESC_PRIV_CAP_TKIP))) { ret = -EOPNOTSUPP; @@ -439,7 +439,8 @@ static int p54_set_key(struct ieee80211_hw *dev, enum set_key_cmd cmd, key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; algo = P54_CRYPTO_TKIPMICHAEL; break; - case ALG_WEP: + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: if (!(priv->privacy_caps & BR_DESC_PRIV_CAP_WEP)) { ret = -EOPNOTSUPP; goto out_unlock; @@ -447,7 +448,7 @@ static int p54_set_key(struct ieee80211_hw *dev, enum set_key_cmd cmd, key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; algo = P54_CRYPTO_WEP; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: if (!(priv->privacy_caps & BR_DESC_PRIV_CAP_AESCCMP)) { ret = -EOPNOTSUPP; goto out_unlock; diff --git a/drivers/net/wireless/p54/txrx.c b/drivers/net/wireless/p54/txrx.c index 427b46f558e..e53f8cec779 100644 --- a/drivers/net/wireless/p54/txrx.c +++ b/drivers/net/wireless/p54/txrx.c @@ -683,14 +683,15 @@ static void p54_tx_80211_header(struct p54_common *priv, struct sk_buff *skb, } } -static u8 p54_convert_algo(enum ieee80211_key_alg alg) +static u8 p54_convert_algo(u32 cipher) { - switch (alg) { - case ALG_WEP: + switch (cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: return P54_CRYPTO_WEP; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: return P54_CRYPTO_TKIPMICHAEL; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: return P54_CRYPTO_AESCCMP; default: return 0; @@ -731,7 +732,7 @@ int p54_tx_80211(struct ieee80211_hw *dev, struct sk_buff *skb) if (info->control.hw_key) { crypt_offset = ieee80211_get_hdrlen_from_skb(skb); - if (info->control.hw_key->alg == ALG_TKIP) { + if (info->control.hw_key->cipher == WLAN_CIPHER_SUITE_TKIP) { u8 *iv = (u8 *)(skb->data + crypt_offset); /* * The firmware excepts that the IV has to have @@ -827,10 +828,10 @@ int p54_tx_80211(struct ieee80211_hw *dev, struct sk_buff *skb) hdr->tries = ridx; txhdr->rts_rate_idx = 0; if (info->control.hw_key) { - txhdr->key_type = p54_convert_algo(info->control.hw_key->alg); + txhdr->key_type = p54_convert_algo(info->control.hw_key->cipher); txhdr->key_len = min((u8)16, info->control.hw_key->keylen); memcpy(txhdr->key, info->control.hw_key->key, txhdr->key_len); - if (info->control.hw_key->alg == ALG_TKIP) { + if (info->control.hw_key->cipher == WLAN_CIPHER_SUITE_TKIP) { /* reserve space for the MIC key */ len += 8; memcpy(skb_put(skb, 8), &(info->control.hw_key->key diff --git a/drivers/net/wireless/rt2x00/rt2500usb.c b/drivers/net/wireless/rt2x00/rt2500usb.c index cdaf93f4826..97cf72f8bc1 100644 --- a/drivers/net/wireless/rt2x00/rt2500usb.c +++ b/drivers/net/wireless/rt2x00/rt2500usb.c @@ -355,7 +355,9 @@ static int rt2500usb_config_key(struct rt2x00_dev *rt2x00dev, * it is known that not work at least on some hardware. * SW crypto will be used in that case. */ - if (key->alg == ALG_WEP && key->keyidx != 0) + if ((key->cipher == WLAN_CIPHER_SUITE_WEP40 || + key->cipher == WLAN_CIPHER_SUITE_WEP104) && + key->keyidx != 0) return -EOPNOTSUPP; /* diff --git a/drivers/net/wireless/rt2x00/rt2x00crypto.c b/drivers/net/wireless/rt2x00/rt2x00crypto.c index 583dacd8d24..5e9074bf2b8 100644 --- a/drivers/net/wireless/rt2x00/rt2x00crypto.c +++ b/drivers/net/wireless/rt2x00/rt2x00crypto.c @@ -31,15 +31,14 @@ enum cipher rt2x00crypto_key_to_cipher(struct ieee80211_key_conf *key) { - switch (key->alg) { - case ALG_WEP: - if (key->keylen == WLAN_KEY_LEN_WEP40) - return CIPHER_WEP64; - else - return CIPHER_WEP128; - case ALG_TKIP: + switch (key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + return CIPHER_WEP64; + case WLAN_CIPHER_SUITE_WEP104: + return CIPHER_WEP128; + case WLAN_CIPHER_SUITE_TKIP: return CIPHER_TKIP; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: return CIPHER_AES; default: return CIPHER_NONE; @@ -95,7 +94,7 @@ unsigned int rt2x00crypto_tx_overhead(struct rt2x00_dev *rt2x00dev, overhead += key->iv_len; if (!(key->flags & IEEE80211_KEY_FLAG_GENERATE_MMIC)) { - if (key->alg == ALG_TKIP) + if (key->cipher == WLAN_CIPHER_SUITE_TKIP) overhead += 8; } diff --git a/drivers/net/wireless/wl12xx/wl1251_main.c b/drivers/net/wireless/wl12xx/wl1251_main.c index 861a5f33761..6d31c855fcf 100644 --- a/drivers/net/wireless/wl12xx/wl1251_main.c +++ b/drivers/net/wireless/wl12xx/wl1251_main.c @@ -725,8 +725,9 @@ static int wl1251_set_key_type(struct wl1251 *wl, struct ieee80211_key_conf *mac80211_key, const u8 *addr) { - switch (mac80211_key->alg) { - case ALG_WEP: + switch (mac80211_key->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: if (is_broadcast_ether_addr(addr)) key->key_type = KEY_WEP_DEFAULT; else @@ -734,7 +735,7 @@ static int wl1251_set_key_type(struct wl1251 *wl, mac80211_key->hw_key_idx = mac80211_key->keyidx; break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: if (is_broadcast_ether_addr(addr)) key->key_type = KEY_TKIP_MIC_GROUP; else @@ -742,7 +743,7 @@ static int wl1251_set_key_type(struct wl1251 *wl, mac80211_key->hw_key_idx = mac80211_key->keyidx; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: if (is_broadcast_ether_addr(addr)) key->key_type = KEY_AES_GROUP; else @@ -750,7 +751,7 @@ static int wl1251_set_key_type(struct wl1251 *wl, mac80211_key->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; break; default: - wl1251_error("Unknown key algo 0x%x", mac80211_key->alg); + wl1251_error("Unknown key cipher 0x%x", mac80211_key->cipher); return -EOPNOTSUPP; } @@ -783,7 +784,7 @@ static int wl1251_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, wl1251_debug(DEBUG_CRYPT, "CMD: 0x%x", cmd); wl1251_dump(DEBUG_CRYPT, "ADDR: ", addr, ETH_ALEN); wl1251_debug(DEBUG_CRYPT, "Key: algo:0x%x, id:%d, len:%d flags 0x%x", - key->alg, key->keyidx, key->keylen, key->flags); + key->cipher, key->keyidx, key->keylen, key->flags); wl1251_dump(DEBUG_CRYPT, "KEY: ", key->key, key->keylen); if (is_zero_ether_addr(addr)) { diff --git a/drivers/net/wireless/wl12xx/wl1251_tx.c b/drivers/net/wireless/wl12xx/wl1251_tx.c index a38ec199187..6634b3e27cf 100644 --- a/drivers/net/wireless/wl12xx/wl1251_tx.c +++ b/drivers/net/wireless/wl12xx/wl1251_tx.c @@ -189,7 +189,7 @@ static int wl1251_tx_send_packet(struct wl1251 *wl, struct sk_buff *skb, tx_hdr = (struct tx_double_buffer_desc *) skb->data; if (control->control.hw_key && - control->control.hw_key->alg == ALG_TKIP) { + control->control.hw_key->cipher == WLAN_CIPHER_SUITE_TKIP) { int hdrlen; __le16 fc; u16 length; @@ -399,7 +399,7 @@ static void wl1251_tx_packet_cb(struct wl1251 *wl, */ frame = skb_pull(skb, sizeof(struct tx_double_buffer_desc)); if (info->control.hw_key && - info->control.hw_key->alg == ALG_TKIP) { + info->control.hw_key->cipher == WLAN_CIPHER_SUITE_TKIP) { hdrlen = ieee80211_get_hdrlen_from_skb(skb); memmove(frame + WL1251_TKIP_IV_SPACE, frame, hdrlen); skb_pull(skb, WL1251_TKIP_IV_SPACE); diff --git a/drivers/net/wireless/wl12xx/wl1271_main.c b/drivers/net/wireless/wl12xx/wl1271_main.c index 9d68f0012f0..30194c0f36a 100644 --- a/drivers/net/wireless/wl12xx/wl1271_main.c +++ b/drivers/net/wireless/wl12xx/wl1271_main.c @@ -1439,7 +1439,7 @@ static int wl1271_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, wl1271_debug(DEBUG_CRYPT, "CMD: 0x%x", cmd); wl1271_dump(DEBUG_CRYPT, "ADDR: ", addr, ETH_ALEN); wl1271_debug(DEBUG_CRYPT, "Key: algo:0x%x, id:%d, len:%d flags 0x%x", - key_conf->alg, key_conf->keyidx, + key_conf->cipher, key_conf->keyidx, key_conf->keylen, key_conf->flags); wl1271_dump(DEBUG_CRYPT, "KEY: ", key_conf->key, key_conf->keylen); @@ -1455,20 +1455,21 @@ static int wl1271_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, if (ret < 0) goto out_unlock; - switch (key_conf->alg) { - case ALG_WEP: + switch (key_conf->cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: key_type = KEY_WEP; key_conf->hw_key_idx = key_conf->keyidx; break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: key_type = KEY_TKIP; key_conf->hw_key_idx = key_conf->keyidx; tx_seq_32 = WL1271_TX_SECURITY_HI32(wl->tx_security_seq); tx_seq_16 = WL1271_TX_SECURITY_LO16(wl->tx_security_seq); break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: key_type = KEY_AES; key_conf->flags |= IEEE80211_KEY_FLAG_GENERATE_IV; @@ -1476,7 +1477,7 @@ static int wl1271_op_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd, tx_seq_16 = WL1271_TX_SECURITY_LO16(wl->tx_security_seq); break; default: - wl1271_error("Unknown key algo 0x%x", key_conf->alg); + wl1271_error("Unknown key algo 0x%x", key_conf->cipher); ret = -EOPNOTSUPP; goto out_sleep; diff --git a/drivers/net/wireless/wl12xx/wl1271_tx.c b/drivers/net/wireless/wl12xx/wl1271_tx.c index c592cc2e9fe..dc0b46c93c4 100644 --- a/drivers/net/wireless/wl12xx/wl1271_tx.c +++ b/drivers/net/wireless/wl12xx/wl1271_tx.c @@ -193,7 +193,7 @@ static int wl1271_tx_frame(struct wl1271 *wl, struct sk_buff *skb) info = IEEE80211_SKB_CB(skb); if (info->control.hw_key && - info->control.hw_key->alg == ALG_TKIP) + info->control.hw_key->cipher == WLAN_CIPHER_SUITE_TKIP) extra = WL1271_TKIP_IV_SPACE; if (info->control.hw_key) { @@ -347,7 +347,7 @@ static void wl1271_tx_complete_packet(struct wl1271 *wl, /* remove TKIP header space if present */ if (info->control.hw_key && - info->control.hw_key->alg == ALG_TKIP) { + info->control.hw_key->cipher == WLAN_CIPHER_SUITE_TKIP) { int hdrlen = ieee80211_get_hdrlen_from_skb(skb); memmove(skb->data + WL1271_TKIP_IV_SPACE, skb->data, hdrlen); skb_pull(skb, WL1271_TKIP_IV_SPACE); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 871ed1de736..914c747ac3a 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -788,20 +788,6 @@ static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif) return false; } -/** - * enum ieee80211_key_alg - key algorithm - * @ALG_WEP: WEP40 or WEP104 - * @ALG_TKIP: TKIP - * @ALG_CCMP: CCMP (AES) - * @ALG_AES_CMAC: AES-128-CMAC - */ -enum ieee80211_key_alg { - ALG_WEP, - ALG_TKIP, - ALG_CCMP, - ALG_AES_CMAC, -}; - /** * enum ieee80211_key_flags - key flags * @@ -839,7 +825,7 @@ enum ieee80211_key_flags { * @hw_key_idx: To be set by the driver, this is the key index the driver * wants to be given when a frame is transmitted and needs to be * encrypted in hardware. - * @alg: The key algorithm. + * @cipher: The key's cipher suite selector. * @flags: key flags, see &enum ieee80211_key_flags. * @keyidx: the key index (0-3) * @keylen: key material length @@ -852,7 +838,7 @@ enum ieee80211_key_flags { * @iv_len: The IV length for this key type */ struct ieee80211_key_conf { - enum ieee80211_key_alg alg; + u32 cipher; u8 icv_len; u8 iv_len; u8 hw_key_idx; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 19c6146010b..9a35d9e7efd 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -116,7 +116,6 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, { struct ieee80211_sub_if_data *sdata; struct sta_info *sta = NULL; - enum ieee80211_key_alg alg; struct ieee80211_key *key; int err; @@ -125,31 +124,20 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, sdata = IEEE80211_DEV_TO_SUB_IF(dev); + /* reject WEP and TKIP keys if WEP failed to initialize */ switch (params->cipher) { case WLAN_CIPHER_SUITE_WEP40: - case WLAN_CIPHER_SUITE_WEP104: - alg = ALG_WEP; - break; case WLAN_CIPHER_SUITE_TKIP: - alg = ALG_TKIP; - break; - case WLAN_CIPHER_SUITE_CCMP: - alg = ALG_CCMP; - break; - case WLAN_CIPHER_SUITE_AES_CMAC: - alg = ALG_AES_CMAC; + case WLAN_CIPHER_SUITE_WEP104: + if (IS_ERR(sdata->local->wep_tx_tfm)) + return -EINVAL; break; default: - return -EINVAL; + break; } - /* reject WEP and TKIP keys if WEP failed to initialize */ - if ((alg == ALG_WEP || alg == ALG_TKIP) && - IS_ERR(sdata->local->wep_tx_tfm)) - return -EINVAL; - - key = ieee80211_key_alloc(alg, key_idx, params->key_len, params->key, - params->seq_len, params->seq); + key = ieee80211_key_alloc(params->cipher, key_idx, params->key_len, + params->key, params->seq_len, params->seq); if (IS_ERR(key)) return PTR_ERR(key); @@ -247,10 +235,10 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, memset(¶ms, 0, sizeof(params)); - switch (key->conf.alg) { - case ALG_TKIP: - params.cipher = WLAN_CIPHER_SUITE_TKIP; + params.cipher = key->conf.cipher; + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_TKIP: iv32 = key->u.tkip.tx.iv32; iv16 = key->u.tkip.tx.iv16; @@ -268,8 +256,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, params.seq = seq; params.seq_len = 6; break; - case ALG_CCMP: - params.cipher = WLAN_CIPHER_SUITE_CCMP; + case WLAN_CIPHER_SUITE_CCMP: seq[0] = key->u.ccmp.tx_pn[5]; seq[1] = key->u.ccmp.tx_pn[4]; seq[2] = key->u.ccmp.tx_pn[3]; @@ -279,14 +266,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, params.seq = seq; params.seq_len = 6; break; - case ALG_WEP: - if (key->conf.keylen == 5) - params.cipher = WLAN_CIPHER_SUITE_WEP40; - else - params.cipher = WLAN_CIPHER_SUITE_WEP104; - break; - case ALG_AES_CMAC: - params.cipher = WLAN_CIPHER_SUITE_AES_CMAC; + case WLAN_CIPHER_SUITE_AES_CMAC: seq[0] = key->u.aes_cmac.tx_pn[5]; seq[1] = key->u.aes_cmac.tx_pn[4]; seq[2] = key->u.aes_cmac.tx_pn[3]; diff --git a/net/mac80211/debugfs_key.c b/net/mac80211/debugfs_key.c index fa5e76e658e..1647f8dc5cd 100644 --- a/net/mac80211/debugfs_key.c +++ b/net/mac80211/debugfs_key.c @@ -64,26 +64,13 @@ static ssize_t key_algorithm_read(struct file *file, char __user *userbuf, size_t count, loff_t *ppos) { - char *alg; + char buf[15]; struct ieee80211_key *key = file->private_data; + u32 c = key->conf.cipher; - switch (key->conf.alg) { - case ALG_WEP: - alg = "WEP\n"; - break; - case ALG_TKIP: - alg = "TKIP\n"; - break; - case ALG_CCMP: - alg = "CCMP\n"; - break; - case ALG_AES_CMAC: - alg = "AES-128-CMAC\n"; - break; - default: - return 0; - } - return simple_read_from_buffer(userbuf, count, ppos, alg, strlen(alg)); + sprintf(buf, "%.2x-%.2x-%.2x:%d\n", + c >> 24, (c >> 16) & 0xff, (c >> 8) & 0xff, c & 0xff); + return simple_read_from_buffer(userbuf, count, ppos, buf, strlen(buf)); } KEY_OPS(algorithm); @@ -95,21 +82,22 @@ static ssize_t key_tx_spec_read(struct file *file, char __user *userbuf, int len; struct ieee80211_key *key = file->private_data; - switch (key->conf.alg) { - case ALG_WEP: + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: len = scnprintf(buf, sizeof(buf), "\n"); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: len = scnprintf(buf, sizeof(buf), "%08x %04x\n", key->u.tkip.tx.iv32, key->u.tkip.tx.iv16); break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: tpn = key->u.ccmp.tx_pn; len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], tpn[5]); break; - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: tpn = key->u.aes_cmac.tx_pn; len = scnprintf(buf, sizeof(buf), "%02x%02x%02x%02x%02x%02x\n", tpn[0], tpn[1], tpn[2], tpn[3], tpn[4], @@ -130,11 +118,12 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, int i, len; const u8 *rpn; - switch (key->conf.alg) { - case ALG_WEP: + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: len = scnprintf(buf, sizeof(buf), "\n"); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: for (i = 0; i < NUM_RX_DATA_QUEUES; i++) p += scnprintf(p, sizeof(buf)+buf-p, "%08x %04x\n", @@ -142,7 +131,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, key->u.tkip.rx[i].iv16); len = p - buf; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: for (i = 0; i < NUM_RX_DATA_QUEUES + 1; i++) { rpn = key->u.ccmp.rx_pn[i]; p += scnprintf(p, sizeof(buf)+buf-p, @@ -152,7 +141,7 @@ static ssize_t key_rx_spec_read(struct file *file, char __user *userbuf, } len = p - buf; break; - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: rpn = key->u.aes_cmac.rx_pn; p += scnprintf(p, sizeof(buf)+buf-p, "%02x%02x%02x%02x%02x%02x\n", @@ -174,11 +163,11 @@ static ssize_t key_replays_read(struct file *file, char __user *userbuf, char buf[20]; int len; - switch (key->conf.alg) { - case ALG_CCMP: + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_CCMP: len = scnprintf(buf, sizeof(buf), "%u\n", key->u.ccmp.replays); break; - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: len = scnprintf(buf, sizeof(buf), "%u\n", key->u.aes_cmac.replays); break; @@ -196,8 +185,8 @@ static ssize_t key_icverrors_read(struct file *file, char __user *userbuf, char buf[20]; int len; - switch (key->conf.alg) { - case ALG_AES_CMAC: + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_AES_CMAC: len = scnprintf(buf, sizeof(buf), "%u\n", key->u.aes_cmac.icverrors); break; diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index 5d5d2a97466..b5a95582d81 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -336,7 +336,7 @@ TRACE_EVENT(drv_set_key, LOCAL_ENTRY VIF_ENTRY STA_ENTRY - __field(enum ieee80211_key_alg, alg) + __field(u32, cipher) __field(u8, hw_key_idx) __field(u8, flags) __field(s8, keyidx) @@ -346,7 +346,7 @@ TRACE_EVENT(drv_set_key, LOCAL_ASSIGN; VIF_ASSIGN; STA_ASSIGN; - __entry->alg = key->alg; + __entry->cipher = key->cipher; __entry->flags = key->flags; __entry->keyidx = key->keyidx; __entry->hw_key_idx = key->hw_key_idx; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index d6dbc8ea4ea..3203d1d3cd3 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -227,9 +227,7 @@ static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, } } -struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, - int idx, - size_t key_len, +struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, const u8 *key_data, size_t seq_len, const u8 *seq) { @@ -249,15 +247,16 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, key->conf.flags = 0; key->flags = 0; - key->conf.alg = alg; + key->conf.cipher = cipher; key->conf.keyidx = idx; key->conf.keylen = key_len; - switch (alg) { - case ALG_WEP: + switch (cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: key->conf.iv_len = WEP_IV_LEN; key->conf.icv_len = WEP_ICV_LEN; break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: key->conf.iv_len = TKIP_IV_LEN; key->conf.icv_len = TKIP_ICV_LEN; if (seq) { @@ -269,7 +268,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, } } break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: key->conf.iv_len = CCMP_HDR_LEN; key->conf.icv_len = CCMP_MIC_LEN; if (seq) { @@ -279,7 +278,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, seq[CCMP_PN_LEN - j - 1]; } break; - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: key->conf.iv_len = 0; key->conf.icv_len = sizeof(struct ieee80211_mmie); if (seq) @@ -290,7 +289,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, memcpy(key->conf.key, key_data, key_len); INIT_LIST_HEAD(&key->list); - if (alg == ALG_CCMP) { + if (cipher == WLAN_CIPHER_SUITE_CCMP) { /* * Initialize AES key state here as an optimization so that * it does not need to be initialized for every packet. @@ -303,7 +302,7 @@ struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, } } - if (alg == ALG_AES_CMAC) { + if (cipher == WLAN_CIPHER_SUITE_AES_CMAC) { /* * Initialize AES key state here as an optimization so that * it does not need to be initialized for every packet. @@ -328,9 +327,9 @@ static void __ieee80211_key_destroy(struct ieee80211_key *key) if (key->local) ieee80211_key_disable_hw_accel(key); - if (key->conf.alg == ALG_CCMP) + if (key->conf.cipher == WLAN_CIPHER_SUITE_CCMP) ieee80211_aes_key_free(key->u.ccmp.tfm); - if (key->conf.alg == ALG_AES_CMAC) + if (key->conf.cipher == WLAN_CIPHER_SUITE_AES_CMAC) ieee80211_aes_cmac_key_free(key->u.aes_cmac.tfm); if (key->local) ieee80211_debugfs_key_remove(key); diff --git a/net/mac80211/key.h b/net/mac80211/key.h index b665bbb7a47..53b5ce12536 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -123,9 +123,7 @@ struct ieee80211_key { struct ieee80211_key_conf conf; }; -struct ieee80211_key *ieee80211_key_alloc(enum ieee80211_key_alg alg, - int idx, - size_t key_len, +struct ieee80211_key *ieee80211_key_alloc(u32 cipher, int idx, size_t key_len, const u8 *key_data, size_t seq_len, const u8 *seq); /* diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index f24a0a1cff1..ad2427021b2 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -961,7 +961,8 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) * pairwise or station-to-station keys, but for WEP we allow * using a key index as well. */ - if (rx->key && rx->key->conf.alg != ALG_WEP && + if (rx->key && rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP40 && + rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP104 && !is_multicast_ether_addr(hdr->addr1)) rx->key = NULL; } @@ -977,8 +978,9 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) return RX_DROP_UNUSABLE; /* the hdr variable is invalid now! */ - switch (rx->key->conf.alg) { - case ALG_WEP: + switch (rx->key->conf.cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: /* Check for weak IVs if possible */ if (rx->sta && ieee80211_is_data(fc) && (!(status->flag & RX_FLAG_IV_STRIPPED) || @@ -988,13 +990,13 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) result = ieee80211_crypto_wep_decrypt(rx); break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: result = ieee80211_crypto_tkip_decrypt(rx); break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: result = ieee80211_crypto_ccmp_decrypt(rx); break; - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: result = ieee80211_crypto_aes_cmac_decrypt(rx); break; } @@ -1291,7 +1293,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) /* This is the first fragment of a new frame. */ entry = ieee80211_reassemble_add(rx->sdata, frag, seq, rx->queue, &(rx->skb)); - if (rx->key && rx->key->conf.alg == ALG_CCMP && + if (rx->key && rx->key->conf.cipher == WLAN_CIPHER_SUITE_CCMP && ieee80211_has_protected(fc)) { int queue = ieee80211_is_mgmt(fc) ? NUM_RX_DATA_QUEUES : rx->queue; @@ -1320,7 +1322,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) int i; u8 pn[CCMP_PN_LEN], *rpn; int queue; - if (!rx->key || rx->key->conf.alg != ALG_CCMP) + if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_CCMP) return RX_DROP_UNUSABLE; memcpy(pn, entry->last_pn, CCMP_PN_LEN); for (i = CCMP_PN_LEN - 1; i >= 0; i--) { diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index c54db966926..bc4fefc9166 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -543,15 +543,16 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) tx->key->tx_rx_count++; /* TODO: add threshold stuff again */ - switch (tx->key->conf.alg) { - case ALG_WEP: + switch (tx->key->conf.cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: if (ieee80211_is_auth(hdr->frame_control)) break; - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: if (!ieee80211_is_data_present(hdr->frame_control)) tx->key = NULL; break; - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: if (!ieee80211_is_data_present(hdr->frame_control) && !ieee80211_use_mfp(hdr->frame_control, tx->sta, tx->skb)) @@ -561,7 +562,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) IEEE80211_KEY_FLAG_SW_MGMT) && ieee80211_is_mgmt(hdr->frame_control); break; - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: if (!ieee80211_is_mgmt(hdr->frame_control)) tx->key = NULL; break; @@ -949,14 +950,15 @@ ieee80211_tx_h_encrypt(struct ieee80211_tx_data *tx) if (!tx->key) return TX_CONTINUE; - switch (tx->key->conf.alg) { - case ALG_WEP: + switch (tx->key->conf.cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: return ieee80211_crypto_wep_encrypt(tx); - case ALG_TKIP: + case WLAN_CIPHER_SUITE_TKIP: return ieee80211_crypto_tkip_encrypt(tx); - case ALG_CCMP: + case WLAN_CIPHER_SUITE_CCMP: return ieee80211_crypto_ccmp_encrypt(tx); - case ALG_AES_CMAC: + case WLAN_CIPHER_SUITE_AES_CMAC: return ieee80211_crypto_aes_cmac_encrypt(tx); } diff --git a/net/mac80211/wep.c b/net/mac80211/wep.c index 9ebc8d8a1f5..f27484c22b9 100644 --- a/net/mac80211/wep.c +++ b/net/mac80211/wep.c @@ -240,7 +240,7 @@ static int ieee80211_wep_decrypt(struct ieee80211_local *local, keyidx = skb->data[hdrlen + 3] >> 6; - if (!key || keyidx != key->conf.keyidx || key->conf.alg != ALG_WEP) + if (!key || keyidx != key->conf.keyidx) return -1; klen = 3 + key->conf.keylen; diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 8d59d27d887..b08ad94b56d 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -36,8 +36,8 @@ ieee80211_tx_h_michael_mic_add(struct ieee80211_tx_data *tx) int tail; hdr = (struct ieee80211_hdr *)skb->data; - if (!tx->key || tx->key->conf.alg != ALG_TKIP || skb->len < 24 || - !ieee80211_is_data_present(hdr->frame_control)) + if (!tx->key || tx->key->conf.cipher != WLAN_CIPHER_SUITE_TKIP || + skb->len < 24 || !ieee80211_is_data_present(hdr->frame_control)) return TX_CONTINUE; hdrlen = ieee80211_hdrlen(hdr->frame_control); @@ -94,7 +94,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) if (status->flag & RX_FLAG_MMIC_STRIPPED) return RX_CONTINUE; - if (!rx->key || rx->key->conf.alg != ALG_TKIP || + if (!rx->key || rx->key->conf.cipher != WLAN_CIPHER_SUITE_TKIP || !ieee80211_has_protected(hdr->frame_control) || !ieee80211_is_data_present(hdr->frame_control)) return RX_CONTINUE; -- cgit v1.2.3 From 2244d07bfa2097cb00600da91c715a8aa547917e Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 17 Aug 2010 08:59:14 +0000 Subject: net: simplify flags for tx timestamping This patch removes the abstraction introduced by the union skb_shared_tx in the shared skb data. The access of the different union elements at several places led to some confusion about accessing the shared tx_flags e.g. in skb_orphan_try(). http://marc.info/?l=linux-netdev&m=128084897415886&w=2 Signed-off-by: Oliver Hartkopp Signed-off-by: David S. Miller --- Documentation/networking/timestamping.txt | 22 +++++++++------- drivers/net/bfin_mac.c | 10 +++---- drivers/net/gianfar.c | 15 +++++------ drivers/net/igb/igb.h | 2 +- drivers/net/igb/igb_main.c | 11 ++++---- include/linux/skbuff.h | 44 +++++++++++-------------------- include/net/ip.h | 2 +- include/net/sock.h | 8 ++---- net/can/raw.c | 4 +-- net/core/dev.c | 6 ++--- net/core/skbuff.c | 2 +- net/ipv4/icmp.c | 4 +-- net/ipv4/ip_output.c | 6 ++--- net/ipv4/raw.c | 2 +- net/ipv4/udp.c | 4 +-- net/packet/af_packet.c | 4 +-- net/socket.c | 9 +++---- 17 files changed, 68 insertions(+), 87 deletions(-) (limited to 'include/net') diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt index e8c8f4f06c6..98097d8cb91 100644 --- a/Documentation/networking/timestamping.txt +++ b/Documentation/networking/timestamping.txt @@ -172,15 +172,19 @@ struct skb_shared_hwtstamps { }; Time stamps for outgoing packets are to be generated as follows: -- In hard_start_xmit(), check if skb_tx(skb)->hardware is set no-zero. - If yes, then the driver is expected to do hardware time stamping. +- In hard_start_xmit(), check if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) + is set no-zero. If yes, then the driver is expected to do hardware time + stamping. - If this is possible for the skb and requested, then declare - that the driver is doing the time stamping by setting the field - skb_tx(skb)->in_progress non-zero. You might want to keep a pointer - to the associated skb for the next step and not free the skb. A driver - not supporting hardware time stamping doesn't do that. A driver must - never touch sk_buff::tstamp! It is used to store software generated - time stamps by the network subsystem. + that the driver is doing the time stamping by setting the flag + SKBTX_IN_PROGRESS in skb_shinfo(skb)->tx_flags , e.g. with + + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; + + You might want to keep a pointer to the associated skb for the next step + and not free the skb. A driver not supporting hardware time stamping doesn't + do that. A driver must never touch sk_buff::tstamp! It is used to store + software generated time stamps by the network subsystem. - As soon as the driver has sent the packet and/or obtained a hardware time stamp for it, it passes the time stamp back by calling skb_hwtstamp_tx() with the original skb, the raw @@ -191,6 +195,6 @@ Time stamps for outgoing packets are to be generated as follows: this would occur at a later time in the processing pipeline than other software time stamping and therefore could lead to unexpected deltas between time stamps. -- If the driver did not call set skb_tx(skb)->in_progress, then +- If the driver did not set the SKBTX_IN_PROGRESS flag (see above), then dev_hard_start_xmit() checks whether software time stamping is wanted as fallback and potentially generates the time stamp. diff --git a/drivers/net/bfin_mac.c b/drivers/net/bfin_mac.c index 012613fde3f..7a0e4156fad 100644 --- a/drivers/net/bfin_mac.c +++ b/drivers/net/bfin_mac.c @@ -803,15 +803,14 @@ static void bfin_dump_hwtamp(char *s, ktime_t *hw, ktime_t *ts, struct timecompa static void bfin_tx_hwtstamp(struct net_device *netdev, struct sk_buff *skb) { struct bfin_mac_local *lp = netdev_priv(netdev); - union skb_shared_tx *shtx = skb_tx(skb); - if (shtx->hardware) { + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) { int timeout_cnt = MAX_TIMEOUT_CNT; /* When doing time stamping, keep the connection to the socket * a while longer */ - shtx->in_progress = 1; + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; /* * The timestamping is done at the EMAC module's MII/RMII interface @@ -991,7 +990,6 @@ static int bfin_mac_hard_start_xmit(struct sk_buff *skb, struct bfin_mac_local *lp = netdev_priv(dev); u16 *data; u32 data_align = (unsigned long)(skb->data) & 0x3; - union skb_shared_tx *shtx = skb_tx(skb); current_tx_ptr->skb = skb; @@ -1005,7 +1003,7 @@ static int bfin_mac_hard_start_xmit(struct sk_buff *skb, * of this field are the length of the packet payload in bytes and the higher * 4 bits are the timestamping enable field. */ - if (shtx->hardware) + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) *data |= 0x1000; current_tx_ptr->desc_a.start_addr = (u32)data; @@ -1015,7 +1013,7 @@ static int bfin_mac_hard_start_xmit(struct sk_buff *skb, } else { *((u16 *)(current_tx_ptr->packet)) = (u16)(skb->len); /* enable timestamping for the sent packet */ - if (shtx->hardware) + if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP) *((u16 *)(current_tx_ptr->packet)) |= 0x1000; memcpy((u8 *)(current_tx_ptr->packet + 2), skb->data, skb->len); diff --git a/drivers/net/gianfar.c b/drivers/net/gianfar.c index 3d9f958ebd2..e6048d6ab0e 100644 --- a/drivers/net/gianfar.c +++ b/drivers/net/gianfar.c @@ -2048,7 +2048,6 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) u32 bufaddr; unsigned long flags; unsigned int nr_frags, nr_txbds, length; - union skb_shared_tx *shtx; /* * TOE=1 frames larger than 2500 bytes may see excess delays @@ -2069,10 +2068,10 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) txq = netdev_get_tx_queue(dev, rq); base = tx_queue->tx_bd_base; regs = tx_queue->grp->regs; - shtx = skb_tx(skb); /* check if time stamp should be generated */ - if (unlikely(shtx->hardware && priv->hwts_tx_en)) + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP && + priv->hwts_tx_en)) do_tstamp = 1; /* make space for additional header when fcb is needed */ @@ -2174,7 +2173,7 @@ static int gfar_start_xmit(struct sk_buff *skb, struct net_device *dev) /* Setup tx hardware time stamping if requested */ if (unlikely(do_tstamp)) { - shtx->in_progress = 1; + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; if (fcb == NULL) fcb = gfar_add_fcb(skb); fcb->ptp = 1; @@ -2446,7 +2445,6 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) int howmany = 0; u32 lstatus; size_t buflen; - union skb_shared_tx *shtx; rx_queue = priv->rx_queue[tx_queue->qindex]; bdp = tx_queue->dirty_tx; @@ -2461,8 +2459,7 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) * When time stamping, one additional TxBD must be freed. * Also, we need to dma_unmap_single() the TxPAL. */ - shtx = skb_tx(skb); - if (unlikely(shtx->in_progress)) + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) nr_txbds = frags + 2; else nr_txbds = frags + 1; @@ -2476,7 +2473,7 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) (lstatus & BD_LENGTH_MASK)) break; - if (unlikely(shtx->in_progress)) { + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { next = next_txbd(bdp, base, tx_ring_size); buflen = next->length + GMAC_FCB_LEN; } else @@ -2485,7 +2482,7 @@ static int gfar_clean_tx_ring(struct gfar_priv_tx_q *tx_queue) dma_unmap_single(&priv->ofdev->dev, bdp->bufPtr, buflen, DMA_TO_DEVICE); - if (unlikely(shtx->in_progress)) { + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) { struct skb_shared_hwtstamps shhwtstamps; u64 *ns = (u64*) (((u32)skb->data + 0x10) & ~0x7); memset(&shhwtstamps, 0, sizeof(shhwtstamps)); diff --git a/drivers/net/igb/igb.h b/drivers/net/igb/igb.h index 6e63d9a7fc7..44e0ff1494e 100644 --- a/drivers/net/igb/igb.h +++ b/drivers/net/igb/igb.h @@ -143,7 +143,7 @@ struct igb_buffer { u16 next_to_watch; unsigned int bytecount; u16 gso_segs; - union skb_shared_tx shtx; + u8 tx_flags; u8 mapped_as_page; }; /* RX */ diff --git a/drivers/net/igb/igb_main.c b/drivers/net/igb/igb_main.c index 9b4e5895f5f..985e37cf17b 100644 --- a/drivers/net/igb/igb_main.c +++ b/drivers/net/igb/igb_main.c @@ -3954,7 +3954,7 @@ static inline int igb_tx_map_adv(struct igb_ring *tx_ring, struct sk_buff *skb, } tx_ring->buffer_info[i].skb = skb; - tx_ring->buffer_info[i].shtx = skb_shinfo(skb)->tx_flags; + tx_ring->buffer_info[i].tx_flags = skb_shinfo(skb)->tx_flags; /* multiply data chunks by size of headers */ tx_ring->buffer_info[i].bytecount = ((gso_segs - 1) * hlen) + skb->len; tx_ring->buffer_info[i].gso_segs = gso_segs; @@ -4088,7 +4088,6 @@ netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb, u32 tx_flags = 0; u16 first; u8 hdr_len = 0; - union skb_shared_tx *shtx = skb_tx(skb); /* need: 1 descriptor per page, * + 2 desc gap to keep tail from touching head, @@ -4100,8 +4099,8 @@ netdev_tx_t igb_xmit_frame_ring_adv(struct sk_buff *skb, return NETDEV_TX_BUSY; } - if (unlikely(shtx->hardware)) { - shtx->in_progress = 1; + if (unlikely(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)) { + skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS; tx_flags |= IGB_TX_FLAGS_TSTAMP; } @@ -5319,7 +5318,7 @@ static void igb_tx_hwtstamp(struct igb_q_vector *q_vector, struct igb_buffer *bu u64 regval; /* if skb does not support hw timestamp or TX stamp not valid exit */ - if (likely(!buffer_info->shtx.hardware) || + if (likely(!(buffer_info->tx_flags & SKBTX_HW_TSTAMP)) || !(rd32(E1000_TSYNCTXCTL) & E1000_TSYNCTXCTL_VALID)) return; @@ -5500,7 +5499,7 @@ static void igb_rx_hwtstamp(struct igb_q_vector *q_vector, u32 staterr, * values must belong to this one here and therefore we don't need to * compare any of the additional attributes stored for it. * - * If nothing went wrong, then it should have a skb_shared_tx that we + * If nothing went wrong, then it should have a shared tx_flags that we * can turn into a skb_shared_hwtstamps. */ if (staterr & E1000_RXDADV_STAT_TSIP) { diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d8050382b18..f067c95cf18 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -163,26 +163,19 @@ struct skb_shared_hwtstamps { ktime_t syststamp; }; -/** - * struct skb_shared_tx - instructions for time stamping of outgoing packets - * @hardware: generate hardware time stamp - * @software: generate software time stamp - * @in_progress: device driver is going to provide - * hardware time stamp - * @prevent_sk_orphan: make sk reference available on driver level - * @flags: all shared_tx flags - * - * These flags are attached to packets as part of the - * &skb_shared_info. Use skb_tx() to get a pointer. - */ -union skb_shared_tx { - struct { - __u8 hardware:1, - software:1, - in_progress:1, - prevent_sk_orphan:1; - }; - __u8 flags; +/* Definitions for tx_flags in struct skb_shared_info */ +enum { + /* generate hardware time stamp */ + SKBTX_HW_TSTAMP = 1 << 0, + + /* generate software time stamp */ + SKBTX_SW_TSTAMP = 1 << 1, + + /* device driver is going to provide hardware time stamp */ + SKBTX_IN_PROGRESS = 1 << 2, + + /* ensure the originating sk reference is available on driver level */ + SKBTX_DRV_NEEDS_SK_REF = 1 << 3, }; /* This data is invariant across clones and lives at @@ -195,7 +188,7 @@ struct skb_shared_info { unsigned short gso_segs; unsigned short gso_type; __be32 ip6_frag_id; - union skb_shared_tx tx_flags; + __u8 tx_flags; struct sk_buff *frag_list; struct skb_shared_hwtstamps hwtstamps; @@ -587,11 +580,6 @@ static inline struct skb_shared_hwtstamps *skb_hwtstamps(struct sk_buff *skb) return &skb_shinfo(skb)->hwtstamps; } -static inline union skb_shared_tx *skb_tx(struct sk_buff *skb) -{ - return &skb_shinfo(skb)->tx_flags; -} - /** * skb_queue_empty - check if a queue is empty * @list: queue head @@ -1996,8 +1984,8 @@ extern void skb_tstamp_tx(struct sk_buff *orig_skb, static inline void sw_tx_timestamp(struct sk_buff *skb) { - union skb_shared_tx *shtx = skb_tx(skb); - if (shtx->software && !shtx->in_progress) + if (skb_shinfo(skb)->tx_flags & SKBTX_SW_TSTAMP && + !(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS)) skb_tstamp_tx(skb, NULL); } diff --git a/include/net/ip.h b/include/net/ip.h index 890f9725d68..7691aca133d 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -53,7 +53,7 @@ struct ipcm_cookie { __be32 addr; int oif; struct ip_options *opt; - union skb_shared_tx shtx; + __u8 tx_flags; }; #define IPCB(skb) ((struct inet_skb_parm*)((skb)->cb)) diff --git a/include/net/sock.h b/include/net/sock.h index ac53bfbdfe1..100e43bf95f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1669,17 +1669,13 @@ static inline void sock_recv_ts_and_drops(struct msghdr *msg, struct sock *sk, /** * sock_tx_timestamp - checks whether the outgoing packet is to be time stamped - * @msg: outgoing packet * @sk: socket sending this packet - * @shtx: filled with instructions for time stamping + * @tx_flags: filled with instructions for time stamping * * Currently only depends on SOCK_TIMESTAMPING* flags. Returns error code if * parameters are invalid. */ -extern int sock_tx_timestamp(struct msghdr *msg, - struct sock *sk, - union skb_shared_tx *shtx); - +extern int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags); /** * sk_eat_skb - Release a skb if it is no longer needed diff --git a/net/can/raw.c b/net/can/raw.c index a10e3338f08..7d77e67e57a 100644 --- a/net/can/raw.c +++ b/net/can/raw.c @@ -647,12 +647,12 @@ static int raw_sendmsg(struct kiocb *iocb, struct socket *sock, err = memcpy_fromiovec(skb_put(skb, size), msg->msg_iov, size); if (err < 0) goto free_skb; - err = sock_tx_timestamp(msg, sk, skb_tx(skb)); + err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); if (err < 0) goto free_skb; /* to be able to check the received tx sock reference in raw_rcv() */ - skb_tx(skb)->prevent_sk_orphan = 1; + skb_shinfo(skb)->tx_flags |= SKBTX_DRV_NEEDS_SK_REF; skb->dev = dev; skb->sk = sk; diff --git a/net/core/dev.c b/net/core/dev.c index 586a11cb439..c1dc8a95f6f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1902,14 +1902,14 @@ static int dev_gso_segment(struct sk_buff *skb) /* * Try to orphan skb early, right before transmission by the device. - * We cannot orphan skb if tx timestamp is requested, since - * drivers need to call skb_tstamp_tx() to send the timestamp. + * We cannot orphan skb if tx timestamp is requested or the sk-reference + * is needed on driver level for other reasons, e.g. see net/can/raw.c */ static inline void skb_orphan_try(struct sk_buff *skb) { struct sock *sk = skb->sk; - if (sk && !skb_tx(skb)->flags) { + if (sk && !skb_shinfo(skb)->tx_flags) { /* skb_tx_hash() wont be able to get sk. * We copy sk_hash into skb->rxhash */ diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 3a2513f0d0c..99ef721f773 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -3016,7 +3016,7 @@ void skb_tstamp_tx(struct sk_buff *orig_skb, } else { /* * no hardware time stamps available, - * so keep the skb_shared_tx and only + * so keep the shared tx_flags and only * store software time stamp */ skb->tstamp = ktime_get_real(); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index a0d847c7cba..96bc7f9475a 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -379,7 +379,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) inet->tos = ip_hdr(skb)->tos; daddr = ipc.addr = rt->rt_src; ipc.opt = NULL; - ipc.shtx.flags = 0; + ipc.tx_flags = 0; if (icmp_param->replyopts.optlen) { ipc.opt = &icmp_param->replyopts; if (ipc.opt->srr) @@ -538,7 +538,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) inet_sk(sk)->tos = tos; ipc.addr = iph->saddr; ipc.opt = &icmp_param.replyopts; - ipc.shtx.flags = 0; + ipc.tx_flags = 0; { struct flowi fl = { diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 04b69896df5..e807492f177 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -953,7 +953,7 @@ alloc_new_skb: else /* only the initial fragment is time stamped */ - ipc->shtx.flags = 0; + ipc->tx_flags = 0; } if (skb == NULL) goto error; @@ -964,7 +964,7 @@ alloc_new_skb: skb->ip_summed = csummode; skb->csum = 0; skb_reserve(skb, hh_len); - *skb_tx(skb) = ipc->shtx; + skb_shinfo(skb)->tx_flags = ipc->tx_flags; /* * Find where to start putting bytes. @@ -1384,7 +1384,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar daddr = ipc.addr = rt->rt_src; ipc.opt = NULL; - ipc.shtx.flags = 0; + ipc.tx_flags = 0; if (replyopts.opt.optlen) { ipc.opt = &replyopts.opt; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 009a7b2aa1e..1f85ef28989 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -505,7 +505,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.addr = inet->inet_saddr; ipc.opt = NULL; - ipc.shtx.flags = 0; + ipc.tx_flags = 0; ipc.oif = sk->sk_bound_dev_if; if (msg->msg_controllen) { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 32e0bef60d0..86e757e162e 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -797,7 +797,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, return -EOPNOTSUPP; ipc.opt = NULL; - ipc.shtx.flags = 0; + ipc.tx_flags = 0; if (up->pending) { /* @@ -845,7 +845,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, ipc.addr = inet->inet_saddr; ipc.oif = sk->sk_bound_dev_if; - err = sock_tx_timestamp(msg, sk, &ipc.shtx); + err = sock_tx_timestamp(sk, &ipc.tx_flags); if (err) return err; if (msg->msg_controllen) { diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 9a17f28b125..3616f27b9d4 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -488,7 +488,7 @@ retry: skb->dev = dev; skb->priority = sk->sk_priority; skb->mark = sk->sk_mark; - err = sock_tx_timestamp(msg, sk, skb_tx(skb)); + err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); if (err < 0) goto out_unlock; @@ -1209,7 +1209,7 @@ static int packet_snd(struct socket *sock, err = skb_copy_datagram_from_iovec(skb, offset, msg->msg_iov, 0, len); if (err) goto out_free; - err = sock_tx_timestamp(msg, sk, skb_tx(skb)); + err = sock_tx_timestamp(sk, &skb_shinfo(skb)->tx_flags); if (err < 0) goto out_free; diff --git a/net/socket.c b/net/socket.c index 2270b941bcc..7848d12f5e4 100644 --- a/net/socket.c +++ b/net/socket.c @@ -535,14 +535,13 @@ void sock_release(struct socket *sock) } EXPORT_SYMBOL(sock_release); -int sock_tx_timestamp(struct msghdr *msg, struct sock *sk, - union skb_shared_tx *shtx) +int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) { - shtx->flags = 0; + *tx_flags = 0; if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) - shtx->hardware = 1; + *tx_flags |= SKBTX_HW_TSTAMP; if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) - shtx->software = 1; + *tx_flags |= SKBTX_SW_TSTAMP; return 0; } EXPORT_SYMBOL(sock_tx_timestamp); -- cgit v1.2.3 From eb4d40654505e47aa9d2035bb97f631fa61d14b4 Mon Sep 17 00:00:00 2001 From: Grégoire Baron Date: Wed, 18 Aug 2010 13:10:35 +0000 Subject: net/sched: add ACT_CSUM action to update packets checksums net/sched: add ACT_CSUM action to update packets checksums ACT_CSUM can be called just after ACT_PEDIT in order to re-compute some altered checksums in IPv4 and IPv6 packets. The following checksums are supported by this patch: - IPv4: IPv4 header, ICMP, IGMP, TCP, UDP & UDPLite - IPv6: ICMPv6, TCP, UDP & UDPLite It's possible to request in the same action to update different kind of checksums, if the packets flow mix TCP, UDP and UDPLite, ... An example of usage is done in the associated iproute2 patch. Version 3 changes: - remove useless goto instructions - improve IPv6 hop options decoding Version 2 changes: - coding style correction - remove useless arguments of some functions - use stack in tcf_csum_dump() - add tcf_csum_skb_nextlayer() to factor code Signed-off-by: Gregoire Baron Acked-by: jamal Signed-off-by: David S. Miller --- include/linux/tc_act/Kbuild | 1 + include/linux/tc_act/tc_csum.h | 32 +++ include/net/tc_act/tc_csum.h | 15 ++ net/sched/Kconfig | 10 + net/sched/Makefile | 1 + net/sched/act_csum.c | 595 +++++++++++++++++++++++++++++++++++++++++ 6 files changed, 654 insertions(+) create mode 100644 include/linux/tc_act/tc_csum.h create mode 100644 include/net/tc_act/tc_csum.h create mode 100644 net/sched/act_csum.c (limited to 'include/net') diff --git a/include/linux/tc_act/Kbuild b/include/linux/tc_act/Kbuild index 76990937f4c..67b501c302b 100644 --- a/include/linux/tc_act/Kbuild +++ b/include/linux/tc_act/Kbuild @@ -4,3 +4,4 @@ header-y += tc_mirred.h header-y += tc_pedit.h header-y += tc_nat.h header-y += tc_skbedit.h +header-y += tc_csum.h diff --git a/include/linux/tc_act/tc_csum.h b/include/linux/tc_act/tc_csum.h new file mode 100644 index 00000000000..a047c49a315 --- /dev/null +++ b/include/linux/tc_act/tc_csum.h @@ -0,0 +1,32 @@ +#ifndef __LINUX_TC_CSUM_H +#define __LINUX_TC_CSUM_H + +#include +#include + +#define TCA_ACT_CSUM 16 + +enum { + TCA_CSUM_UNSPEC, + TCA_CSUM_PARMS, + TCA_CSUM_TM, + __TCA_CSUM_MAX +}; +#define TCA_CSUM_MAX (__TCA_CSUM_MAX - 1) + +enum { + TCA_CSUM_UPDATE_FLAG_IPV4HDR = 1, + TCA_CSUM_UPDATE_FLAG_ICMP = 2, + TCA_CSUM_UPDATE_FLAG_IGMP = 4, + TCA_CSUM_UPDATE_FLAG_TCP = 8, + TCA_CSUM_UPDATE_FLAG_UDP = 16, + TCA_CSUM_UPDATE_FLAG_UDPLITE = 32 +}; + +struct tc_csum { + tc_gen; + + __u32 update_flags; +}; + +#endif /* __LINUX_TC_CSUM_H */ diff --git a/include/net/tc_act/tc_csum.h b/include/net/tc_act/tc_csum.h new file mode 100644 index 00000000000..9e8710be7a0 --- /dev/null +++ b/include/net/tc_act/tc_csum.h @@ -0,0 +1,15 @@ +#ifndef __NET_TC_CSUM_H +#define __NET_TC_CSUM_H + +#include +#include + +struct tcf_csum { + struct tcf_common common; + + u32 update_flags; +}; +#define to_tcf_csum(pc) \ + container_of(pc,struct tcf_csum,common) + +#endif /* __NET_TC_CSUM_H */ diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 2f691fb180d..522d5a9a282 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -518,6 +518,16 @@ config NET_ACT_SKBEDIT To compile this code as a module, choose M here: the module will be called act_skbedit. +config NET_ACT_CSUM + tristate "Checksum Updating" + depends on NET_CLS_ACT + ---help--- + Say Y here to update some common checksum after some direct + packet alterations. + + To compile this code as a module, choose M here: the + module will be called act_csum. + config NET_CLS_IND bool "Incoming device classification" depends on NET_CLS_U32 || NET_CLS_FW diff --git a/net/sched/Makefile b/net/sched/Makefile index f14e71bfa58..960f5dba630 100644 --- a/net/sched/Makefile +++ b/net/sched/Makefile @@ -15,6 +15,7 @@ obj-$(CONFIG_NET_ACT_NAT) += act_nat.o obj-$(CONFIG_NET_ACT_PEDIT) += act_pedit.o obj-$(CONFIG_NET_ACT_SIMP) += act_simple.o obj-$(CONFIG_NET_ACT_SKBEDIT) += act_skbedit.o +obj-$(CONFIG_NET_ACT_CSUM) += act_csum.o obj-$(CONFIG_NET_SCH_FIFO) += sch_fifo.o obj-$(CONFIG_NET_SCH_CBQ) += sch_cbq.o obj-$(CONFIG_NET_SCH_HTB) += sch_htb.o diff --git a/net/sched/act_csum.c b/net/sched/act_csum.c new file mode 100644 index 00000000000..58d7f36949d --- /dev/null +++ b/net/sched/act_csum.c @@ -0,0 +1,595 @@ +/* + * Checksum updating actions + * + * Copyright (c) 2010 Gregoire Baron + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or (at your option) + * any later version. + * + */ + +#include +#include +#include +#include +#include + +#include +#include +#include + +#include + +#include +#include +#include +#include +#include +#include +#include + +#include + +#include +#include + +#define CSUM_TAB_MASK 15 +static struct tcf_common *tcf_csum_ht[CSUM_TAB_MASK + 1]; +static u32 csum_idx_gen; +static DEFINE_RWLOCK(csum_lock); + +static struct tcf_hashinfo csum_hash_info = { + .htab = tcf_csum_ht, + .hmask = CSUM_TAB_MASK, + .lock = &csum_lock, +}; + +static const struct nla_policy csum_policy[TCA_CSUM_MAX + 1] = { + [TCA_CSUM_PARMS] = { .len = sizeof(struct tc_csum), }, +}; + +static int tcf_csum_init(struct nlattr *nla, struct nlattr *est, + struct tc_action *a, int ovr, int bind) +{ + struct nlattr *tb[TCA_CSUM_MAX + 1]; + struct tc_csum *parm; + struct tcf_common *pc; + struct tcf_csum *p; + int ret = 0, err; + + if (nla == NULL) + return -EINVAL; + + err = nla_parse_nested(tb, TCA_CSUM_MAX, nla,csum_policy); + if (err < 0) + return err; + + if (tb[TCA_CSUM_PARMS] == NULL) + return -EINVAL; + parm = nla_data(tb[TCA_CSUM_PARMS]); + + pc = tcf_hash_check(parm->index, a, bind, &csum_hash_info); + if (!pc) { + pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, &csum_idx_gen, &csum_hash_info); + if (IS_ERR(pc)) + return PTR_ERR(pc); + p = to_tcf_csum(pc); + ret = ACT_P_CREATED; + } else { + p = to_tcf_csum(pc); + if (!ovr) { + tcf_hash_release(pc, bind, &csum_hash_info); + return -EEXIST; + } + } + + spin_lock_bh(&p->tcf_lock); + p->tcf_action = parm->action; + p->update_flags = parm->update_flags; + spin_unlock_bh(&p->tcf_lock); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(pc, &csum_hash_info); + + return ret; +} + +static int tcf_csum_cleanup(struct tc_action *a, int bind) +{ + struct tcf_csum *p = a->priv; + return tcf_hash_release(&p->common, bind, &csum_hash_info); +} + +/** + * tcf_csum_skb_nextlayer - Get next layer pointer + * @skb: sk_buff to use + * @ihl: previous summed headers length + * @ipl: complete packet length + * @jhl: next header length + * + * Check the expected next layer availability in the specified sk_buff. + * Return the next layer pointer if pass, NULL otherwise. + */ +static void *tcf_csum_skb_nextlayer(struct sk_buff *skb, + unsigned int ihl, unsigned int ipl, + unsigned int jhl) +{ + int ntkoff = skb_network_offset(skb); + int hl = ihl + jhl; + + if (!pskb_may_pull(skb, ipl + ntkoff) || (ipl < hl) || + (skb_cloned(skb) && + !skb_clone_writable(skb, hl + ntkoff) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC))) + return NULL; + else + return (void *)(skb_network_header(skb) + ihl); +} + +static int tcf_csum_ipv4_icmp(struct sk_buff *skb, + unsigned int ihl, unsigned int ipl) +{ + struct icmphdr *icmph; + + icmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmph)); + if (icmph == NULL) + return 0; + + icmph->checksum = 0; + skb->csum = csum_partial(icmph, ipl - ihl, 0); + icmph->checksum = csum_fold(skb->csum); + + skb->ip_summed = CHECKSUM_NONE; + + return 1; +} + +static int tcf_csum_ipv4_igmp(struct sk_buff *skb, + unsigned int ihl, unsigned int ipl) +{ + struct igmphdr *igmph; + + igmph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*igmph)); + if (igmph == NULL) + return 0; + + igmph->csum = 0; + skb->csum = csum_partial(igmph, ipl - ihl, 0); + igmph->csum = csum_fold(skb->csum); + + skb->ip_summed = CHECKSUM_NONE; + + return 1; +} + +static int tcf_csum_ipv6_icmp(struct sk_buff *skb, struct ipv6hdr *ip6h, + unsigned int ihl, unsigned int ipl) +{ + struct icmp6hdr *icmp6h; + + icmp6h = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*icmp6h)); + if (icmp6h == NULL) + return 0; + + icmp6h->icmp6_cksum = 0; + skb->csum = csum_partial(icmp6h, ipl - ihl, 0); + icmp6h->icmp6_cksum = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, + ipl - ihl, IPPROTO_ICMPV6, + skb->csum); + + skb->ip_summed = CHECKSUM_NONE; + + return 1; +} + +static int tcf_csum_ipv4_tcp(struct sk_buff *skb, struct iphdr *iph, + unsigned int ihl, unsigned int ipl) +{ + struct tcphdr *tcph; + + tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); + if (tcph == NULL) + return 0; + + tcph->check = 0; + skb->csum = csum_partial(tcph, ipl - ihl, 0); + tcph->check = tcp_v4_check(ipl - ihl, + iph->saddr, iph->daddr, skb->csum); + + skb->ip_summed = CHECKSUM_NONE; + + return 1; +} + +static int tcf_csum_ipv6_tcp(struct sk_buff *skb, struct ipv6hdr *ip6h, + unsigned int ihl, unsigned int ipl) +{ + struct tcphdr *tcph; + + tcph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*tcph)); + if (tcph == NULL) + return 0; + + tcph->check = 0; + skb->csum = csum_partial(tcph, ipl - ihl, 0); + tcph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, + ipl - ihl, IPPROTO_TCP, + skb->csum); + + skb->ip_summed = CHECKSUM_NONE; + + return 1; +} + +static int tcf_csum_ipv4_udp(struct sk_buff *skb, struct iphdr *iph, + unsigned int ihl, unsigned int ipl, int udplite) +{ + struct udphdr *udph; + u16 ul; + + /* Support both UDP and UDPLITE checksum algorithms, + * Don't use udph->len to get the real length without any protocol check, + * UDPLITE uses udph->len for another thing, + * Use iph->tot_len, or just ipl. + */ + + udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph)); + if (udph == NULL) + return 0; + + ul = ntohs(udph->len); + + if (udplite || udph->check) { + + udph->check = 0; + + if (udplite) { + if (ul == 0) + skb->csum = csum_partial(udph, ipl - ihl, 0); + + else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl)) + skb->csum = csum_partial(udph, ul, 0); + + else + goto ignore_obscure_skb; + } else { + if (ul != ipl - ihl) + goto ignore_obscure_skb; + + skb->csum = csum_partial(udph, ul, 0); + } + + udph->check = csum_tcpudp_magic(iph->saddr, iph->daddr, + ul, iph->protocol, + skb->csum); + + if (!udph->check) + udph->check = CSUM_MANGLED_0; + } + + skb->ip_summed = CHECKSUM_NONE; + +ignore_obscure_skb: + return 1; +} + +static int tcf_csum_ipv6_udp(struct sk_buff *skb, struct ipv6hdr *ip6h, + unsigned int ihl, unsigned int ipl, int udplite) +{ + struct udphdr *udph; + u16 ul; + + /* Support both UDP and UDPLITE checksum algorithms, + * Don't use udph->len to get the real length without any protocol check, + * UDPLITE uses udph->len for another thing, + * Use ip6h->payload_len + sizeof(*ip6h) ... , or just ipl. + */ + + udph = tcf_csum_skb_nextlayer(skb, ihl, ipl, sizeof(*udph)); + if (udph == NULL) + return 0; + + ul = ntohs(udph->len); + + udph->check = 0; + + if (udplite) { + if (ul == 0) + skb->csum = csum_partial(udph, ipl - ihl, 0); + + else if ((ul >= sizeof(*udph)) && (ul <= ipl - ihl)) + skb->csum = csum_partial(udph, ul, 0); + + else + goto ignore_obscure_skb; + } else { + if (ul != ipl - ihl) + goto ignore_obscure_skb; + + skb->csum = csum_partial(udph, ul, 0); + } + + udph->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr, ul, + udplite ? IPPROTO_UDPLITE : IPPROTO_UDP, + skb->csum); + + if (!udph->check) + udph->check = CSUM_MANGLED_0; + + skb->ip_summed = CHECKSUM_NONE; + +ignore_obscure_skb: + return 1; +} + +static int tcf_csum_ipv4(struct sk_buff *skb, u32 update_flags) +{ + struct iphdr *iph; + int ntkoff; + + ntkoff = skb_network_offset(skb); + + if (!pskb_may_pull(skb, sizeof(*iph) + ntkoff)) + goto fail; + + iph = ip_hdr(skb); + + switch (iph->frag_off & htons(IP_OFFSET) ? 0 : iph->protocol) { + case IPPROTO_ICMP: + if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP) + if (!tcf_csum_ipv4_icmp(skb, + iph->ihl * 4, ntohs(iph->tot_len))) + goto fail; + break; + case IPPROTO_IGMP: + if (update_flags & TCA_CSUM_UPDATE_FLAG_IGMP) + if (!tcf_csum_ipv4_igmp(skb, + iph->ihl * 4, ntohs(iph->tot_len))) + goto fail; + break; + case IPPROTO_TCP: + if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP) + if (!tcf_csum_ipv4_tcp(skb, iph, + iph->ihl * 4, ntohs(iph->tot_len))) + goto fail; + break; + case IPPROTO_UDP: + if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP) + if (!tcf_csum_ipv4_udp(skb, iph, + iph->ihl * 4, ntohs(iph->tot_len), 0)) + goto fail; + break; + case IPPROTO_UDPLITE: + if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE) + if (!tcf_csum_ipv4_udp(skb, iph, + iph->ihl * 4, ntohs(iph->tot_len), 1)) + goto fail; + break; + } + + if (update_flags & TCA_CSUM_UPDATE_FLAG_IPV4HDR) { + if (skb_cloned(skb) && + !skb_clone_writable(skb, sizeof(*iph) + ntkoff) && + pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) + goto fail; + + ip_send_check(iph); + } + + return 1; + +fail: + return 0; +} + +static int tcf_csum_ipv6_hopopts(struct ipv6_opt_hdr *ip6xh, + unsigned int ixhl, unsigned int *pl) +{ + int off, len, optlen; + unsigned char *xh = (void *)ip6xh; + + off = sizeof(*ip6xh); + len = ixhl - off; + + while (len > 1) { + switch (xh[off]) + { + case IPV6_TLV_PAD0: + optlen = 1; + break; + case IPV6_TLV_JUMBO: + optlen = xh[off + 1] + 2; + if (optlen != 6 || len < 6 || (off & 3) != 2) + /* wrong jumbo option length/alignment */ + return 0; + *pl = ntohl(*(__be32 *)(xh + off + 2)); + goto done; + default: + optlen = xh[off + 1] + 2; + if (optlen > len) + /* ignore obscure options */ + goto done; + break; + } + off += optlen; + len -= optlen; + } + +done: + return 1; +} + +static int tcf_csum_ipv6(struct sk_buff *skb, u32 update_flags) +{ + struct ipv6hdr *ip6h; + struct ipv6_opt_hdr *ip6xh; + unsigned int hl, ixhl; + unsigned int pl; + int ntkoff; + u8 nexthdr; + + ntkoff = skb_network_offset(skb); + + hl = sizeof(*ip6h); + + if (!pskb_may_pull(skb, hl + ntkoff)) + goto fail; + + ip6h = ipv6_hdr(skb); + + pl = ntohs(ip6h->payload_len); + nexthdr = ip6h->nexthdr; + + do { + switch (nexthdr) { + case NEXTHDR_FRAGMENT: + goto ignore_skb; + case NEXTHDR_ROUTING: + case NEXTHDR_HOP: + case NEXTHDR_DEST: + if (!pskb_may_pull(skb, hl + sizeof(*ip6xh) + ntkoff)) + goto fail; + ip6xh = (void *)(skb_network_header(skb) + hl); + ixhl = ipv6_optlen(ip6xh); + if (!pskb_may_pull(skb, hl + ixhl + ntkoff)) + goto fail; + if ((nexthdr == NEXTHDR_HOP) && + !(tcf_csum_ipv6_hopopts(ip6xh, ixhl, &pl))) + goto fail; + nexthdr = ip6xh->nexthdr; + hl += ixhl; + break; + case IPPROTO_ICMPV6: + if (update_flags & TCA_CSUM_UPDATE_FLAG_ICMP) + if (!tcf_csum_ipv6_icmp(skb, ip6h, + hl, pl + sizeof(*ip6h))) + goto fail; + goto done; + case IPPROTO_TCP: + if (update_flags & TCA_CSUM_UPDATE_FLAG_TCP) + if (!tcf_csum_ipv6_tcp(skb, ip6h, + hl, pl + sizeof(*ip6h))) + goto fail; + goto done; + case IPPROTO_UDP: + if (update_flags & TCA_CSUM_UPDATE_FLAG_UDP) + if (!tcf_csum_ipv6_udp(skb, ip6h, + hl, pl + sizeof(*ip6h), 0)) + goto fail; + goto done; + case IPPROTO_UDPLITE: + if (update_flags & TCA_CSUM_UPDATE_FLAG_UDPLITE) + if (!tcf_csum_ipv6_udp(skb, ip6h, + hl, pl + sizeof(*ip6h), 1)) + goto fail; + goto done; + default: + goto ignore_skb; + } + } while (pskb_may_pull(skb, hl + 1 + ntkoff)); + +done: +ignore_skb: + return 1; + +fail: + return 0; +} + +static int tcf_csum(struct sk_buff *skb, + struct tc_action *a, struct tcf_result *res) +{ + struct tcf_csum *p = a->priv; + int action; + u32 update_flags; + + spin_lock(&p->tcf_lock); + p->tcf_tm.lastuse = jiffies; + p->tcf_bstats.bytes += qdisc_pkt_len(skb); + p->tcf_bstats.packets++; + action = p->tcf_action; + update_flags = p->update_flags; + spin_unlock(&p->tcf_lock); + + if (unlikely(action == TC_ACT_SHOT)) + goto drop; + + switch (skb->protocol) { + case cpu_to_be16(ETH_P_IP): + if (!tcf_csum_ipv4(skb, update_flags)) + goto drop; + break; + case cpu_to_be16(ETH_P_IPV6): + if (!tcf_csum_ipv6(skb, update_flags)) + goto drop; + break; + } + + return action; + +drop: + spin_lock(&p->tcf_lock); + p->tcf_qstats.drops++; + spin_unlock(&p->tcf_lock); + return TC_ACT_SHOT; +} + +static int tcf_csum_dump(struct sk_buff *skb, + struct tc_action *a, int bind, int ref) +{ + unsigned char *b = skb_tail_pointer(skb); + struct tcf_csum *p = a->priv; + struct tc_csum opt = { + .update_flags = p->update_flags, + + .index = p->tcf_index, + .action = p->tcf_action, + .refcnt = p->tcf_refcnt - ref, + .bindcnt = p->tcf_bindcnt - bind, + }; + struct tcf_t t; + + NLA_PUT(skb, TCA_CSUM_PARMS, sizeof(opt), &opt); + t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(p->tcf_tm.expires); + NLA_PUT(skb, TCA_CSUM_TM, sizeof(t), &t); + + return skb->len; + +nla_put_failure: + nlmsg_trim(skb, b); + return -1; +} + +static struct tc_action_ops act_csum_ops = { + .kind = "csum", + .hinfo = &csum_hash_info, + .type = TCA_ACT_CSUM, + .capab = TCA_CAP_NONE, + .owner = THIS_MODULE, + .act = tcf_csum, + .dump = tcf_csum_dump, + .cleanup = tcf_csum_cleanup, + .lookup = tcf_hash_search, + .init = tcf_csum_init, + .walk = tcf_generic_walker +}; + +MODULE_DESCRIPTION("Checksum updating actions"); +MODULE_LICENSE("GPL"); + +static int __init csum_init_module(void) +{ + return tcf_register_action(&act_csum_ops); +} + +static void __exit csum_cleanup_module(void) +{ + tcf_unregister_action(&act_csum_ops); +} + +module_init(csum_init_module); +module_exit(csum_cleanup_module); -- cgit v1.2.3 From 00959ade36acadc00e757f87060bf6e4501d545f Mon Sep 17 00:00:00 2001 From: Dmitry Kozlov Date: Sat, 21 Aug 2010 23:05:39 -0700 Subject: PPTP: PPP over IPv4 (Point-to-Point Tunneling Protocol) PPP: introduce "pptp" module which implements point-to-point tunneling protocol using pppox framework NET: introduce the "gre" module for demultiplexing GRE packets on version criteria (required to pptp and ip_gre may coexists) NET: ip_gre: update to use the "gre" module This patch introduces then pptp support to the linux kernel which dramatically speeds up pptp vpn connections and decreases cpu usage in comparison of existing user-space implementation (poptop/pptpclient). There is accel-pptp project (https://sourceforge.net/projects/accel-pptp/) to utilize this module, it contains plugin for pppd to use pptp in client-mode and modified pptpd (poptop) to build high-performance pptp NAS. There was many changes from initial submitted patch, most important are: 1. using rcu instead of read-write locks 2. using static bitmap instead of dynamically allocated 3. using vmalloc for memory allocation instead of BITS_PER_LONG + __get_free_pages 4. fixed many coding style issues Thanks to Eric Dumazet. Signed-off-by: Dmitry Kozlov Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- MAINTAINERS | 14 + drivers/net/Kconfig | 11 + drivers/net/Makefile | 1 + drivers/net/pptp.c | 726 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/if_pppox.h | 52 ++-- include/net/gre.h | 18 ++ net/ipv4/Kconfig | 7 + net/ipv4/Makefile | 1 + net/ipv4/gre.c | 151 ++++++++++ net/ipv4/ip_gre.c | 14 +- 10 files changed, 971 insertions(+), 24 deletions(-) create mode 100644 drivers/net/pptp.c create mode 100644 include/net/gre.h create mode 100644 net/ipv4/gre.c (limited to 'include/net') diff --git a/MAINTAINERS b/MAINTAINERS index b5b8baa1d70..43c9efcd8e1 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -6528,6 +6528,20 @@ M: "Maciej W. Rozycki" S: Maintained F: drivers/serial/zs.* +GRE DEMULTIPLEXER DRIVER +M: Dmitry Kozlov +L: netdev@vger.kernel.org +S: Maintained +F: net/ipv4/gre.c +F: include/net/gre.h + +PPTP DRIVER +M: Dmitry Kozlov +L: netdev@vger.kernel.org +S: Maintained +F: drivers/net/pptp.c +W: http://sourceforge.net/projects/accel-pptp + THE REST M: Linus Torvalds L: linux-kernel@vger.kernel.org diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 5a6895320b4..9b2a72089a6 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -3192,6 +3192,17 @@ config PPPOE which contains instruction on how to use this driver (under the heading "Kernel mode PPPoE"). +config PPTP + tristate "PPP over IPv4 (PPTP) (EXPERIMENTAL)" + depends on EXPERIMENTAL && PPP && NET_IPGRE_DEMUX + help + Support for PPP over IPv4.(Point-to-Point Tunneling Protocol) + + This driver requires pppd plugin to work in client mode or + modified pptpd (poptop) to work in server mode. + See http://accel-pptp.sourceforge.net/ for information how to + utilize this module. + config PPPOATM tristate "PPP over ATM" depends on ATM && PPP diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 56e8c27f77c..0b371083d48 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -162,6 +162,7 @@ obj-$(CONFIG_PPP_BSDCOMP) += bsd_comp.o obj-$(CONFIG_PPP_MPPE) += ppp_mppe.o obj-$(CONFIG_PPPOE) += pppox.o pppoe.o obj-$(CONFIG_PPPOL2TP) += pppox.o +obj-$(CONFIG_PPTP) += pppox.o pptp.o obj-$(CONFIG_SLIP) += slip.o obj-$(CONFIG_SLHC) += slhc.o diff --git a/drivers/net/pptp.c b/drivers/net/pptp.c new file mode 100644 index 00000000000..761f0eced72 --- /dev/null +++ b/drivers/net/pptp.c @@ -0,0 +1,726 @@ +/* + * Point-to-Point Tunneling Protocol for Linux + * + * Authors: Dmitry Kozlov + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include + +#include + +#define PPTP_DRIVER_VERSION "0.8.5" + +#define MAX_CALLID 65535 + +static DECLARE_BITMAP(callid_bitmap, MAX_CALLID + 1); +static struct pppox_sock **callid_sock; + +static DEFINE_SPINLOCK(chan_lock); + +static struct proto pptp_sk_proto __read_mostly; +static struct ppp_channel_ops pptp_chan_ops; +static const struct proto_ops pptp_ops; + +#define PPP_LCP_ECHOREQ 0x09 +#define PPP_LCP_ECHOREP 0x0A +#define SC_RCV_BITS (SC_RCV_B7_1|SC_RCV_B7_0|SC_RCV_ODDP|SC_RCV_EVNP) + +#define MISSING_WINDOW 20 +#define WRAPPED(curseq, lastseq)\ + ((((curseq) & 0xffffff00) == 0) &&\ + (((lastseq) & 0xffffff00) == 0xffffff00)) + +#define PPTP_GRE_PROTO 0x880B +#define PPTP_GRE_VER 0x1 + +#define PPTP_GRE_FLAG_C 0x80 +#define PPTP_GRE_FLAG_R 0x40 +#define PPTP_GRE_FLAG_K 0x20 +#define PPTP_GRE_FLAG_S 0x10 +#define PPTP_GRE_FLAG_A 0x80 + +#define PPTP_GRE_IS_C(f) ((f)&PPTP_GRE_FLAG_C) +#define PPTP_GRE_IS_R(f) ((f)&PPTP_GRE_FLAG_R) +#define PPTP_GRE_IS_K(f) ((f)&PPTP_GRE_FLAG_K) +#define PPTP_GRE_IS_S(f) ((f)&PPTP_GRE_FLAG_S) +#define PPTP_GRE_IS_A(f) ((f)&PPTP_GRE_FLAG_A) + +#define PPTP_HEADER_OVERHEAD (2+sizeof(struct pptp_gre_header)) +struct pptp_gre_header { + u8 flags; + u8 ver; + u16 protocol; + u16 payload_len; + u16 call_id; + u32 seq; + u32 ack; +} __packed; + +static struct pppox_sock *lookup_chan(u16 call_id, __be32 s_addr) +{ + struct pppox_sock *sock; + struct pptp_opt *opt; + + rcu_read_lock(); + sock = rcu_dereference(callid_sock[call_id]); + if (sock) { + opt = &sock->proto.pptp; + if (opt->dst_addr.sin_addr.s_addr != s_addr) + sock = NULL; + else + sock_hold(sk_pppox(sock)); + } + rcu_read_unlock(); + + return sock; +} + +static int lookup_chan_dst(u16 call_id, __be32 d_addr) +{ + struct pppox_sock *sock; + struct pptp_opt *opt; + int i; + + rcu_read_lock(); + for (i = find_next_bit(callid_bitmap, MAX_CALLID, 1); i < MAX_CALLID; + i = find_next_bit(callid_bitmap, MAX_CALLID, i + 1)) { + sock = rcu_dereference(callid_sock[i]); + if (!sock) + continue; + opt = &sock->proto.pptp; + if (opt->dst_addr.call_id == call_id && + opt->dst_addr.sin_addr.s_addr == d_addr) + break; + } + rcu_read_unlock(); + + return i < MAX_CALLID; +} + +static int add_chan(struct pppox_sock *sock) +{ + static int call_id; + + spin_lock(&chan_lock); + if (!sock->proto.pptp.src_addr.call_id) { + call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, call_id + 1); + if (call_id == MAX_CALLID) { + call_id = find_next_zero_bit(callid_bitmap, MAX_CALLID, 1); + if (call_id == MAX_CALLID) + goto out_err; + } + sock->proto.pptp.src_addr.call_id = call_id; + } else if (test_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap)) + goto out_err; + + set_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap); + rcu_assign_pointer(callid_sock[sock->proto.pptp.src_addr.call_id], sock); + spin_unlock(&chan_lock); + + return 0; + +out_err: + spin_unlock(&chan_lock); + return -1; +} + +static void del_chan(struct pppox_sock *sock) +{ + spin_lock(&chan_lock); + clear_bit(sock->proto.pptp.src_addr.call_id, callid_bitmap); + rcu_assign_pointer(callid_sock[sock->proto.pptp.src_addr.call_id], NULL); + spin_unlock(&chan_lock); + synchronize_rcu(); +} + +static int pptp_xmit(struct ppp_channel *chan, struct sk_buff *skb) +{ + struct sock *sk = (struct sock *) chan->private; + struct pppox_sock *po = pppox_sk(sk); + struct pptp_opt *opt = &po->proto.pptp; + struct pptp_gre_header *hdr; + unsigned int header_len = sizeof(*hdr); + int err = 0; + int islcp; + int len; + unsigned char *data; + __u32 seq_recv; + + + struct rtable *rt; + struct net_device *tdev; + struct iphdr *iph; + int max_headroom; + + if (sk_pppox(po)->sk_state & PPPOX_DEAD) + goto tx_error; + + { + struct flowi fl = { .oif = 0, + .nl_u = { + .ip4_u = { + .daddr = opt->dst_addr.sin_addr.s_addr, + .saddr = opt->src_addr.sin_addr.s_addr, + .tos = RT_TOS(0) } }, + .proto = IPPROTO_GRE }; + err = ip_route_output_key(&init_net, &rt, &fl); + if (err) + goto tx_error; + } + tdev = rt->dst.dev; + + max_headroom = LL_RESERVED_SPACE(tdev) + sizeof(*iph) + sizeof(*hdr) + 2; + + if (skb_headroom(skb) < max_headroom || skb_cloned(skb) || skb_shared(skb)) { + struct sk_buff *new_skb = skb_realloc_headroom(skb, max_headroom); + if (!new_skb) { + ip_rt_put(rt); + goto tx_error; + } + if (skb->sk) + skb_set_owner_w(new_skb, skb->sk); + kfree_skb(skb); + skb = new_skb; + } + + data = skb->data; + islcp = ((data[0] << 8) + data[1]) == PPP_LCP && 1 <= data[2] && data[2] <= 7; + + /* compress protocol field */ + if ((opt->ppp_flags & SC_COMP_PROT) && data[0] == 0 && !islcp) + skb_pull(skb, 1); + + /* Put in the address/control bytes if necessary */ + if ((opt->ppp_flags & SC_COMP_AC) == 0 || islcp) { + data = skb_push(skb, 2); + data[0] = PPP_ALLSTATIONS; + data[1] = PPP_UI; + } + + len = skb->len; + + seq_recv = opt->seq_recv; + + if (opt->ack_sent == seq_recv) + header_len -= sizeof(hdr->ack); + + /* Push down and install GRE header */ + skb_push(skb, header_len); + hdr = (struct pptp_gre_header *)(skb->data); + + hdr->flags = PPTP_GRE_FLAG_K; + hdr->ver = PPTP_GRE_VER; + hdr->protocol = htons(PPTP_GRE_PROTO); + hdr->call_id = htons(opt->dst_addr.call_id); + + hdr->flags |= PPTP_GRE_FLAG_S; + hdr->seq = htonl(++opt->seq_sent); + if (opt->ack_sent != seq_recv) { + /* send ack with this message */ + hdr->ver |= PPTP_GRE_FLAG_A; + hdr->ack = htonl(seq_recv); + opt->ack_sent = seq_recv; + } + hdr->payload_len = htons(len); + + /* Push down and install the IP header. */ + + skb_reset_transport_header(skb); + skb_push(skb, sizeof(*iph)); + skb_reset_network_header(skb); + memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt)); + IPCB(skb)->flags &= ~(IPSKB_XFRM_TUNNEL_SIZE | IPSKB_XFRM_TRANSFORMED | IPSKB_REROUTED); + + iph = ip_hdr(skb); + iph->version = 4; + iph->ihl = sizeof(struct iphdr) >> 2; + if (ip_dont_fragment(sk, &rt->dst)) + iph->frag_off = htons(IP_DF); + else + iph->frag_off = 0; + iph->protocol = IPPROTO_GRE; + iph->tos = 0; + iph->daddr = rt->rt_dst; + iph->saddr = rt->rt_src; + iph->ttl = dst_metric(&rt->dst, RTAX_HOPLIMIT); + iph->tot_len = htons(skb->len); + + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); + + nf_reset(skb); + + skb->ip_summed = CHECKSUM_NONE; + ip_select_ident(iph, &rt->dst, NULL); + ip_send_check(iph); + + ip_local_out(skb); + +tx_error: + return 1; +} + +static int pptp_rcv_core(struct sock *sk, struct sk_buff *skb) +{ + struct pppox_sock *po = pppox_sk(sk); + struct pptp_opt *opt = &po->proto.pptp; + int headersize, payload_len, seq; + __u8 *payload; + struct pptp_gre_header *header; + + if (!(sk->sk_state & PPPOX_CONNECTED)) { + if (sock_queue_rcv_skb(sk, skb)) + goto drop; + return NET_RX_SUCCESS; + } + + header = (struct pptp_gre_header *)(skb->data); + + /* test if acknowledgement present */ + if (PPTP_GRE_IS_A(header->ver)) { + __u32 ack = (PPTP_GRE_IS_S(header->flags)) ? + header->ack : header->seq; /* ack in different place if S = 0 */ + + ack = ntohl(ack); + + if (ack > opt->ack_recv) + opt->ack_recv = ack; + /* also handle sequence number wrap-around */ + if (WRAPPED(ack, opt->ack_recv)) + opt->ack_recv = ack; + } + + /* test if payload present */ + if (!PPTP_GRE_IS_S(header->flags)) + goto drop; + + headersize = sizeof(*header); + payload_len = ntohs(header->payload_len); + seq = ntohl(header->seq); + + /* no ack present? */ + if (!PPTP_GRE_IS_A(header->ver)) + headersize -= sizeof(header->ack); + /* check for incomplete packet (length smaller than expected) */ + if (skb->len - headersize < payload_len) + goto drop; + + payload = skb->data + headersize; + /* check for expected sequence number */ + if (seq < opt->seq_recv + 1 || WRAPPED(opt->seq_recv, seq)) { + if ((payload[0] == PPP_ALLSTATIONS) && (payload[1] == PPP_UI) && + (PPP_PROTOCOL(payload) == PPP_LCP) && + ((payload[4] == PPP_LCP_ECHOREQ) || (payload[4] == PPP_LCP_ECHOREP))) + goto allow_packet; + } else { + opt->seq_recv = seq; +allow_packet: + skb_pull(skb, headersize); + + if (payload[0] == PPP_ALLSTATIONS && payload[1] == PPP_UI) { + /* chop off address/control */ + if (skb->len < 3) + goto drop; + skb_pull(skb, 2); + } + + if ((*skb->data) & 1) { + /* protocol is compressed */ + skb_push(skb, 1)[0] = 0; + } + + skb->ip_summed = CHECKSUM_NONE; + skb_set_network_header(skb, skb->head-skb->data); + ppp_input(&po->chan, skb); + + return NET_RX_SUCCESS; + } +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +static int pptp_rcv(struct sk_buff *skb) +{ + struct pppox_sock *po; + struct pptp_gre_header *header; + struct iphdr *iph; + + if (skb->pkt_type != PACKET_HOST) + goto drop; + + if (!pskb_may_pull(skb, 12)) + goto drop; + + iph = ip_hdr(skb); + + header = (struct pptp_gre_header *)skb->data; + + if (ntohs(header->protocol) != PPTP_GRE_PROTO || /* PPTP-GRE protocol for PPTP */ + PPTP_GRE_IS_C(header->flags) || /* flag C should be clear */ + PPTP_GRE_IS_R(header->flags) || /* flag R should be clear */ + !PPTP_GRE_IS_K(header->flags) || /* flag K should be set */ + (header->flags&0xF) != 0) /* routing and recursion ctrl = 0 */ + /* if invalid, discard this packet */ + goto drop; + + po = lookup_chan(htons(header->call_id), iph->saddr); + if (po) { + skb_dst_drop(skb); + nf_reset(skb); + return sk_receive_skb(sk_pppox(po), skb, 0); + } +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +static int pptp_bind(struct socket *sock, struct sockaddr *uservaddr, + int sockaddr_len) +{ + struct sock *sk = sock->sk; + struct sockaddr_pppox *sp = (struct sockaddr_pppox *) uservaddr; + struct pppox_sock *po = pppox_sk(sk); + struct pptp_opt *opt = &po->proto.pptp; + int error = 0; + + lock_sock(sk); + + opt->src_addr = sp->sa_addr.pptp; + if (add_chan(po)) { + release_sock(sk); + error = -EBUSY; + } + + release_sock(sk); + return error; +} + +static int pptp_connect(struct socket *sock, struct sockaddr *uservaddr, + int sockaddr_len, int flags) +{ + struct sock *sk = sock->sk; + struct sockaddr_pppox *sp = (struct sockaddr_pppox *) uservaddr; + struct pppox_sock *po = pppox_sk(sk); + struct pptp_opt *opt = &po->proto.pptp; + struct rtable *rt; + int error = 0; + + if (sp->sa_protocol != PX_PROTO_PPTP) + return -EINVAL; + + if (lookup_chan_dst(sp->sa_addr.pptp.call_id, sp->sa_addr.pptp.sin_addr.s_addr)) + return -EALREADY; + + lock_sock(sk); + /* Check for already bound sockets */ + if (sk->sk_state & PPPOX_CONNECTED) { + error = -EBUSY; + goto end; + } + + /* Check for already disconnected sockets, on attempts to disconnect */ + if (sk->sk_state & PPPOX_DEAD) { + error = -EALREADY; + goto end; + } + + if (!opt->src_addr.sin_addr.s_addr || !sp->sa_addr.pptp.sin_addr.s_addr) { + error = -EINVAL; + goto end; + } + + po->chan.private = sk; + po->chan.ops = &pptp_chan_ops; + + { + struct flowi fl = { + .nl_u = { + .ip4_u = { + .daddr = opt->dst_addr.sin_addr.s_addr, + .saddr = opt->src_addr.sin_addr.s_addr, + .tos = RT_CONN_FLAGS(sk) } }, + .proto = IPPROTO_GRE }; + security_sk_classify_flow(sk, &fl); + if (ip_route_output_key(&init_net, &rt, &fl)) { + error = -EHOSTUNREACH; + goto end; + } + sk_setup_caps(sk, &rt->dst); + } + po->chan.mtu = dst_mtu(&rt->dst); + if (!po->chan.mtu) + po->chan.mtu = PPP_MTU; + ip_rt_put(rt); + po->chan.mtu -= PPTP_HEADER_OVERHEAD; + + po->chan.hdrlen = 2 + sizeof(struct pptp_gre_header); + error = ppp_register_channel(&po->chan); + if (error) { + pr_err("PPTP: failed to register PPP channel (%d)\n", error); + goto end; + } + + opt->dst_addr = sp->sa_addr.pptp; + sk->sk_state = PPPOX_CONNECTED; + + end: + release_sock(sk); + return error; +} + +static int pptp_getname(struct socket *sock, struct sockaddr *uaddr, + int *usockaddr_len, int peer) +{ + int len = sizeof(struct sockaddr_pppox); + struct sockaddr_pppox sp; + + sp.sa_family = AF_PPPOX; + sp.sa_protocol = PX_PROTO_PPTP; + sp.sa_addr.pptp = pppox_sk(sock->sk)->proto.pptp.src_addr; + + memcpy(uaddr, &sp, len); + + *usockaddr_len = len; + + return 0; +} + +static int pptp_release(struct socket *sock) +{ + struct sock *sk = sock->sk; + struct pppox_sock *po; + struct pptp_opt *opt; + int error = 0; + + if (!sk) + return 0; + + lock_sock(sk); + + if (sock_flag(sk, SOCK_DEAD)) { + release_sock(sk); + return -EBADF; + } + + po = pppox_sk(sk); + opt = &po->proto.pptp; + del_chan(po); + + pppox_unbind_sock(sk); + sk->sk_state = PPPOX_DEAD; + + sock_orphan(sk); + sock->sk = NULL; + + release_sock(sk); + sock_put(sk); + + return error; +} + +static void pptp_sock_destruct(struct sock *sk) +{ + if (!(sk->sk_state & PPPOX_DEAD)) { + del_chan(pppox_sk(sk)); + pppox_unbind_sock(sk); + } + skb_queue_purge(&sk->sk_receive_queue); +} + +static int pptp_create(struct net *net, struct socket *sock) +{ + int error = -ENOMEM; + struct sock *sk; + struct pppox_sock *po; + struct pptp_opt *opt; + + sk = sk_alloc(net, PF_PPPOX, GFP_KERNEL, &pptp_sk_proto); + if (!sk) + goto out; + + sock_init_data(sock, sk); + + sock->state = SS_UNCONNECTED; + sock->ops = &pptp_ops; + + sk->sk_backlog_rcv = pptp_rcv_core; + sk->sk_state = PPPOX_NONE; + sk->sk_type = SOCK_STREAM; + sk->sk_family = PF_PPPOX; + sk->sk_protocol = PX_PROTO_PPTP; + sk->sk_destruct = pptp_sock_destruct; + + po = pppox_sk(sk); + opt = &po->proto.pptp; + + opt->seq_sent = 0; opt->seq_recv = 0; + opt->ack_recv = 0; opt->ack_sent = 0; + + error = 0; +out: + return error; +} + +static int pptp_ppp_ioctl(struct ppp_channel *chan, unsigned int cmd, + unsigned long arg) +{ + struct sock *sk = (struct sock *) chan->private; + struct pppox_sock *po = pppox_sk(sk); + struct pptp_opt *opt = &po->proto.pptp; + void __user *argp = (void __user *)arg; + int __user *p = argp; + int err, val; + + err = -EFAULT; + switch (cmd) { + case PPPIOCGFLAGS: + val = opt->ppp_flags; + if (put_user(val, p)) + break; + err = 0; + break; + case PPPIOCSFLAGS: + if (get_user(val, p)) + break; + opt->ppp_flags = val & ~SC_RCV_BITS; + err = 0; + break; + default: + err = -ENOTTY; + } + + return err; +} + +static struct ppp_channel_ops pptp_chan_ops = { + .start_xmit = pptp_xmit, + .ioctl = pptp_ppp_ioctl, +}; + +static struct proto pptp_sk_proto __read_mostly = { + .name = "PPTP", + .owner = THIS_MODULE, + .obj_size = sizeof(struct pppox_sock), +}; + +static const struct proto_ops pptp_ops = { + .family = AF_PPPOX, + .owner = THIS_MODULE, + .release = pptp_release, + .bind = pptp_bind, + .connect = pptp_connect, + .socketpair = sock_no_socketpair, + .accept = sock_no_accept, + .getname = pptp_getname, + .poll = sock_no_poll, + .listen = sock_no_listen, + .shutdown = sock_no_shutdown, + .setsockopt = sock_no_setsockopt, + .getsockopt = sock_no_getsockopt, + .sendmsg = sock_no_sendmsg, + .recvmsg = sock_no_recvmsg, + .mmap = sock_no_mmap, + .ioctl = pppox_ioctl, +}; + +static struct pppox_proto pppox_pptp_proto = { + .create = pptp_create, + .owner = THIS_MODULE, +}; + +static struct gre_protocol gre_pptp_protocol = { + .handler = pptp_rcv, +}; + +static int __init pptp_init_module(void) +{ + int err = 0; + pr_info("PPTP driver version " PPTP_DRIVER_VERSION "\n"); + + callid_sock = __vmalloc((MAX_CALLID + 1) * sizeof(void *), + GFP_KERNEL | __GFP_ZERO, PAGE_KERNEL); + if (!callid_sock) { + pr_err("PPTP: cann't allocate memory\n"); + return -ENOMEM; + } + + err = gre_add_protocol(&gre_pptp_protocol, GREPROTO_PPTP); + if (err) { + pr_err("PPTP: can't add gre protocol\n"); + goto out_mem_free; + } + + err = proto_register(&pptp_sk_proto, 0); + if (err) { + pr_err("PPTP: can't register sk_proto\n"); + goto out_gre_del_protocol; + } + + err = register_pppox_proto(PX_PROTO_PPTP, &pppox_pptp_proto); + if (err) { + pr_err("PPTP: can't register pppox_proto\n"); + goto out_unregister_sk_proto; + } + + return 0; + +out_unregister_sk_proto: + proto_unregister(&pptp_sk_proto); +out_gre_del_protocol: + gre_del_protocol(&gre_pptp_protocol, GREPROTO_PPTP); +out_mem_free: + vfree(callid_sock); + + return err; +} + +static void __exit pptp_exit_module(void) +{ + unregister_pppox_proto(PX_PROTO_PPTP); + proto_unregister(&pptp_sk_proto); + gre_del_protocol(&gre_pptp_protocol, GREPROTO_PPTP); + vfree(callid_sock); +} + +module_init(pptp_init_module); +module_exit(pptp_exit_module); + +MODULE_DESCRIPTION("Point-to-Point Tunneling Protocol"); +MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)"); +MODULE_LICENSE("GPL"); diff --git a/include/linux/if_pppox.h b/include/linux/if_pppox.h index 1925e0c3f16..1525b2156b2 100644 --- a/include/linux/if_pppox.h +++ b/include/linux/if_pppox.h @@ -40,25 +40,35 @@ * PPPoE addressing definition */ typedef __be16 sid_t; -struct pppoe_addr{ - sid_t sid; /* Session identifier */ - unsigned char remote[ETH_ALEN]; /* Remote address */ - char dev[IFNAMSIZ]; /* Local device to use */ +struct pppoe_addr { + sid_t sid; /* Session identifier */ + unsigned char remote[ETH_ALEN]; /* Remote address */ + char dev[IFNAMSIZ]; /* Local device to use */ }; /************************************************************************ - * Protocols supported by AF_PPPOX - */ + * PPTP addressing definition + */ +struct pptp_addr { + u16 call_id; + struct in_addr sin_addr; +}; + +/************************************************************************ + * Protocols supported by AF_PPPOX + */ #define PX_PROTO_OE 0 /* Currently just PPPoE */ #define PX_PROTO_OL2TP 1 /* Now L2TP also */ -#define PX_MAX_PROTO 2 - -struct sockaddr_pppox { - sa_family_t sa_family; /* address family, AF_PPPOX */ - unsigned int sa_protocol; /* protocol identifier */ - union{ - struct pppoe_addr pppoe; - }sa_addr; +#define PX_PROTO_PPTP 2 +#define PX_MAX_PROTO 3 + +struct sockaddr_pppox { + sa_family_t sa_family; /* address family, AF_PPPOX */ + unsigned int sa_protocol; /* protocol identifier */ + union { + struct pppoe_addr pppoe; + struct pptp_addr pptp; + } sa_addr; } __packed; /* The use of the above union isn't viable because the size of this @@ -101,7 +111,7 @@ struct pppoe_tag { __be16 tag_type; __be16 tag_len; char tag_data[0]; -} __attribute ((packed)); +} __packed; /* Tag identifiers */ #define PTT_EOL __cpu_to_be16(0x0000) @@ -150,15 +160,23 @@ struct pppoe_opt { relayed to (PPPoE relaying) */ }; +struct pptp_opt { + struct pptp_addr src_addr; + struct pptp_addr dst_addr; + u32 ack_sent, ack_recv; + u32 seq_sent, seq_recv; + int ppp_flags; +}; #include struct pppox_sock { /* struct sock must be the first member of pppox_sock */ - struct sock sk; - struct ppp_channel chan; + struct sock sk; + struct ppp_channel chan; struct pppox_sock *next; /* for hash table */ union { struct pppoe_opt pppoe; + struct pptp_opt pptp; } proto; __be16 num; }; diff --git a/include/net/gre.h b/include/net/gre.h new file mode 100644 index 00000000000..82665474bcb --- /dev/null +++ b/include/net/gre.h @@ -0,0 +1,18 @@ +#ifndef __LINUX_GRE_H +#define __LINUX_GRE_H + +#include + +#define GREPROTO_CISCO 0 +#define GREPROTO_PPTP 1 +#define GREPROTO_MAX 2 + +struct gre_protocol { + int (*handler)(struct sk_buff *skb); + void (*err_handler)(struct sk_buff *skb, u32 info); +}; + +int gre_add_protocol(const struct gre_protocol *proto, u8 version); +int gre_del_protocol(const struct gre_protocol *proto, u8 version); + +#endif diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 7c3a7d19124..7458bdae7e9 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -215,8 +215,15 @@ config NET_IPIP be inserted in and removed from the running kernel whenever you want). Most people won't need this and can say N. +config NET_IPGRE_DEMUX + tristate "IP: GRE demultiplexer" + help + This is helper module to demultiplex GRE packets on GRE version field criteria. + Required by ip_gre and pptp modules. + config NET_IPGRE tristate "IP: GRE tunnels over IP" + depends on NET_IPGRE_DEMUX help Tunneling means encapsulating data of one protocol type within another protocol and sending it over a channel that understands the diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index 80ff87ce43a..4978d22f9a7 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -20,6 +20,7 @@ obj-$(CONFIG_PROC_FS) += proc.o obj-$(CONFIG_IP_MULTIPLE_TABLES) += fib_rules.o obj-$(CONFIG_IP_MROUTE) += ipmr.o obj-$(CONFIG_NET_IPIP) += ipip.o +obj-$(CONFIG_NET_IPGRE_DEMUX) += gre.o obj-$(CONFIG_NET_IPGRE) += ip_gre.o obj-$(CONFIG_SYN_COOKIES) += syncookies.o obj-$(CONFIG_INET_AH) += ah4.o diff --git a/net/ipv4/gre.c b/net/ipv4/gre.c new file mode 100644 index 00000000000..b546736da2e --- /dev/null +++ b/net/ipv4/gre.c @@ -0,0 +1,151 @@ +/* + * GRE over IPv4 demultiplexer driver + * + * Authors: Dmitry Kozlov (xeb@mail.ru) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +const struct gre_protocol *gre_proto[GREPROTO_MAX] __read_mostly; +static DEFINE_SPINLOCK(gre_proto_lock); + +int gre_add_protocol(const struct gre_protocol *proto, u8 version) +{ + if (version >= GREPROTO_MAX) + goto err_out; + + spin_lock(&gre_proto_lock); + if (gre_proto[version]) + goto err_out_unlock; + + rcu_assign_pointer(gre_proto[version], proto); + spin_unlock(&gre_proto_lock); + return 0; + +err_out_unlock: + spin_unlock(&gre_proto_lock); +err_out: + return -1; +} +EXPORT_SYMBOL_GPL(gre_add_protocol); + +int gre_del_protocol(const struct gre_protocol *proto, u8 version) +{ + if (version >= GREPROTO_MAX) + goto err_out; + + spin_lock(&gre_proto_lock); + if (gre_proto[version] != proto) + goto err_out_unlock; + rcu_assign_pointer(gre_proto[version], NULL); + spin_unlock(&gre_proto_lock); + synchronize_rcu(); + return 0; + +err_out_unlock: + spin_unlock(&gre_proto_lock); +err_out: + return -1; +} +EXPORT_SYMBOL_GPL(gre_del_protocol); + +static int gre_rcv(struct sk_buff *skb) +{ + const struct gre_protocol *proto; + u8 ver; + int ret; + + if (!pskb_may_pull(skb, 12)) + goto drop; + + ver = skb->data[1]&0x7f; + if (ver >= GREPROTO_MAX) + goto drop; + + rcu_read_lock(); + proto = rcu_dereference(gre_proto[ver]); + if (!proto || !proto->handler) + goto drop_unlock; + ret = proto->handler(skb); + rcu_read_unlock(); + return ret; + +drop_unlock: + rcu_read_unlock(); +drop: + kfree_skb(skb); + return NET_RX_DROP; +} + +static void gre_err(struct sk_buff *skb, u32 info) +{ + const struct gre_protocol *proto; + u8 ver; + + if (!pskb_may_pull(skb, 12)) + goto drop; + + ver = skb->data[1]&0x7f; + if (ver >= GREPROTO_MAX) + goto drop; + + rcu_read_lock(); + proto = rcu_dereference(gre_proto[ver]); + if (!proto || !proto->err_handler) + goto drop_unlock; + proto->err_handler(skb, info); + rcu_read_unlock(); + return; + +drop_unlock: + rcu_read_unlock(); +drop: + kfree_skb(skb); +} + +static const struct net_protocol net_gre_protocol = { + .handler = gre_rcv, + .err_handler = gre_err, + .netns_ok = 1, +}; + +static int __init gre_init(void) +{ + pr_info("GRE over IPv4 demultiplexor driver"); + + if (inet_add_protocol(&net_gre_protocol, IPPROTO_GRE) < 0) { + pr_err("gre: can't add protocol\n"); + return -EAGAIN; + } + + return 0; +} + +static void __exit gre_exit(void) +{ + inet_del_protocol(&net_gre_protocol, IPPROTO_GRE); +} + +module_init(gre_init); +module_exit(gre_exit); + +MODULE_DESCRIPTION("GRE over IPv4 demultiplexer driver"); +MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)"); +MODULE_LICENSE("GPL"); + diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 945b20a5ad5..85176895495 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -44,6 +44,7 @@ #include #include #include +#include #ifdef CONFIG_IPV6 #include @@ -1278,10 +1279,9 @@ static void ipgre_fb_tunnel_init(struct net_device *dev) } -static const struct net_protocol ipgre_protocol = { - .handler = ipgre_rcv, - .err_handler = ipgre_err, - .netns_ok = 1, +static const struct gre_protocol ipgre_protocol = { + .handler = ipgre_rcv, + .err_handler = ipgre_err, }; static void ipgre_destroy_tunnels(struct ipgre_net *ign, struct list_head *head) @@ -1663,7 +1663,7 @@ static int __init ipgre_init(void) if (err < 0) return err; - err = inet_add_protocol(&ipgre_protocol, IPPROTO_GRE); + err = gre_add_protocol(&ipgre_protocol, GREPROTO_CISCO); if (err < 0) { printk(KERN_INFO "ipgre init: can't add protocol\n"); goto add_proto_failed; @@ -1683,7 +1683,7 @@ out: tap_ops_failed: rtnl_link_unregister(&ipgre_link_ops); rtnl_link_failed: - inet_del_protocol(&ipgre_protocol, IPPROTO_GRE); + gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO); add_proto_failed: unregister_pernet_device(&ipgre_net_ops); goto out; @@ -1693,7 +1693,7 @@ static void __exit ipgre_fini(void) { rtnl_link_unregister(&ipgre_tap_ops); rtnl_link_unregister(&ipgre_link_ops); - if (inet_del_protocol(&ipgre_protocol, IPPROTO_GRE) < 0) + if (gre_del_protocol(&ipgre_protocol, GREPROTO_CISCO) < 0) printk(KERN_INFO "ipgre close: can't remove protocol\n"); unregister_pernet_device(&ipgre_net_ops); } -- cgit v1.2.3 From 81ce790bd75d49a0d119f5d7b27405e1d9b1bd57 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 19 Aug 2010 23:51:33 +0000 Subject: irda: use net_device_stats from struct net_device struct net_device has its own struct net_device_stats member, so use this one instead of a private copy in the irlan_cb struct. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/irda/irlan_common.h | 1 - net/irda/irlan/irlan_eth.c | 32 +++++++++----------------------- 2 files changed, 9 insertions(+), 24 deletions(-) (limited to 'include/net') diff --git a/include/net/irda/irlan_common.h b/include/net/irda/irlan_common.h index 73cacb3ac16..0af8b8dfbc2 100644 --- a/include/net/irda/irlan_common.h +++ b/include/net/irda/irlan_common.h @@ -171,7 +171,6 @@ struct irlan_cb { int magic; struct list_head dev_list; struct net_device *dev; /* Ethernet device structure*/ - struct net_device_stats stats; __u32 saddr; /* Source device address */ __u32 daddr; /* Destination device address */ diff --git a/net/irda/irlan/irlan_eth.c b/net/irda/irlan/irlan_eth.c index 5bb8353105c..8ee1ff6c742 100644 --- a/net/irda/irlan/irlan_eth.c +++ b/net/irda/irlan/irlan_eth.c @@ -45,13 +45,11 @@ static int irlan_eth_close(struct net_device *dev); static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, struct net_device *dev); static void irlan_eth_set_multicast_list( struct net_device *dev); -static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev); static const struct net_device_ops irlan_eth_netdev_ops = { .ndo_open = irlan_eth_open, .ndo_stop = irlan_eth_close, .ndo_start_xmit = irlan_eth_xmit, - .ndo_get_stats = irlan_eth_get_stats, .ndo_set_multicast_list = irlan_eth_set_multicast_list, .ndo_change_mtu = eth_change_mtu, .ndo_validate_addr = eth_validate_addr, @@ -208,10 +206,10 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, * tried :-) DB */ /* irttp_data_request already free the packet */ - self->stats.tx_dropped++; + dev->stats.tx_dropped++; } else { - self->stats.tx_packets++; - self->stats.tx_bytes += len; + dev->stats.tx_packets++; + dev->stats.tx_bytes += len; } return NETDEV_TX_OK; @@ -226,15 +224,16 @@ static netdev_tx_t irlan_eth_xmit(struct sk_buff *skb, int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb) { struct irlan_cb *self = instance; + struct net_device *dev = self->dev; if (skb == NULL) { - ++self->stats.rx_dropped; + dev->stats.rx_dropped++; return 0; } if (skb->len < ETH_HLEN) { IRDA_DEBUG(0, "%s() : IrLAN frame too short (%d)\n", __func__, skb->len); - ++self->stats.rx_dropped; + dev->stats.rx_dropped++; dev_kfree_skb(skb); return 0; } @@ -244,10 +243,10 @@ int irlan_eth_receive(void *instance, void *sap, struct sk_buff *skb) * might have been previously set by the low level IrDA network * device driver */ - skb->protocol = eth_type_trans(skb, self->dev); /* Remove eth header */ + skb->protocol = eth_type_trans(skb, dev); /* Remove eth header */ - self->stats.rx_packets++; - self->stats.rx_bytes += skb->len; + dev->stats.rx_packets++; + dev->stats.rx_bytes += skb->len; netif_rx(skb); /* Eat it! */ @@ -348,16 +347,3 @@ static void irlan_eth_set_multicast_list(struct net_device *dev) else irlan_set_broadcast_filter(self, FALSE); } - -/* - * Function irlan_get_stats (dev) - * - * Get the current statistics for this device - * - */ -static struct net_device_stats *irlan_eth_get_stats(struct net_device *dev) -{ - struct irlan_cb *self = netdev_priv(dev); - - return &self->stats; -} -- cgit v1.2.3 From 633adf1ad1c92c02bd3f10bbd73737a969179378 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 12 Aug 2010 14:49:58 +0200 Subject: cfg80211: mark ieee80211_hdrlen const This function analyses only its single, value-passed argument, and has no side effects. Thus it can be const, which makes mac80211 smaller, for example: text data bss dec hex filename 362518 16720 884 380122 5ccda mac80211.ko (before) 362358 16720 884 379962 5cc3a mac80211.ko (after) a 160 byte saving in text size, and an optimisation because the function won't be called as often. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 2 +- net/wireless/util.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2fd06c60ffb..2b403c7ee32 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1715,7 +1715,7 @@ unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb); * ieee80211_hdrlen - get header length in bytes from frame control * @fc: frame control field in little-endian format */ -unsigned int ieee80211_hdrlen(__le16 fc); +unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc); /** * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3 diff --git a/net/wireless/util.c b/net/wireless/util.c index 0c8a1e8b769..1eb24162be6 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -221,7 +221,7 @@ const unsigned char bridge_tunnel_header[] __aligned(2) = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0xf8 }; EXPORT_SYMBOL(bridge_tunnel_header); -unsigned int ieee80211_hdrlen(__le16 fc) +unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc) { unsigned int hdrlen = 24; -- cgit v1.2.3 From 2e161f78e5f63a7f9fd25a766bb7f816a01eb14a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 12 Aug 2010 15:38:38 +0200 Subject: cfg80211/mac80211: extensible frame processing Allow userspace to register for more than just action frames by giving the frame subtype, and make it possible to use this in various modes as well. With some tweaks and some added functionality this will, in the future, also be usable in AP mode and be able to replace the cooked monitor interface currently used in that case. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 93 +++++++++++++++++++++++------ include/net/cfg80211.h | 56 +++++++++++------- net/mac80211/cfg.c | 12 ++-- net/mac80211/ieee80211_i.h | 1 + net/mac80211/iface.c | 6 +- net/mac80211/main.c | 37 ++++++++++++ net/mac80211/rx.c | 137 +++++++++++++++++++++++++++++------------- net/mac80211/status.c | 2 +- net/mac80211/util.c | 6 +- net/wireless/core.c | 8 +-- net/wireless/core.h | 21 +++---- net/wireless/mlme.c | 144 +++++++++++++++++++++++++++++---------------- net/wireless/nl80211.c | 108 +++++++++++++++++++++++++--------- net/wireless/nl80211.h | 14 ++--- net/wireless/util.c | 2 +- 15 files changed, 452 insertions(+), 195 deletions(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index 2c870168733..8af1e66c3cf 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -39,6 +39,43 @@ * TODO: need more info? */ +/** + * DOC: Frame transmission/registration support + * + * Frame transmission and registration support exists to allow userspace + * management entities such as wpa_supplicant react to management frames + * that are not being handled by the kernel. This includes, for example, + * certain classes of action frames that cannot be handled in the kernel + * for various reasons. + * + * Frame registration is done on a per-interface basis and registrations + * cannot be removed other than by closing the socket. It is possible to + * specify a registration filter to register, for example, only for a + * certain type of action frame. In particular with action frames, those + * that userspace registers for will not be returned as unhandled by the + * driver, so that the registered application has to take responsibility + * for doing that. + * + * The type of frame that can be registered for is also dependent on the + * driver and interface type. The frame types are advertised in wiphy + * attributes so applications know what to expect. + * + * NOTE: When an interface changes type while registrations are active, + * these registrations are ignored until the interface type is + * changed again. This means that changing the interface type can + * lead to a situation that couldn't otherwise be produced, but + * any such registrations will be dormant in the sense that they + * will not be serviced, i.e. they will not receive any frames. + * + * Frame transmission allows userspace to send for example the required + * responses to action frames. It is subject to some sanity checking, + * but many frames can be transmitted. When a frame was transmitted, its + * status is indicated to the sending socket. + * + * For more technical details, see the corresponding command descriptions + * below. + */ + /** * enum nl80211_commands - supported nl80211 commands * @@ -301,16 +338,18 @@ * rate selection. %NL80211_ATTR_IFINDEX is used to specify the interface * and @NL80211_ATTR_TX_RATES the set of allowed rates. * - * @NL80211_CMD_REGISTER_ACTION: Register for receiving certain action frames - * (via @NL80211_CMD_ACTION) for processing in userspace. This command - * requires an interface index and a match attribute containing the first - * few bytes of the frame that should match, e.g. a single byte for only - * a category match or four bytes for vendor frames including the OUI. - * The registration cannot be dropped, but is removed automatically - * when the netlink socket is closed. Multiple registrations can be made. - * @NL80211_CMD_ACTION: Action frame TX request and RX notification. This - * command is used both as a request to transmit an Action frame and as an - * event indicating reception of an Action frame that was not processed in + * @NL80211_CMD_REGISTER_FRAME: Register for receiving certain mgmt frames + * (via @NL80211_CMD_FRAME) for processing in userspace. This command + * requires an interface index, a frame type attribute (optional for + * backward compatibility reasons, if not given assumes action frames) + * and a match attribute containing the first few bytes of the frame + * that should match, e.g. a single byte for only a category match or + * four bytes for vendor frames including the OUI. The registration + * cannot be dropped, but is removed automatically when the netlink + * socket is closed. Multiple registrations can be made. + * @NL80211_CMD_FRAME: Management frame TX request and RX notification. This + * command is used both as a request to transmit a management frame and + * as an event indicating reception of a frame that was not processed in * kernel code, but is for us (i.e., which may need to be processed in a * user space application). %NL80211_ATTR_FRAME is used to specify the * frame contents (including header). %NL80211_ATTR_WIPHY_FREQ (and @@ -320,8 +359,8 @@ * operational channel). When called, this operation returns a cookie * (%NL80211_ATTR_COOKIE) that will be included with the TX status event * pertaining to the TX request. - * @NL80211_CMD_ACTION_TX_STATUS: Report TX status of an Action frame - * transmitted with %NL80211_CMD_ACTION. %NL80211_ATTR_COOKIE identifies + * @NL80211_CMD_FRAME_TX_STATUS: Report TX status of a management frame + * transmitted with %NL80211_CMD_FRAME. %NL80211_ATTR_COOKIE identifies * the TX command and %NL80211_ATTR_FRAME includes the contents of the * frame. %NL80211_ATTR_ACK flag is included if the recipient acknowledged * the frame. @@ -429,9 +468,12 @@ enum nl80211_commands { NL80211_CMD_SET_TX_BITRATE_MASK, - NL80211_CMD_REGISTER_ACTION, - NL80211_CMD_ACTION, - NL80211_CMD_ACTION_TX_STATUS, + NL80211_CMD_REGISTER_FRAME, + NL80211_CMD_REGISTER_ACTION = NL80211_CMD_REGISTER_FRAME, + NL80211_CMD_FRAME, + NL80211_CMD_ACTION = NL80211_CMD_FRAME, + NL80211_CMD_FRAME_TX_STATUS, + NL80211_CMD_ACTION_TX_STATUS = NL80211_CMD_FRAME_TX_STATUS, NL80211_CMD_SET_POWER_SAVE, NL80211_CMD_GET_POWER_SAVE, @@ -708,7 +750,16 @@ enum nl80211_commands { * is used with %NL80211_CMD_SET_TX_BITRATE_MASK. * * @NL80211_ATTR_FRAME_MATCH: A binary attribute which typically must contain - * at least one byte, currently used with @NL80211_CMD_REGISTER_ACTION. + * at least one byte, currently used with @NL80211_CMD_REGISTER_FRAME. + * @NL80211_ATTR_FRAME_TYPE: A u16 indicating the frame type/subtype for the + * @NL80211_CMD_REGISTER_FRAME command. + * @NL80211_ATTR_TX_FRAME_TYPES: wiphy capability attribute, which is a + * nested attribute of %NL80211_ATTR_FRAME_TYPE attributes, containing + * information about which frame types can be transmitted with + * %NL80211_CMD_FRAME. + * @NL80211_ATTR_RX_FRAME_TYPES: wiphy capability attribute, which is a + * nested attribute of %NL80211_ATTR_FRAME_TYPE attributes, containing + * information about which frame types can be registered for RX. * * @NL80211_ATTR_ACK: Flag attribute indicating that the frame was * acknowledged by the recipient. @@ -891,6 +942,10 @@ enum nl80211_attrs { NL80211_ATTR_WIPHY_TX_POWER_SETTING, NL80211_ATTR_WIPHY_TX_POWER_LEVEL, + NL80211_ATTR_TX_FRAME_TYPES, + NL80211_ATTR_RX_FRAME_TYPES, + NL80211_ATTR_FRAME_TYPE, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -947,7 +1002,7 @@ enum nl80211_attrs { * @NL80211_IFTYPE_MONITOR: monitor interface receiving all frames * @NL80211_IFTYPE_MESH_POINT: mesh point * @NL80211_IFTYPE_MAX: highest interface type number currently defined - * @__NL80211_IFTYPE_AFTER_LAST: internal use + * @NUM_NL80211_IFTYPES: number of defined interface types * * These values are used with the %NL80211_ATTR_IFTYPE * to set the type of an interface. @@ -964,8 +1019,8 @@ enum nl80211_iftype { NL80211_IFTYPE_MESH_POINT, /* keep last */ - __NL80211_IFTYPE_AFTER_LAST, - NL80211_IFTYPE_MAX = __NL80211_IFTYPE_AFTER_LAST - 1 + NUM_NL80211_IFTYPES, + NL80211_IFTYPE_MAX = NUM_NL80211_IFTYPES - 1 }; /** diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 2b403c7ee32..6a98b1b3bfd 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1020,7 +1020,7 @@ struct cfg80211_pmksa { * @cancel_remain_on_channel: Cancel an on-going remain-on-channel operation. * This allows the operation to be terminated prior to timeout based on * the duration value. - * @action: Transmit an action frame + * @mgmt_tx: Transmit a management frame * * @testmode_cmd: run a test mode command * @@ -1172,7 +1172,7 @@ struct cfg80211_ops { struct net_device *dev, u64 cookie); - int (*action)(struct wiphy *wiphy, struct net_device *dev, + int (*mgmt_tx)(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type, bool channel_type_valid, @@ -1236,6 +1236,10 @@ struct mac_address { u8 addr[ETH_ALEN]; }; +struct ieee80211_txrx_stypes { + u16 tx, rx; +}; + /** * struct wiphy - wireless hardware description * @reg_notifier: the driver's regulatory notification callback @@ -1286,6 +1290,10 @@ struct mac_address { * @privid: a pointer that drivers can use to identify if an arbitrary * wiphy is theirs, e.g. in global notifiers * @bands: information about bands/channels supported by this device + * + * @mgmt_stypes: bitmasks of frame subtypes that can be subscribed to or + * transmitted through nl80211, points to an array indexed by interface + * type */ struct wiphy { /* assign these fields before you register the wiphy */ @@ -1294,9 +1302,12 @@ struct wiphy { u8 perm_addr[ETH_ALEN]; u8 addr_mask[ETH_ALEN]; - u16 n_addresses; struct mac_address *addresses; + const struct ieee80211_txrx_stypes *mgmt_stypes; + + u16 n_addresses; + /* Supported interface modes, OR together BIT(NL80211_IFTYPE_...) */ u16 interface_modes; @@ -1492,8 +1503,8 @@ struct cfg80211_cached_keys; * set by driver (if supported) on add_interface BEFORE registering the * netdev and may otherwise be used by driver read-only, will be update * by cfg80211 on change_interface - * @action_registrations: list of registrations for action frames - * @action_registrations_lock: lock for the list + * @mgmt_registrations: list of registrations for management frames + * @mgmt_registrations_lock: lock for the list * @mtx: mutex used to lock data in this struct * @cleanup_work: work struct used for cleanup that can't be done directly */ @@ -1505,8 +1516,8 @@ struct wireless_dev { struct list_head list; struct net_device *netdev; - struct list_head action_registrations; - spinlock_t action_registrations_lock; + struct list_head mgmt_registrations; + spinlock_t mgmt_registrations_lock; struct mutex mtx; @@ -2373,38 +2384,39 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp); /** - * cfg80211_rx_action - notification of received, unprocessed Action frame + * cfg80211_rx_mgmt - notification of received, unprocessed management frame * @dev: network device * @freq: Frequency on which the frame was received in MHz - * @buf: Action frame (header + body) + * @buf: Management frame (header + body) * @len: length of the frame data * @gfp: context flags - * Returns %true if a user space application is responsible for rejecting the - * unrecognized Action frame; %false if no such application is registered - * (i.e., the driver is responsible for rejecting the unrecognized Action - * frame) + * + * Returns %true if a user space application has registered for this frame. + * For action frames, that makes it responsible for rejecting unrecognized + * action frames; %false otherwise, in which case for action frames the + * driver is responsible for rejecting the frame. * * This function is called whenever an Action frame is received for a station * mode interface, but is not processed in kernel. */ -bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf, - size_t len, gfp_t gfp); +bool cfg80211_rx_mgmt(struct net_device *dev, int freq, const u8 *buf, + size_t len, gfp_t gfp); /** - * cfg80211_action_tx_status - notification of TX status for Action frame + * cfg80211_mgmt_tx_status - notification of TX status for management frame * @dev: network device - * @cookie: Cookie returned by cfg80211_ops::action() - * @buf: Action frame (header + body) + * @cookie: Cookie returned by cfg80211_ops::mgmt_tx() + * @buf: Management frame (header + body) * @len: length of the frame data * @ack: Whether frame was acknowledged * @gfp: context flags * - * This function is called whenever an Action frame was requested to be - * transmitted with cfg80211_ops::action() to report the TX status of the + * This function is called whenever a management frame was requested to be + * transmitted with cfg80211_ops::mgmt_tx() to report the TX status of the * transmission attempt. */ -void cfg80211_action_tx_status(struct net_device *dev, u64 cookie, - const u8 *buf, size_t len, bool ack, gfp_t gfp); +void cfg80211_mgmt_tx_status(struct net_device *dev, u64 cookie, + const u8 *buf, size_t len, bool ack, gfp_t gfp); /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f9a31776613..94787d21282 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1521,11 +1521,11 @@ static int ieee80211_cancel_remain_on_channel(struct wiphy *wiphy, return ieee80211_wk_cancel_remain_on_channel(sdata, cookie); } -static int ieee80211_action(struct wiphy *wiphy, struct net_device *dev, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, - const u8 *buf, size_t len, u64 *cookie) +static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + bool channel_type_valid, + const u8 *buf, size_t len, u64 *cookie) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@ -1625,6 +1625,6 @@ struct cfg80211_ops mac80211_config_ops = { .set_bitrate_mask = ieee80211_set_bitrate_mask, .remain_on_channel = ieee80211_remain_on_channel, .cancel_remain_on_channel = ieee80211_cancel_remain_on_channel, - .action = ieee80211_action, + .mgmt_tx = ieee80211_mgmt_tx, .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config, }; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 1bf05bfd149..e73ae51dc03 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -170,6 +170,7 @@ typedef unsigned __bitwise__ ieee80211_rx_result; #define IEEE80211_RX_RA_MATCH BIT(1) #define IEEE80211_RX_AMSDU BIT(2) #define IEEE80211_RX_FRAGMENTED BIT(3) +#define IEEE80211_MALFORMED_ACTION_FRM BIT(4) /* only add flags here that do not change with subframes of an aMPDU */ struct ieee80211_rx_data { diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 9459aeee0dd..86f434f234a 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -177,7 +177,7 @@ static int ieee80211_open(struct net_device *dev) /* no special treatment */ break; case NL80211_IFTYPE_UNSPECIFIED: - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: /* cannot happen */ WARN_ON(1); break; @@ -634,7 +634,7 @@ static void ieee80211_teardown_sdata(struct net_device *dev) case NL80211_IFTYPE_MONITOR: break; case NL80211_IFTYPE_UNSPECIFIED: - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: BUG(); break; } @@ -886,7 +886,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, case NL80211_IFTYPE_AP_VLAN: break; case NL80211_IFTYPE_UNSPECIFIED: - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: BUG(); break; } diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 0afccda42a2..a53feac4618 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -417,6 +417,41 @@ void ieee80211_napi_complete(struct ieee80211_hw *hw) } EXPORT_SYMBOL(ieee80211_napi_complete); +/* There isn't a lot of sense in it, but you can transmit anything you like */ +static const struct ieee80211_txrx_stypes +ieee80211_default_mgmt_stypes[NUM_NL80211_IFTYPES] = { + [NL80211_IFTYPE_ADHOC] = { + .tx = 0xffff, + .rx = BIT(IEEE80211_STYPE_ACTION >> 4), + }, + [NL80211_IFTYPE_STATION] = { + .tx = 0xffff, + .rx = BIT(IEEE80211_STYPE_ACTION >> 4) | + BIT(IEEE80211_STYPE_PROBE_REQ >> 4), + }, + [NL80211_IFTYPE_AP] = { + .tx = 0xffff, + .rx = BIT(IEEE80211_STYPE_ASSOC_REQ >> 4) | + BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) | + BIT(IEEE80211_STYPE_PROBE_REQ >> 4) | + BIT(IEEE80211_STYPE_DISASSOC >> 4) | + BIT(IEEE80211_STYPE_AUTH >> 4) | + BIT(IEEE80211_STYPE_DEAUTH >> 4) | + BIT(IEEE80211_STYPE_ACTION >> 4), + }, + [NL80211_IFTYPE_AP_VLAN] = { + /* copy AP */ + .tx = 0xffff, + .rx = BIT(IEEE80211_STYPE_ASSOC_REQ >> 4) | + BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) | + BIT(IEEE80211_STYPE_PROBE_REQ >> 4) | + BIT(IEEE80211_STYPE_DISASSOC >> 4) | + BIT(IEEE80211_STYPE_AUTH >> 4) | + BIT(IEEE80211_STYPE_DEAUTH >> 4) | + BIT(IEEE80211_STYPE_ACTION >> 4), + }, +}; + struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, const struct ieee80211_ops *ops) { @@ -446,6 +481,8 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, if (!wiphy) return NULL; + wiphy->mgmt_stypes = ieee80211_default_mgmt_stypes; + wiphy->flags |= WIPHY_FLAG_NETNS_OK | WIPHY_FLAG_4ADDR_AP | WIPHY_FLAG_4ADDR_STATION; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 4fdbed58ca2..aa41e382bbb 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -1939,14 +1939,37 @@ static void ieee80211_process_sa_query_req(struct ieee80211_sub_if_data *sdata, ieee80211_tx_skb(sdata, skb); } +static ieee80211_rx_result debug_noinline +ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) +{ + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; + + /* + * From here on, look only at management frames. + * Data and control frames are already handled, + * and unknown (reserved) frames are useless. + */ + if (rx->skb->len < 24) + return RX_DROP_MONITOR; + + if (!ieee80211_is_mgmt(mgmt->frame_control)) + return RX_DROP_MONITOR; + + if (!(rx->flags & IEEE80211_RX_RA_MATCH)) + return RX_DROP_MONITOR; + + if (ieee80211_drop_unencrypted_mgmt(rx)) + return RX_DROP_UNUSABLE; + + return RX_CONTINUE; +} + static ieee80211_rx_result debug_noinline ieee80211_rx_h_action(struct ieee80211_rx_data *rx) { struct ieee80211_local *local = rx->local; struct ieee80211_sub_if_data *sdata = rx->sdata; struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; - struct sk_buff *nskb; - struct ieee80211_rx_status *status; int len = rx->skb->len; if (!ieee80211_is_action(mgmt->frame_control)) @@ -1962,9 +1985,6 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (!(rx->flags & IEEE80211_RX_RA_MATCH)) return RX_DROP_UNUSABLE; - if (ieee80211_drop_unencrypted_mgmt(rx)) - return RX_DROP_UNUSABLE; - switch (mgmt->u.action.category) { case WLAN_CATEGORY_BACK: /* @@ -2055,17 +2075,36 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) goto queue; } + return RX_CONTINUE; + invalid: - /* - * For AP mode, hostapd is responsible for handling any action - * frames that we didn't handle, including returning unknown - * ones. For all other modes we will return them to the sender, - * setting the 0x80 bit in the action category, as required by - * 802.11-2007 7.3.1.11. - */ - if (sdata->vif.type == NL80211_IFTYPE_AP || - sdata->vif.type == NL80211_IFTYPE_AP_VLAN) - return RX_DROP_MONITOR; + rx->flags |= IEEE80211_MALFORMED_ACTION_FRM; + /* will return in the next handlers */ + return RX_CONTINUE; + + handled: + if (rx->sta) + rx->sta->rx_packets++; + dev_kfree_skb(rx->skb); + return RX_QUEUED; + + queue: + rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; + skb_queue_tail(&sdata->skb_queue, rx->skb); + ieee80211_queue_work(&local->hw, &sdata->work); + if (rx->sta) + rx->sta->rx_packets++; + return RX_QUEUED; +} + +static ieee80211_rx_result debug_noinline +ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx) +{ + struct ieee80211_rx_status *status; + + /* skip known-bad action frames and return them in the next handler */ + if (rx->flags & IEEE80211_MALFORMED_ACTION_FRM) + return RX_CONTINUE; /* * Getting here means the kernel doesn't know how to handle @@ -2075,10 +2114,44 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) */ status = IEEE80211_SKB_RXCB(rx->skb); - if (cfg80211_rx_action(rx->sdata->dev, status->freq, - rx->skb->data, rx->skb->len, - GFP_ATOMIC)) - goto handled; + if (cfg80211_rx_mgmt(rx->sdata->dev, status->freq, + rx->skb->data, rx->skb->len, + GFP_ATOMIC)) { + if (rx->sta) + rx->sta->rx_packets++; + dev_kfree_skb(rx->skb); + return RX_QUEUED; + } + + + return RX_CONTINUE; +} + +static ieee80211_rx_result debug_noinline +ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx) +{ + struct ieee80211_local *local = rx->local; + struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; + struct sk_buff *nskb; + struct ieee80211_sub_if_data *sdata = rx->sdata; + + if (!ieee80211_is_action(mgmt->frame_control)) + return RX_CONTINUE; + + /* + * For AP mode, hostapd is responsible for handling any action + * frames that we didn't handle, including returning unknown + * ones. For all other modes we will return them to the sender, + * setting the 0x80 bit in the action category, as required by + * 802.11-2007 7.3.1.11. + * Newer versions of hostapd shall also use the management frame + * registration mechanisms, but older ones still use cooked + * monitor interfaces so push all frames there. + */ + if (!(rx->flags & IEEE80211_MALFORMED_ACTION_FRM) && + (sdata->vif.type == NL80211_IFTYPE_AP || + sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) + return RX_DROP_MONITOR; /* do not return rejected action frames */ if (mgmt->u.action.category & 0x80) @@ -2097,20 +2170,8 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) ieee80211_tx_skb(rx->sdata, nskb); } - - handled: - if (rx->sta) - rx->sta->rx_packets++; dev_kfree_skb(rx->skb); return RX_QUEUED; - - queue: - rx->skb->pkt_type = IEEE80211_SDATA_QUEUE_TYPE_FRAME; - skb_queue_tail(&sdata->skb_queue, rx->skb); - ieee80211_queue_work(&local->hw, &sdata->work); - if (rx->sta) - rx->sta->rx_packets++; - return RX_QUEUED; } static ieee80211_rx_result debug_noinline @@ -2121,15 +2182,6 @@ ieee80211_rx_h_mgmt(struct ieee80211_rx_data *rx) struct ieee80211_mgmt *mgmt = (void *)rx->skb->data; __le16 stype; - if (!(rx->flags & IEEE80211_RX_RA_MATCH)) - return RX_DROP_MONITOR; - - if (rx->skb->len < 24) - return RX_DROP_MONITOR; - - if (ieee80211_drop_unencrypted_mgmt(rx)) - return RX_DROP_UNUSABLE; - rxs = ieee80211_work_rx_mgmt(rx->sdata, rx->skb); if (rxs != RX_CONTINUE) return rxs; @@ -2374,7 +2426,10 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx, if (res != RX_CONTINUE) goto rxh_next; + CALL_RXH(ieee80211_rx_h_mgmt_check) CALL_RXH(ieee80211_rx_h_action) + CALL_RXH(ieee80211_rx_h_userspace_mgmt) + CALL_RXH(ieee80211_rx_h_action_return) CALL_RXH(ieee80211_rx_h_mgmt) rxh_next: @@ -2527,7 +2582,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, break; case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_UNSPECIFIED: - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: /* should never get here */ WARN_ON(1); break; diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 10caec5ea8f..67a35841bef 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -296,7 +296,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) } if (info->flags & IEEE80211_TX_INTFL_NL80211_FRAME_TX) - cfg80211_action_tx_status( + cfg80211_mgmt_tx_status( skb->dev, (unsigned long) skb, skb->data, skb->len, !!(info->flags & IEEE80211_TX_STAT_ACK), GFP_ATOMIC); diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 748387d45bc..cd2b485fed4 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -471,7 +471,7 @@ void ieee80211_iterate_active_interfaces( list_for_each_entry(sdata, &local->interfaces, list) { switch (sdata->vif.type) { - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_AP_VLAN: @@ -505,7 +505,7 @@ void ieee80211_iterate_active_interfaces_atomic( list_for_each_entry_rcu(sdata, &local->interfaces, list) { switch (sdata->vif.type) { - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_AP_VLAN: @@ -1189,7 +1189,7 @@ int ieee80211_reconfig(struct ieee80211_local *local) /* ignore virtual */ break; case NL80211_IFTYPE_UNSPECIFIED: - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: WARN_ON(1); break; } diff --git a/net/wireless/core.c b/net/wireless/core.c index c70909c3eae..d52630bbab0 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -433,7 +433,7 @@ int wiphy_register(struct wiphy *wiphy) /* sanity check ifmodes */ WARN_ON(!ifmodes); - ifmodes &= ((1 << __NL80211_IFTYPE_AFTER_LAST) - 1) & ~1; + ifmodes &= ((1 << NUM_NL80211_IFTYPES) - 1) & ~1; if (WARN_ON(ifmodes != wiphy->interface_modes)) wiphy->interface_modes = ifmodes; @@ -685,8 +685,8 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, INIT_WORK(&wdev->cleanup_work, wdev_cleanup_work); INIT_LIST_HEAD(&wdev->event_list); spin_lock_init(&wdev->event_lock); - INIT_LIST_HEAD(&wdev->action_registrations); - spin_lock_init(&wdev->action_registrations_lock); + INIT_LIST_HEAD(&wdev->mgmt_registrations); + spin_lock_init(&wdev->mgmt_registrations_lock); mutex_lock(&rdev->devlist_mtx); list_add_rcu(&wdev->list, &rdev->netdev_list); @@ -806,7 +806,7 @@ static int cfg80211_netdev_notifier_call(struct notifier_block * nb, sysfs_remove_link(&dev->dev.kobj, "phy80211"); list_del_rcu(&wdev->list); rdev->devlist_generation++; - cfg80211_mlme_purge_actions(wdev); + cfg80211_mlme_purge_registrations(wdev); #ifdef CONFIG_CFG80211_WEXT kfree(wdev->wext.keys); #endif diff --git a/net/wireless/core.h b/net/wireless/core.h index 63d57ae399c..58ab2c791d2 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -331,16 +331,17 @@ void __cfg80211_connect_result(struct net_device *dev, const u8 *bssid, const u8 *resp_ie, size_t resp_ie_len, u16 status, bool wextev, struct cfg80211_bss *bss); -int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid, - const u8 *match_data, int match_len); -void cfg80211_mlme_unregister_actions(struct wireless_dev *wdev, u32 nlpid); -void cfg80211_mlme_purge_actions(struct wireless_dev *wdev); -int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, - const u8 *buf, size_t len, u64 *cookie); +int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, + u16 frame_type, const u8 *match_data, + int match_len); +void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid); +void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev); +int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + bool channel_type_valid, + const u8 *buf, size_t len, u64 *cookie); /* SME */ int __cfg80211_connect(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index ee0af32ed59..8515b1e5c57 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -748,31 +748,51 @@ void cfg80211_new_sta(struct net_device *dev, const u8 *mac_addr, } EXPORT_SYMBOL(cfg80211_new_sta); -struct cfg80211_action_registration { +struct cfg80211_mgmt_registration { struct list_head list; u32 nlpid; int match_len; + __le16 frame_type; + u8 match[]; }; -int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid, - const u8 *match_data, int match_len) +int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, + u16 frame_type, const u8 *match_data, + int match_len) { - struct cfg80211_action_registration *reg, *nreg; + struct cfg80211_mgmt_registration *reg, *nreg; int err = 0; + u16 mgmt_type; + + if (!wdev->wiphy->mgmt_stypes) + return -EOPNOTSUPP; + + if ((frame_type & IEEE80211_FCTL_FTYPE) != IEEE80211_FTYPE_MGMT) + return -EINVAL; + + if (frame_type & ~(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) + return -EINVAL; + + mgmt_type = (frame_type & IEEE80211_FCTL_STYPE) >> 4; + if (!(wdev->wiphy->mgmt_stypes[wdev->iftype].rx & BIT(mgmt_type))) + return -EINVAL; nreg = kzalloc(sizeof(*reg) + match_len, GFP_KERNEL); if (!nreg) return -ENOMEM; - spin_lock_bh(&wdev->action_registrations_lock); + spin_lock_bh(&wdev->mgmt_registrations_lock); - list_for_each_entry(reg, &wdev->action_registrations, list) { + list_for_each_entry(reg, &wdev->mgmt_registrations, list) { int mlen = min(match_len, reg->match_len); + if (frame_type != le16_to_cpu(reg->frame_type)) + continue; + if (memcmp(reg->match, match_data, mlen) == 0) { err = -EALREADY; break; @@ -787,62 +807,75 @@ int cfg80211_mlme_register_action(struct wireless_dev *wdev, u32 snd_pid, memcpy(nreg->match, match_data, match_len); nreg->match_len = match_len; nreg->nlpid = snd_pid; - list_add(&nreg->list, &wdev->action_registrations); + nreg->frame_type = cpu_to_le16(frame_type); + list_add(&nreg->list, &wdev->mgmt_registrations); out: - spin_unlock_bh(&wdev->action_registrations_lock); + spin_unlock_bh(&wdev->mgmt_registrations_lock); return err; } -void cfg80211_mlme_unregister_actions(struct wireless_dev *wdev, u32 nlpid) +void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid) { - struct cfg80211_action_registration *reg, *tmp; + struct cfg80211_mgmt_registration *reg, *tmp; - spin_lock_bh(&wdev->action_registrations_lock); + spin_lock_bh(&wdev->mgmt_registrations_lock); - list_for_each_entry_safe(reg, tmp, &wdev->action_registrations, list) { + list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) { if (reg->nlpid == nlpid) { list_del(®->list); kfree(reg); } } - spin_unlock_bh(&wdev->action_registrations_lock); + spin_unlock_bh(&wdev->mgmt_registrations_lock); } -void cfg80211_mlme_purge_actions(struct wireless_dev *wdev) +void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) { - struct cfg80211_action_registration *reg, *tmp; + struct cfg80211_mgmt_registration *reg, *tmp; - spin_lock_bh(&wdev->action_registrations_lock); + spin_lock_bh(&wdev->mgmt_registrations_lock); - list_for_each_entry_safe(reg, tmp, &wdev->action_registrations, list) { + list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) { list_del(®->list); kfree(reg); } - spin_unlock_bh(&wdev->action_registrations_lock); + spin_unlock_bh(&wdev->mgmt_registrations_lock); } -int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, - struct net_device *dev, - struct ieee80211_channel *chan, - enum nl80211_channel_type channel_type, - bool channel_type_valid, - const u8 *buf, size_t len, u64 *cookie) +int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev, + struct net_device *dev, + struct ieee80211_channel *chan, + enum nl80211_channel_type channel_type, + bool channel_type_valid, + const u8 *buf, size_t len, u64 *cookie) { struct wireless_dev *wdev = dev->ieee80211_ptr; const struct ieee80211_mgmt *mgmt; + u16 stype; + + if (!wdev->wiphy->mgmt_stypes) + return -EOPNOTSUPP; - if (rdev->ops->action == NULL) + if (!rdev->ops->mgmt_tx) return -EOPNOTSUPP; + if (len < 24 + 1) return -EINVAL; mgmt = (const struct ieee80211_mgmt *) buf; - if (!ieee80211_is_action(mgmt->frame_control)) + + if (!ieee80211_is_mgmt(mgmt->frame_control)) return -EINVAL; - if (mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) { + + stype = le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE; + if (!(wdev->wiphy->mgmt_stypes[wdev->iftype].tx & BIT(stype >> 4))) + return -EINVAL; + + if (ieee80211_is_action(mgmt->frame_control) && + mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) { /* Verify that we are associated with the destination AP */ wdev_lock(wdev); @@ -863,64 +896,75 @@ int cfg80211_mlme_action(struct cfg80211_registered_device *rdev, return -EINVAL; /* Transmit the Action frame as requested by user space */ - return rdev->ops->action(&rdev->wiphy, dev, chan, channel_type, - channel_type_valid, buf, len, cookie); + return rdev->ops->mgmt_tx(&rdev->wiphy, dev, chan, channel_type, + channel_type_valid, buf, len, cookie); } -bool cfg80211_rx_action(struct net_device *dev, int freq, const u8 *buf, - size_t len, gfp_t gfp) +bool cfg80211_rx_mgmt(struct net_device *dev, int freq, const u8 *buf, + size_t len, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); - struct cfg80211_action_registration *reg; - const u8 *action_data; - int action_data_len; + struct cfg80211_mgmt_registration *reg; + const struct ieee80211_txrx_stypes *stypes = + &wiphy->mgmt_stypes[wdev->iftype]; + struct ieee80211_mgmt *mgmt = (void *)buf; + const u8 *data; + int data_len; bool result = false; + __le16 ftype = mgmt->frame_control & + cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE); + u16 stype; - /* frame length - min size excluding category */ - action_data_len = len - (IEEE80211_MIN_ACTION_SIZE - 1); + stype = (le16_to_cpu(mgmt->frame_control) & IEEE80211_FCTL_STYPE) >> 4; - /* action data starts with category */ - action_data = buf + IEEE80211_MIN_ACTION_SIZE - 1; + if (!(stypes->rx & BIT(stype))) + return false; - spin_lock_bh(&wdev->action_registrations_lock); + data = buf + ieee80211_hdrlen(mgmt->frame_control); + data_len = len - ieee80211_hdrlen(mgmt->frame_control); + + spin_lock_bh(&wdev->mgmt_registrations_lock); + + list_for_each_entry(reg, &wdev->mgmt_registrations, list) { + if (reg->frame_type != ftype) + continue; - list_for_each_entry(reg, &wdev->action_registrations, list) { - if (reg->match_len > action_data_len) + if (reg->match_len > data_len) continue; - if (memcmp(reg->match, action_data, reg->match_len)) + if (memcmp(reg->match, data, reg->match_len)) continue; /* found match! */ /* Indicate the received Action frame to user space */ - if (nl80211_send_action(rdev, dev, reg->nlpid, freq, - buf, len, gfp)) + if (nl80211_send_mgmt(rdev, dev, reg->nlpid, freq, + buf, len, gfp)) continue; result = true; break; } - spin_unlock_bh(&wdev->action_registrations_lock); + spin_unlock_bh(&wdev->mgmt_registrations_lock); return result; } -EXPORT_SYMBOL(cfg80211_rx_action); +EXPORT_SYMBOL(cfg80211_rx_mgmt); -void cfg80211_action_tx_status(struct net_device *dev, u64 cookie, - const u8 *buf, size_t len, bool ack, gfp_t gfp) +void cfg80211_mgmt_tx_status(struct net_device *dev, u64 cookie, + const u8 *buf, size_t len, bool ack, gfp_t gfp) { struct wireless_dev *wdev = dev->ieee80211_ptr; struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); /* Indicate TX status of the Action frame to user space */ - nl80211_send_action_tx_status(rdev, dev, cookie, buf, len, ack, gfp); + nl80211_send_mgmt_tx_status(rdev, dev, cookie, buf, len, ack, gfp); } -EXPORT_SYMBOL(cfg80211_action_tx_status); +EXPORT_SYMBOL(cfg80211_mgmt_tx_status); void cfg80211_cqm_rssi_notify(struct net_device *dev, enum nl80211_cqm_rssi_threshold_event rssi_event, diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index bb5b78eebeb..927ffbd2aeb 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -156,6 +156,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_WIPHY_TX_POWER_SETTING] = { .type = NLA_U32 }, [NL80211_ATTR_WIPHY_TX_POWER_LEVEL] = { .type = NLA_U32 }, + [NL80211_ATTR_FRAME_TYPE] = { .type = NLA_U16 }, }; /* policy for the attributes */ @@ -437,6 +438,8 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, struct ieee80211_rate *rate; int i; u16 ifmodes = dev->wiphy.interface_modes; + const struct ieee80211_txrx_stypes *mgmt_stypes = + dev->wiphy.mgmt_stypes; hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY); if (!hdr) @@ -587,7 +590,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, CMD(flush_pmksa, FLUSH_PMKSA); CMD(remain_on_channel, REMAIN_ON_CHANNEL); CMD(set_bitrate_mask, SET_TX_BITRATE_MASK); - CMD(action, ACTION); + CMD(mgmt_tx, FRAME); if (dev->wiphy.flags & WIPHY_FLAG_NETNS_OK) { i++; NLA_PUT_U32(msg, i, NL80211_CMD_SET_WIPHY_NETNS); @@ -608,6 +611,53 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, nla_nest_end(msg, nl_cmds); + if (mgmt_stypes) { + u16 stypes; + struct nlattr *nl_ftypes, *nl_ifs; + enum nl80211_iftype ift; + + nl_ifs = nla_nest_start(msg, NL80211_ATTR_TX_FRAME_TYPES); + if (!nl_ifs) + goto nla_put_failure; + + for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { + nl_ftypes = nla_nest_start(msg, ift); + if (!nl_ftypes) + goto nla_put_failure; + i = 0; + stypes = mgmt_stypes[ift].tx; + while (stypes) { + if (stypes & 1) + NLA_PUT_U16(msg, NL80211_ATTR_FRAME_TYPE, + (i << 4) | IEEE80211_FTYPE_MGMT); + stypes >>= 1; + i++; + } + nla_nest_end(msg, nl_ftypes); + } + + nl_ifs = nla_nest_start(msg, NL80211_ATTR_RX_FRAME_TYPES); + if (!nl_ifs) + goto nla_put_failure; + + for (ift = 0; ift < NUM_NL80211_IFTYPES; ift++) { + nl_ftypes = nla_nest_start(msg, ift); + if (!nl_ftypes) + goto nla_put_failure; + i = 0; + stypes = mgmt_stypes[ift].rx; + while (stypes) { + if (stypes & 1) + NLA_PUT_U16(msg, NL80211_ATTR_FRAME_TYPE, + (i << 4) | IEEE80211_FTYPE_MGMT); + stypes >>= 1; + i++; + } + nla_nest_end(msg, nl_ftypes); + } + nla_nest_end(msg, nl_ifs); + } + return genlmsg_end(msg, hdr); nla_put_failure: @@ -4732,17 +4782,18 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb, return err; } -static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info) +static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev; struct net_device *dev; + u16 frame_type = IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION; int err; if (!info->attrs[NL80211_ATTR_FRAME_MATCH]) return -EINVAL; - if (nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH]) < 1) - return -EINVAL; + if (info->attrs[NL80211_ATTR_FRAME_TYPE]) + frame_type = nla_get_u16(info->attrs[NL80211_ATTR_FRAME_TYPE]); rtnl_lock(); @@ -4757,12 +4808,13 @@ static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info) } /* not much point in registering if we can't reply */ - if (!rdev->ops->action) { + if (!rdev->ops->mgmt_tx) { err = -EOPNOTSUPP; goto out; } - err = cfg80211_mlme_register_action(dev->ieee80211_ptr, info->snd_pid, + err = cfg80211_mlme_register_mgmt(dev->ieee80211_ptr, info->snd_pid, + frame_type, nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]), nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH])); out: @@ -4773,7 +4825,7 @@ static int nl80211_register_action(struct sk_buff *skb, struct genl_info *info) return err; } -static int nl80211_action(struct sk_buff *skb, struct genl_info *info) +static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) { struct cfg80211_registered_device *rdev; struct net_device *dev; @@ -4796,7 +4848,7 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info) if (err) goto unlock_rtnl; - if (!rdev->ops->action) { + if (!rdev->ops->mgmt_tx) { err = -EOPNOTSUPP; goto out; } @@ -4839,17 +4891,17 @@ static int nl80211_action(struct sk_buff *skb, struct genl_info *info) } hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, - NL80211_CMD_ACTION); + NL80211_CMD_FRAME); if (IS_ERR(hdr)) { err = PTR_ERR(hdr); goto free_msg; } - err = cfg80211_mlme_action(rdev, dev, chan, channel_type, - channel_type_valid, - nla_data(info->attrs[NL80211_ATTR_FRAME]), - nla_len(info->attrs[NL80211_ATTR_FRAME]), - &cookie); + err = cfg80211_mlme_mgmt_tx(rdev, dev, chan, channel_type, + channel_type_valid, + nla_data(info->attrs[NL80211_ATTR_FRAME]), + nla_len(info->attrs[NL80211_ATTR_FRAME]), + &cookie); if (err) goto free_msg; @@ -5348,14 +5400,14 @@ static struct genl_ops nl80211_ops[] = { .flags = GENL_ADMIN_PERM, }, { - .cmd = NL80211_CMD_REGISTER_ACTION, - .doit = nl80211_register_action, + .cmd = NL80211_CMD_REGISTER_FRAME, + .doit = nl80211_register_mgmt, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, { - .cmd = NL80211_CMD_ACTION, - .doit = nl80211_action, + .cmd = NL80211_CMD_FRAME, + .doit = nl80211_tx_mgmt, .policy = nl80211_policy, .flags = GENL_ADMIN_PERM, }, @@ -6055,9 +6107,9 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, nl80211_mlme_mcgrp.id, gfp); } -int nl80211_send_action(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u32 nlpid, - int freq, const u8 *buf, size_t len, gfp_t gfp) +int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u32 nlpid, + int freq, const u8 *buf, size_t len, gfp_t gfp) { struct sk_buff *msg; void *hdr; @@ -6067,7 +6119,7 @@ int nl80211_send_action(struct cfg80211_registered_device *rdev, if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ACTION); + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FRAME); if (!hdr) { nlmsg_free(msg); return -ENOMEM; @@ -6095,10 +6147,10 @@ int nl80211_send_action(struct cfg80211_registered_device *rdev, return -ENOBUFS; } -void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u64 cookie, - const u8 *buf, size_t len, bool ack, - gfp_t gfp) +void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u64 cookie, + const u8 *buf, size_t len, bool ack, + gfp_t gfp) { struct sk_buff *msg; void *hdr; @@ -6107,7 +6159,7 @@ void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev, if (!msg) return; - hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_ACTION_TX_STATUS); + hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FRAME_TX_STATUS); if (!hdr) { nlmsg_free(msg); return; @@ -6194,7 +6246,7 @@ static int nl80211_netlink_notify(struct notifier_block * nb, list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) list_for_each_entry_rcu(wdev, &rdev->netdev_list, list) - cfg80211_mlme_unregister_actions(wdev, notify->pid); + cfg80211_mlme_unregister_socket(wdev, notify->pid); rcu_read_unlock(); diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h index 2ad7fbc7d9f..30d2f939150 100644 --- a/net/wireless/nl80211.h +++ b/net/wireless/nl80211.h @@ -74,13 +74,13 @@ void nl80211_send_sta_event(struct cfg80211_registered_device *rdev, struct net_device *dev, const u8 *mac_addr, struct station_info *sinfo, gfp_t gfp); -int nl80211_send_action(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u32 nlpid, int freq, - const u8 *buf, size_t len, gfp_t gfp); -void nl80211_send_action_tx_status(struct cfg80211_registered_device *rdev, - struct net_device *netdev, u64 cookie, - const u8 *buf, size_t len, bool ack, - gfp_t gfp); +int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u32 nlpid, int freq, + const u8 *buf, size_t len, gfp_t gfp); +void nl80211_send_mgmt_tx_status(struct cfg80211_registered_device *rdev, + struct net_device *netdev, u64 cookie, + const u8 *buf, size_t len, bool ack, + gfp_t gfp); void nl80211_send_cqm_rssi_notify(struct cfg80211_registered_device *rdev, diff --git a/net/wireless/util.c b/net/wireless/util.c index 1eb24162be6..8d961cc4ae9 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -823,7 +823,7 @@ int cfg80211_change_iface(struct cfg80211_registered_device *rdev, /* monitor can't bridge anyway */ break; case NL80211_IFTYPE_UNSPECIFIED: - case __NL80211_IFTYPE_AFTER_LAST: + case NUM_NL80211_IFTYPES: /* not happening */ break; } -- cgit v1.2.3 From 633dd1ea683d907af944bcd9814092efe9869b05 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 18 Aug 2010 15:01:23 +0200 Subject: mac80211: fix docbook Fix a small problem in the documentation for ieee80211_request_smps, and a now erroneous inclusion of enum ieee80211_key_alg, which no longer exists after the change to ciphers. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- Documentation/DocBook/80211.tmpl | 1 - include/net/mac80211.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/net') diff --git a/Documentation/DocBook/80211.tmpl b/Documentation/DocBook/80211.tmpl index 6f88e184d2e..52310ac892e 100644 --- a/Documentation/DocBook/80211.tmpl +++ b/Documentation/DocBook/80211.tmpl @@ -248,7 +248,6 @@ MISSING !Finclude/net/mac80211.h set_key_cmd !Finclude/net/mac80211.h ieee80211_key_conf -!Finclude/net/mac80211.h ieee80211_key_alg !Finclude/net/mac80211.h ieee80211_key_flags diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 914c747ac3a..2a181136607 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2537,7 +2537,7 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success); /** * ieee80211_request_smps - request SM PS transition * @vif: &struct ieee80211_vif pointer from the add_interface callback. - * @mode: new SM PS mode + * @smps_mode: new SM PS mode * * This allows the driver to request an SM PS transition in managed * mode. This is useful when the driver has more information than -- cgit v1.2.3 From d70e96932de55fb2c05b1c0af1dff178651a9b77 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 19 Aug 2010 16:11:27 +0200 Subject: cfg80211: add some documentation Add some documentation for cfg80211. I'm hoping some of the regulatory documentation will be filled by somebody more familiar with it, hint hint! :) Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- Documentation/DocBook/80211.tmpl | 136 +++++++++++++++++++++++++++++++++++++-- include/net/cfg80211.h | 121 ++++++++++++++++++++++++++++++++-- 2 files changed, 246 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/Documentation/DocBook/80211.tmpl b/Documentation/DocBook/80211.tmpl index 52310ac892e..b84c9282828 100644 --- a/Documentation/DocBook/80211.tmpl +++ b/Documentation/DocBook/80211.tmpl @@ -65,18 +65,144 @@ +!Ainclude/net/cfg80211.h The cfg80211 subsystem - -MISSING - +!Pinclude/net/cfg80211.h Introduction - + + Device registration +!Pinclude/net/cfg80211.h Device registration !Finclude/net/cfg80211.h ieee80211_band - +!Finclude/net/cfg80211.h ieee80211_channel_flags +!Finclude/net/cfg80211.h ieee80211_channel +!Finclude/net/cfg80211.h ieee80211_rate_flags +!Finclude/net/cfg80211.h ieee80211_rate +!Finclude/net/cfg80211.h ieee80211_sta_ht_cap +!Finclude/net/cfg80211.h ieee80211_supported_band +!Finclude/net/cfg80211.h cfg80211_signal_type +!Finclude/net/cfg80211.h wiphy_params_flags +!Finclude/net/cfg80211.h wiphy_flags +!Finclude/net/cfg80211.h wiphy +!Finclude/net/cfg80211.h wireless_dev +!Finclude/net/cfg80211.h wiphy_new +!Finclude/net/cfg80211.h wiphy_register +!Finclude/net/cfg80211.h wiphy_unregister +!Finclude/net/cfg80211.h wiphy_free + +!Finclude/net/cfg80211.h wiphy_name +!Finclude/net/cfg80211.h wiphy_dev +!Finclude/net/cfg80211.h wiphy_priv +!Finclude/net/cfg80211.h priv_to_wiphy +!Finclude/net/cfg80211.h set_wiphy_dev +!Finclude/net/cfg80211.h wdev_priv + + + Actions and configuration +!Pinclude/net/cfg80211.h Actions and configuration +!Finclude/net/cfg80211.h cfg80211_ops +!Finclude/net/cfg80211.h vif_params +!Finclude/net/cfg80211.h key_params +!Finclude/net/cfg80211.h survey_info_flags +!Finclude/net/cfg80211.h survey_info +!Finclude/net/cfg80211.h beacon_parameters +!Finclude/net/cfg80211.h plink_actions +!Finclude/net/cfg80211.h station_parameters +!Finclude/net/cfg80211.h station_info_flags +!Finclude/net/cfg80211.h rate_info_flags +!Finclude/net/cfg80211.h rate_info +!Finclude/net/cfg80211.h station_info +!Finclude/net/cfg80211.h monitor_flags +!Finclude/net/cfg80211.h mpath_info_flags +!Finclude/net/cfg80211.h mpath_info +!Finclude/net/cfg80211.h bss_parameters +!Finclude/net/cfg80211.h ieee80211_txq_params +!Finclude/net/cfg80211.h cfg80211_crypto_settings +!Finclude/net/cfg80211.h cfg80211_auth_request +!Finclude/net/cfg80211.h cfg80211_assoc_request +!Finclude/net/cfg80211.h cfg80211_deauth_request +!Finclude/net/cfg80211.h cfg80211_disassoc_request +!Finclude/net/cfg80211.h cfg80211_ibss_params +!Finclude/net/cfg80211.h cfg80211_connect_params +!Finclude/net/cfg80211.h cfg80211_pmksa +!Finclude/net/cfg80211.h cfg80211_send_rx_auth +!Finclude/net/cfg80211.h cfg80211_send_auth_timeout +!Finclude/net/cfg80211.h __cfg80211_auth_canceled +!Finclude/net/cfg80211.h cfg80211_send_rx_assoc +!Finclude/net/cfg80211.h cfg80211_send_assoc_timeout +!Finclude/net/cfg80211.h cfg80211_send_deauth +!Finclude/net/cfg80211.h __cfg80211_send_deauth +!Finclude/net/cfg80211.h cfg80211_send_disassoc +!Finclude/net/cfg80211.h __cfg80211_send_disassoc +!Finclude/net/cfg80211.h cfg80211_ibss_joined +!Finclude/net/cfg80211.h cfg80211_connect_result +!Finclude/net/cfg80211.h cfg80211_roamed +!Finclude/net/cfg80211.h cfg80211_disconnected +!Finclude/net/cfg80211.h cfg80211_ready_on_channel +!Finclude/net/cfg80211.h cfg80211_remain_on_channel_expired +!Finclude/net/cfg80211.h cfg80211_new_sta +!Finclude/net/cfg80211.h cfg80211_rx_mgmt +!Finclude/net/cfg80211.h cfg80211_mgmt_tx_status +!Finclude/net/cfg80211.h cfg80211_cqm_rssi_notify +!Finclude/net/cfg80211.h cfg80211_michael_mic_failure + + + Scanning and BSS list handling +!Pinclude/net/cfg80211.h Scanning and BSS list handling +!Finclude/net/cfg80211.h cfg80211_ssid +!Finclude/net/cfg80211.h cfg80211_scan_request +!Finclude/net/cfg80211.h cfg80211_scan_done +!Finclude/net/cfg80211.h cfg80211_bss +!Finclude/net/cfg80211.h cfg80211_inform_bss_frame +!Finclude/net/cfg80211.h cfg80211_inform_bss +!Finclude/net/cfg80211.h cfg80211_unlink_bss +!Finclude/net/cfg80211.h cfg80211_find_ie +!Finclude/net/cfg80211.h ieee80211_bss_get_ie + + + Utility functions +!Pinclude/net/cfg80211.h Utility functions +!Finclude/net/cfg80211.h ieee80211_channel_to_frequency +!Finclude/net/cfg80211.h ieee80211_frequency_to_channel +!Finclude/net/cfg80211.h ieee80211_get_channel +!Finclude/net/cfg80211.h ieee80211_get_response_rate +!Finclude/net/cfg80211.h ieee80211_hdrlen +!Finclude/net/cfg80211.h ieee80211_get_hdrlen_from_skb +!Finclude/net/cfg80211.h ieee80211_radiotap_iterator + + + Data path helpers +!Pinclude/net/cfg80211.h Data path helpers +!Finclude/net/cfg80211.h ieee80211_data_to_8023 +!Finclude/net/cfg80211.h ieee80211_data_from_8023 +!Finclude/net/cfg80211.h ieee80211_amsdu_to_8023s +!Finclude/net/cfg80211.h cfg80211_classify8021d + + + Regulatory enforcement infrastructure +!Pinclude/net/cfg80211.h Regulatory enforcement infrastructure +!Finclude/net/cfg80211.h regulatory_hint +!Finclude/net/cfg80211.h wiphy_apply_custom_regulatory +!Finclude/net/cfg80211.h freq_reg_info + + + RFkill integration +!Pinclude/net/cfg80211.h RFkill integration +!Finclude/net/cfg80211.h wiphy_rfkill_set_hw_state +!Finclude/net/cfg80211.h wiphy_rfkill_start_polling +!Finclude/net/cfg80211.h wiphy_rfkill_stop_polling + + + Test mode +!Pinclude/net/cfg80211.h Test mode +!Finclude/net/cfg80211.h cfg80211_testmode_alloc_reply_skb +!Finclude/net/cfg80211.h cfg80211_testmode_reply +!Finclude/net/cfg80211.h cfg80211_testmode_alloc_event_skb +!Finclude/net/cfg80211.h cfg80211_testmode_event + diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 6a98b1b3bfd..f2740537b5d 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -25,6 +25,43 @@ #include +/** + * DOC: Introduction + * + * cfg80211 is the configuration API for 802.11 devices in Linux. It bridges + * userspace and drivers, and offers some utility functionality associated + * with 802.11. cfg80211 must, directly or indirectly via mac80211, be used + * by all modern wireless drivers in Linux, so that they offer a consistent + * API through nl80211. For backward compatibility, cfg80211 also offers + * wireless extensions to userspace, but hides them from drivers completely. + * + * Additionally, cfg80211 contains code to help enforce regulatory spectrum + * use restrictions. + */ + + +/** + * DOC: Device registration + * + * In order for a driver to use cfg80211, it must register the hardware device + * with cfg80211. This happens through a number of hardware capability structs + * described below. + * + * The fundamental structure for each device is the 'wiphy', of which each + * instance describes a physical wireless device connected to the system. Each + * such wiphy can have zero, one, or many virtual interfaces associated with + * it, which need to be identified as such by pointing the network interface's + * @ieee80211_ptr pointer to a &struct wireless_dev which further describes + * the wireless part of the interface, normally this struct is embedded in the + * network interface's private data area. Drivers can optionally allow creating + * or destroying virtual interfaces on the fly, but without at least one or the + * ability to create some the wireless device isn't useful. + * + * Each wiphy structure contains device capability information, and also has + * a pointer to the various operations the driver offers. The definitions and + * structures here describe these capabilities in detail. + */ + /* * wireless hardware capability structures */ @@ -204,6 +241,21 @@ struct ieee80211_supported_band { * Wireless hardware/device configuration structures and methods */ +/** + * DOC: Actions and configuration + * + * Each wireless device and each virtual interface offer a set of configuration + * operations and other actions that are invoked by userspace. Each of these + * actions is described in the operations structure, and the parameters these + * operations use are described separately. + * + * Additionally, some operations are asynchronous and expect to get status + * information via some functions that drivers need to call. + * + * Scanning and BSS list handling with its associated functionality is described + * in a separate chapter. + */ + /** * struct vif_params - describes virtual interface parameters * @mesh_id: mesh ID to use @@ -570,8 +622,28 @@ struct ieee80211_txq_params { /* from net/wireless.h */ struct wiphy; -/* from net/ieee80211.h */ -struct ieee80211_channel; +/** + * DOC: Scanning and BSS list handling + * + * The scanning process itself is fairly simple, but cfg80211 offers quite + * a bit of helper functionality. To start a scan, the scan operation will + * be invoked with a scan definition. This scan definition contains the + * channels to scan, and the SSIDs to send probe requests for (including the + * wildcard, if desired). A passive scan is indicated by having no SSIDs to + * probe. Additionally, a scan request may contain extra information elements + * that should be added to the probe request. The IEs are guaranteed to be + * well-formed, and will not exceed the maximum length the driver advertised + * in the wiphy structure. + * + * When scanning finds a BSS, cfg80211 needs to be notified of that, because + * it is responsible for maintaining the BSS list; the driver should not + * maintain a list itself. For this notification, various functions exist. + * + * Since drivers do not maintain a BSS list, there are also a number of + * functions to search for a BSS and obtain information about it from the + * BSS structure cfg80211 maintains. The BSS list is also made available + * to userspace. + */ /** * struct cfg80211_ssid - SSID description @@ -1574,8 +1646,10 @@ static inline void *wdev_priv(struct wireless_dev *wdev) return wiphy_priv(wdev->wiphy); } -/* - * Utility functions +/** + * DOC: Utility functions + * + * cfg80211 offers a number of utility functions that can be useful. */ /** @@ -1728,6 +1802,14 @@ unsigned int ieee80211_get_hdrlen_from_skb(const struct sk_buff *skb); */ unsigned int __attribute_const__ ieee80211_hdrlen(__le16 fc); +/** + * DOC: Data path helpers + * + * In addition to generic utilities, cfg80211 also offers + * functions that help implement the data path for devices + * that do not do the 802.11/802.3 conversion on the device. + */ + /** * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3 * @skb: the 802.11 data frame @@ -1788,8 +1870,10 @@ unsigned int cfg80211_classify8021d(struct sk_buff *skb); */ const u8 *cfg80211_find_ie(u8 eid, const u8 *ies, int len); -/* - * Regulatory helper functions for wiphys +/** + * DOC: Regulatory enforcement infrastructure + * + * TODO */ /** @@ -2191,6 +2275,20 @@ void cfg80211_michael_mic_failure(struct net_device *dev, const u8 *addr, */ void cfg80211_ibss_joined(struct net_device *dev, const u8 *bssid, gfp_t gfp); +/** + * DOC: RFkill integration + * + * RFkill integration in cfg80211 is almost invisible to drivers, + * as cfg80211 automatically registers an rfkill instance for each + * wireless device it knows about. Soft kill is also translated + * into disconnecting and turning all interfaces off, drivers are + * expected to turn off the device when all interfaces are down. + * + * However, devices may have a hard RFkill line, in which case they + * also need to interact with the rfkill subsystem, via cfg80211. + * They can do this with a few helper functions documented here. + */ + /** * wiphy_rfkill_set_hw_state - notify cfg80211 about hw block state * @wiphy: the wiphy @@ -2211,6 +2309,17 @@ void wiphy_rfkill_start_polling(struct wiphy *wiphy); void wiphy_rfkill_stop_polling(struct wiphy *wiphy); #ifdef CONFIG_NL80211_TESTMODE +/** + * DOC: Test mode + * + * Test mode is a set of utility functions to allow drivers to + * interact with driver-specific tools to aid, for instance, + * factory programming. + * + * This chapter describes how drivers interact with it, for more + * information see the nl80211 book's chapter on it. + */ + /** * cfg80211_testmode_alloc_reply_skb - allocate testmode reply * @wiphy: the wiphy -- cgit v1.2.3 From 2738bd682df546f34654ed3d59dfc9ebe8d04979 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Sat, 21 Aug 2010 16:39:01 -0400 Subject: mac80211: trivial spelling fixes Fix spelling and readability of a few lines of kernel doc: s/issueing/issuing/g s/approriate/appropriate/g s/supported by simply/supported simply by/ s/IEEE80211_HW_BEACON_FILTERING/IEEE80211_HW_BEACON_FILTER/g Signed-off-by: Bob Copeland Signed-off-by: John W. Linville --- include/net/mac80211.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 2a181136607..dcc8c2bf986 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1242,8 +1242,8 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * %IEEE80211_CONF_PS flag enabled means that the powersave mode defined in * IEEE 802.11-2007 section 11.2 is enabled. This is not to be confused * with hardware wakeup and sleep states. Driver is responsible for waking - * up the hardware before issueing commands to the hardware and putting it - * back to sleep at approriate times. + * up the hardware before issuing commands to the hardware and putting it + * back to sleep at appropriate times. * * When PS is enabled, hardware needs to wakeup for beacons and receive the * buffered multicast/broadcast frames after the beacon. Also it must be @@ -1264,7 +1264,7 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * there's data traffic and still saving significantly power in idle * periods. * - * Dynamic powersave is supported by simply mac80211 enabling and disabling + * Dynamic powersave is simply supported by mac80211 enabling and disabling * PS based on traffic. Driver needs to only set %IEEE80211_HW_SUPPORTS_PS * flag and mac80211 will handle everything automatically. Additionally, * hardware having support for the dynamic PS feature may set the @@ -2458,7 +2458,7 @@ void ieee80211_sta_block_awake(struct ieee80211_hw *hw, * * @vif: &struct ieee80211_vif pointer from the add_interface callback. * - * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTERING and + * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTER and * %IEEE80211_CONF_PS is set, the driver needs to inform whenever the * hardware is not receiving beacons with this function. */ @@ -2469,7 +2469,7 @@ void ieee80211_beacon_loss(struct ieee80211_vif *vif); * * @vif: &struct ieee80211_vif pointer from the add_interface callback. * - * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTERING, and + * When beacon filtering is enabled with %IEEE80211_HW_BEACON_FILTER, and * %IEEE80211_CONF_PS and %IEEE80211_HW_CONNECTION_MONITOR are set, the driver * needs to inform if the connection to the AP has been lost. * -- cgit v1.2.3 From 145ce502e44b57c074c72cfdc855557e19026999 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 24 Aug 2010 13:21:08 +0000 Subject: net/sctp: Use pr_fmt and pr_ Change SCTP_DEBUG_PRINTK and SCTP_DEBUG_PRINTK_IPADDR to use do { print } while (0) guards. Add SCTP_DEBUG_PRINTK_CONT to fix errors in log when lines were continued. Add #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt Add a missing newline in "Failed bind hash alloc" Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 48 ++++++++++++++++++++++++++++++------------------ net/sctp/associola.c | 2 ++ net/sctp/chunk.c | 2 ++ net/sctp/inqueue.c | 2 ++ net/sctp/ipv6.c | 4 +++- net/sctp/objcnt.c | 5 +++-- net/sctp/output.c | 2 ++ net/sctp/outqueue.c | 34 ++++++++++++++++++---------------- net/sctp/probe.c | 4 +++- net/sctp/protocol.c | 17 ++++++++--------- net/sctp/sm_make_chunk.c | 2 ++ net/sctp/sm_sideeffect.c | 21 ++++++++++----------- net/sctp/sm_statefuns.c | 20 ++++++++++---------- net/sctp/sm_statetable.c | 42 +++++++++++++++++++++--------------------- net/sctp/socket.c | 39 +++++++++++++++------------------------ net/sctp/transport.c | 9 +++++---- 16 files changed, 136 insertions(+), 117 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 65946bc43d0..2cb3980b161 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -275,24 +275,35 @@ struct sctp_mib { /* Print debugging messages. */ #if SCTP_DEBUG extern int sctp_debug_flag; -#define SCTP_DEBUG_PRINTK(whatever...) \ - ((void) (sctp_debug_flag && printk(KERN_DEBUG whatever))) -#define SCTP_DEBUG_PRINTK_IPADDR(lead, trail, leadparm, saddr, otherparms...) \ - if (sctp_debug_flag) { \ - if (saddr->sa.sa_family == AF_INET6) { \ - printk(KERN_DEBUG \ - lead "%pI6" trail, \ - leadparm, \ - &saddr->v6.sin6_addr, \ - otherparms); \ - } else { \ - printk(KERN_DEBUG \ - lead "%pI4" trail, \ - leadparm, \ - &saddr->v4.sin_addr.s_addr, \ - otherparms); \ - } \ - } +#define SCTP_DEBUG_PRINTK(fmt, args...) \ +do { \ + if (sctp_debug_flag) \ + printk(KERN_DEBUG pr_fmt(fmt), ##args); \ +} while (0) +#define SCTP_DEBUG_PRINTK_CONT(fmt, args...) \ +do { \ + if (sctp_debug_flag) \ + pr_cont(fmt, ##args); \ +} while (0) +#define SCTP_DEBUG_PRINTK_IPADDR(fmt_lead, fmt_trail, \ + args_lead, saddr, args_trail...) \ +do { \ + if (sctp_debug_flag) { \ + if (saddr->sa.sa_family == AF_INET6) { \ + printk(KERN_DEBUG \ + pr_fmt(fmt_lead "%pI6" fmt_trail), \ + args_lead, \ + &saddr->v6.sin6_addr, \ + args_trail); \ + } else { \ + printk(KERN_DEBUG \ + pr_fmt(fmt_lead "%pI4" fmt_trail), \ + args_lead, \ + &saddr->v4.sin_addr.s_addr, \ + args_trail); \ + } \ + } \ +} while (0) #define SCTP_ENABLE_DEBUG { sctp_debug_flag = 1; } #define SCTP_DISABLE_DEBUG { sctp_debug_flag = 0; } @@ -306,6 +317,7 @@ extern int sctp_debug_flag; #else /* SCTP_DEBUG */ #define SCTP_DEBUG_PRINTK(whatever...) +#define SCTP_DEBUG_PRINTK_CONT(fmt, args...) #define SCTP_DEBUG_PRINTK_IPADDR(whatever...) #define SCTP_ENABLE_DEBUG #define SCTP_DISABLE_DEBUG diff --git a/net/sctp/associola.c b/net/sctp/associola.c index 0b85e525643..5f1fb8bd862 100644 --- a/net/sctp/associola.c +++ b/net/sctp/associola.c @@ -48,6 +48,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include diff --git a/net/sctp/chunk.c b/net/sctp/chunk.c index 476caaf100e..6c8556459a7 100644 --- a/net/sctp/chunk.c +++ b/net/sctp/chunk.c @@ -37,6 +37,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index ccb6dc48d15..397296fb156 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -43,6 +43,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 732689140fb..95e0c8eda1a 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -47,6 +47,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -336,7 +338,7 @@ static void sctp_v6_get_saddr(struct sctp_sock *sk, memcpy(saddr, baddr, sizeof(union sctp_addr)); SCTP_DEBUG_PRINTK("saddr: %pI6\n", &saddr->v6.sin6_addr); } else { - printk(KERN_ERR "%s: asoc:%p Could not find a valid source " + pr_err("%s: asoc:%p Could not find a valid source " "address for the dest:%pI6\n", __func__, asoc, &daddr->v6.sin6_addr); } diff --git a/net/sctp/objcnt.c b/net/sctp/objcnt.c index f73ec0ea93b..8ef8e7d9eb6 100644 --- a/net/sctp/objcnt.c +++ b/net/sctp/objcnt.c @@ -38,6 +38,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include @@ -134,8 +136,7 @@ void sctp_dbg_objcnt_init(void) ent = proc_create("sctp_dbg_objcnt", 0, proc_net_sctp, &sctp_objcnt_ops); if (!ent) - printk(KERN_WARNING - "sctp_dbg_objcnt: Unable to create /proc entry.\n"); + pr_warn("sctp_dbg_objcnt: Unable to create /proc entry.\n"); } /* Cleanup the objcount entry in the proc filesystem. */ diff --git a/net/sctp/output.c b/net/sctp/output.c index a646681f5ac..901764b17ae 100644 --- a/net/sctp/output.c +++ b/net/sctp/output.c @@ -41,6 +41,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index c04b2eb5918..8c6d379b4bb 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -46,6 +46,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include /* For struct list_head */ #include @@ -1463,23 +1465,23 @@ static void sctp_check_transmitted(struct sctp_outq *q, /* Display the end of the * current range. */ - SCTP_DEBUG_PRINTK("-%08x", - dbg_last_ack_tsn); + SCTP_DEBUG_PRINTK_CONT("-%08x", + dbg_last_ack_tsn); } /* Start a new range. */ - SCTP_DEBUG_PRINTK(",%08x", tsn); + SCTP_DEBUG_PRINTK_CONT(",%08x", tsn); dbg_ack_tsn = tsn; break; case 1: /* The last TSN was NOT ACKed. */ if (dbg_last_kept_tsn != dbg_kept_tsn) { /* Display the end of current range. */ - SCTP_DEBUG_PRINTK("-%08x", - dbg_last_kept_tsn); + SCTP_DEBUG_PRINTK_CONT("-%08x", + dbg_last_kept_tsn); } - SCTP_DEBUG_PRINTK("\n"); + SCTP_DEBUG_PRINTK_CONT("\n"); /* FALL THROUGH... */ default: @@ -1526,18 +1528,18 @@ static void sctp_check_transmitted(struct sctp_outq *q, break; if (dbg_last_kept_tsn != dbg_kept_tsn) - SCTP_DEBUG_PRINTK("-%08x", - dbg_last_kept_tsn); + SCTP_DEBUG_PRINTK_CONT("-%08x", + dbg_last_kept_tsn); - SCTP_DEBUG_PRINTK(",%08x", tsn); + SCTP_DEBUG_PRINTK_CONT(",%08x", tsn); dbg_kept_tsn = tsn; break; case 0: if (dbg_last_ack_tsn != dbg_ack_tsn) - SCTP_DEBUG_PRINTK("-%08x", - dbg_last_ack_tsn); - SCTP_DEBUG_PRINTK("\n"); + SCTP_DEBUG_PRINTK_CONT("-%08x", + dbg_last_ack_tsn); + SCTP_DEBUG_PRINTK_CONT("\n"); /* FALL THROUGH... */ default: @@ -1556,17 +1558,17 @@ static void sctp_check_transmitted(struct sctp_outq *q, switch (dbg_prt_state) { case 0: if (dbg_last_ack_tsn != dbg_ack_tsn) { - SCTP_DEBUG_PRINTK("-%08x\n", dbg_last_ack_tsn); + SCTP_DEBUG_PRINTK_CONT("-%08x\n", dbg_last_ack_tsn); } else { - SCTP_DEBUG_PRINTK("\n"); + SCTP_DEBUG_PRINTK_CONT("\n"); } break; case 1: if (dbg_last_kept_tsn != dbg_kept_tsn) { - SCTP_DEBUG_PRINTK("-%08x\n", dbg_last_kept_tsn); + SCTP_DEBUG_PRINTK_CONT("-%08x\n", dbg_last_kept_tsn); } else { - SCTP_DEBUG_PRINTK("\n"); + SCTP_DEBUG_PRINTK_CONT("\n"); } } #endif /* SCTP_DEBUG */ diff --git a/net/sctp/probe.c b/net/sctp/probe.c index db3a42b8b34..2e63e9dc010 100644 --- a/net/sctp/probe.c +++ b/net/sctp/probe.c @@ -22,6 +22,8 @@ * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -192,7 +194,7 @@ static __init int sctpprobe_init(void) if (ret) goto remove_proc; - pr_info("SCTP probe registered (port=%d)\n", port); + pr_info("probe registered (port=%d)\n", port); return 0; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 5027b83f1cc..f774e657641 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -46,6 +46,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -707,8 +709,7 @@ static int sctp_ctl_sock_init(void) &init_net); if (err < 0) { - printk(KERN_ERR - "SCTP: Failed to create the SCTP control socket.\n"); + pr_err("Failed to create the SCTP control socket\n"); return err; } return 0; @@ -1206,7 +1207,7 @@ SCTP_STATIC __init int sctp_init(void) __get_free_pages(GFP_ATOMIC, order); } while (!sctp_assoc_hashtable && --order > 0); if (!sctp_assoc_hashtable) { - printk(KERN_ERR "SCTP: Failed association hash alloc.\n"); + pr_err("Failed association hash alloc\n"); status = -ENOMEM; goto err_ahash_alloc; } @@ -1220,7 +1221,7 @@ SCTP_STATIC __init int sctp_init(void) sctp_ep_hashtable = (struct sctp_hashbucket *) kmalloc(64 * sizeof(struct sctp_hashbucket), GFP_KERNEL); if (!sctp_ep_hashtable) { - printk(KERN_ERR "SCTP: Failed endpoint_hash alloc.\n"); + pr_err("Failed endpoint_hash alloc\n"); status = -ENOMEM; goto err_ehash_alloc; } @@ -1239,7 +1240,7 @@ SCTP_STATIC __init int sctp_init(void) __get_free_pages(GFP_ATOMIC, order); } while (!sctp_port_hashtable && --order > 0); if (!sctp_port_hashtable) { - printk(KERN_ERR "SCTP: Failed bind hash alloc."); + pr_err("Failed bind hash alloc\n"); status = -ENOMEM; goto err_bhash_alloc; } @@ -1248,8 +1249,7 @@ SCTP_STATIC __init int sctp_init(void) INIT_HLIST_HEAD(&sctp_port_hashtable[i].chain); } - printk(KERN_INFO "SCTP: Hash tables configured " - "(established %d bind %d)\n", + pr_info("Hash tables configured (established %d bind %d)\n", sctp_assoc_hashsize, sctp_port_hashsize); /* Disable ADDIP by default. */ @@ -1290,8 +1290,7 @@ SCTP_STATIC __init int sctp_init(void) /* Initialize the control inode/socket for handling OOTB packets. */ if ((status = sctp_ctl_sock_init())) { - printk (KERN_ERR - "SCTP: Failed to initialize the SCTP control sock.\n"); + pr_err("Failed to initialize the SCTP control sock\n"); goto err_ctl_sock_init; } diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 246f9292465..2cc46f0962c 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -50,6 +50,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include diff --git a/net/sctp/sm_sideeffect.c b/net/sctp/sm_sideeffect.c index f5e5e27cac5..b21b218d564 100644 --- a/net/sctp/sm_sideeffect.c +++ b/net/sctp/sm_sideeffect.c @@ -47,6 +47,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -1146,26 +1148,23 @@ static int sctp_side_effects(sctp_event_t event_type, sctp_subtype_t subtype, case SCTP_DISPOSITION_VIOLATION: if (net_ratelimit()) - printk(KERN_ERR "sctp protocol violation state %d " - "chunkid %d\n", state, subtype.chunk); + pr_err("protocol violation state %d chunkid %d\n", + state, subtype.chunk); break; case SCTP_DISPOSITION_NOT_IMPL: - printk(KERN_WARNING "sctp unimplemented feature in state %d, " - "event_type %d, event_id %d\n", - state, event_type, subtype.chunk); + pr_warn("unimplemented feature in state %d, event_type %d, event_id %d\n", + state, event_type, subtype.chunk); break; case SCTP_DISPOSITION_BUG: - printk(KERN_ERR "sctp bug in state %d, " - "event_type %d, event_id %d\n", + pr_err("bug in state %d, event_type %d, event_id %d\n", state, event_type, subtype.chunk); BUG(); break; default: - printk(KERN_ERR "sctp impossible disposition %d " - "in state %d, event_type %d, event_id %d\n", + pr_err("impossible disposition %d in state %d, event_type %d, event_id %d\n", status, state, event_type, subtype.chunk); BUG(); break; @@ -1679,8 +1678,8 @@ static int sctp_cmd_interpreter(sctp_event_t event_type, sctp_cmd_send_asconf(asoc); break; default: - printk(KERN_WARNING "Impossible command: %u, %p\n", - cmd->verb, cmd->obj.ptr); + pr_warn("Impossible command: %u, %p\n", + cmd->verb, cmd->obj.ptr); break; } diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 24b2cd55563..8b284436be6 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -50,6 +50,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -1138,18 +1140,16 @@ sctp_disposition_t sctp_sf_backbeat_8_3(const struct sctp_endpoint *ep, if (unlikely(!link)) { if (from_addr.sa.sa_family == AF_INET6) { if (net_ratelimit()) - printk(KERN_WARNING - "%s association %p could not find address %pI6\n", - __func__, - asoc, - &from_addr.v6.sin6_addr); + pr_warn("%s association %p could not find address %pI6\n", + __func__, + asoc, + &from_addr.v6.sin6_addr); } else { if (net_ratelimit()) - printk(KERN_WARNING - "%s association %p could not find address %pI4\n", - __func__, - asoc, - &from_addr.v4.sin_addr.s_addr); + pr_warn("%s association %p could not find address %pI4\n", + __func__, + asoc, + &from_addr.v4.sin_addr.s_addr); } return SCTP_DISPOSITION_DISCARD; } diff --git a/net/sctp/sm_statetable.c b/net/sctp/sm_statetable.c index 6d9b3aafcc5..546d4387fb3 100644 --- a/net/sctp/sm_statetable.c +++ b/net/sctp/sm_statetable.c @@ -46,6 +46,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -66,15 +68,19 @@ static const sctp_sm_table_entry_t bug = { .name = "sctp_sf_bug" }; -#define DO_LOOKUP(_max, _type, _table) \ - if ((event_subtype._type > (_max))) { \ - printk(KERN_WARNING \ - "sctp table %p possible attack:" \ - " event %d exceeds max %d\n", \ - _table, event_subtype._type, _max); \ - return &bug; \ - } \ - return &_table[event_subtype._type][(int)state]; +#define DO_LOOKUP(_max, _type, _table) \ +({ \ + const sctp_sm_table_entry_t *rtn; \ + \ + if ((event_subtype._type > (_max))) { \ + pr_warn("table %p possible attack: event %d exceeds max %d\n", \ + _table, event_subtype._type, _max); \ + rtn = &bug; \ + } else \ + rtn = &_table[event_subtype._type][(int)state]; \ + \ + rtn; \ +}) const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, sctp_state_t state, @@ -83,21 +89,15 @@ const sctp_sm_table_entry_t *sctp_sm_lookup_event(sctp_event_t event_type, switch (event_type) { case SCTP_EVENT_T_CHUNK: return sctp_chunk_event_lookup(event_subtype.chunk, state); - break; case SCTP_EVENT_T_TIMEOUT: - DO_LOOKUP(SCTP_EVENT_TIMEOUT_MAX, timeout, - timeout_event_table); - break; - + return DO_LOOKUP(SCTP_EVENT_TIMEOUT_MAX, timeout, + timeout_event_table); case SCTP_EVENT_T_OTHER: - DO_LOOKUP(SCTP_EVENT_OTHER_MAX, other, other_event_table); - break; - + return DO_LOOKUP(SCTP_EVENT_OTHER_MAX, other, + other_event_table); case SCTP_EVENT_T_PRIMITIVE: - DO_LOOKUP(SCTP_EVENT_PRIMITIVE_MAX, primitive, - primitive_event_table); - break; - + return DO_LOOKUP(SCTP_EVENT_PRIMITIVE_MAX, primitive, + primitive_event_table); default: /* Yikes! We got an illegal event type. */ return &bug; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index ca44917872d..f4bec277235 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -57,6 +57,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -2458,9 +2460,8 @@ static int sctp_setsockopt_delayed_ack(struct sock *sk, if (params.sack_delay == 0 && params.sack_freq == 0) return 0; } else if (optlen == sizeof(struct sctp_assoc_value)) { - printk(KERN_WARNING "SCTP: Use of struct sctp_assoc_value " - "in delayed_ack socket option deprecated\n"); - printk(KERN_WARNING "SCTP: Use struct sctp_sack_info instead\n"); + pr_warn("Use of struct sctp_assoc_value in delayed_ack socket option deprecated\n"); + pr_warn("Use struct sctp_sack_info instead\n"); if (copy_from_user(¶ms, optval, optlen)) return -EFAULT; @@ -2868,10 +2869,8 @@ static int sctp_setsockopt_maxseg(struct sock *sk, char __user *optval, unsigned int val; if (optlen == sizeof(int)) { - printk(KERN_WARNING - "SCTP: Use of int in maxseg socket option deprecated\n"); - printk(KERN_WARNING - "SCTP: Use struct sctp_assoc_value instead\n"); + pr_warn("Use of int in maxseg socket option deprecated\n"); + pr_warn("Use struct sctp_assoc_value instead\n"); if (copy_from_user(&val, optval, optlen)) return -EFAULT; params.assoc_id = 0; @@ -3121,10 +3120,8 @@ static int sctp_setsockopt_maxburst(struct sock *sk, int assoc_id = 0; if (optlen == sizeof(int)) { - printk(KERN_WARNING - "SCTP: Use of int in max_burst socket option deprecated\n"); - printk(KERN_WARNING - "SCTP: Use struct sctp_assoc_value instead\n"); + pr_warn("Use of int in max_burst socket option deprecated\n"); + pr_warn("Use struct sctp_assoc_value instead\n"); if (copy_from_user(&val, optval, optlen)) return -EFAULT; } else if (optlen == sizeof(struct sctp_assoc_value)) { @@ -4281,9 +4278,8 @@ static int sctp_getsockopt_delayed_ack(struct sock *sk, int len, if (copy_from_user(¶ms, optval, len)) return -EFAULT; } else if (len == sizeof(struct sctp_assoc_value)) { - printk(KERN_WARNING "SCTP: Use of struct sctp_assoc_value " - "in delayed_ack socket option deprecated\n"); - printk(KERN_WARNING "SCTP: Use struct sctp_sack_info instead\n"); + pr_warn("Use of struct sctp_assoc_value in delayed_ack socket option deprecated\n"); + pr_warn("Use struct sctp_sack_info instead\n"); if (copy_from_user(¶ms, optval, len)) return -EFAULT; } else @@ -4929,10 +4925,8 @@ static int sctp_getsockopt_maxseg(struct sock *sk, int len, struct sctp_association *asoc; if (len == sizeof(int)) { - printk(KERN_WARNING - "SCTP: Use of int in maxseg socket option deprecated\n"); - printk(KERN_WARNING - "SCTP: Use struct sctp_assoc_value instead\n"); + pr_warn("Use of int in maxseg socket option deprecated\n"); + pr_warn("Use struct sctp_assoc_value instead\n"); params.assoc_id = 0; } else if (len >= sizeof(struct sctp_assoc_value)) { len = sizeof(struct sctp_assoc_value); @@ -5023,10 +5017,8 @@ static int sctp_getsockopt_maxburst(struct sock *sk, int len, struct sctp_association *asoc; if (len == sizeof(int)) { - printk(KERN_WARNING - "SCTP: Use of int in max_burst socket option deprecated\n"); - printk(KERN_WARNING - "SCTP: Use struct sctp_assoc_value instead\n"); + pr_warn("Use of int in max_burst socket option deprecated\n"); + pr_warn("Use struct sctp_assoc_value instead\n"); params.assoc_id = 0; } else if (len >= sizeof(struct sctp_assoc_value)) { len = sizeof(struct sctp_assoc_value); @@ -5586,8 +5578,7 @@ SCTP_STATIC int sctp_listen_start(struct sock *sk, int backlog) tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC); if (IS_ERR(tfm)) { if (net_ratelimit()) { - printk(KERN_INFO - "SCTP: failed to load transform for %s: %ld\n", + pr_info("failed to load transform for %s: %ld\n", sctp_hmac_alg, PTR_ERR(tfm)); } return -ENOSYS; diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 132046cb82f..d3ae493d234 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -48,6 +48,8 @@ * be incorporated into the next SCTP release. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -244,10 +246,9 @@ void sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) struct dst_entry *dst; if (unlikely(pmtu < SCTP_DEFAULT_MINSEGMENT)) { - printk(KERN_WARNING "%s: Reported pmtu %d too low, " - "using default minimum of %d\n", - __func__, pmtu, - SCTP_DEFAULT_MINSEGMENT); + pr_warn("%s: Reported pmtu %d too low, using default minimum of %d\n", + __func__, pmtu, + SCTP_DEFAULT_MINSEGMENT); /* Use default minimum segment size and disable * pmtu discovery on this transport. */ -- cgit v1.2.3 From 8789d459bc5e837bf37d261453df96ef54018d7b Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 26 Aug 2010 13:30:26 +0200 Subject: mac80211: allow scan to complete from any context The ieee80211_scan_completed() function was a frequent source of potential deadlocks, since it is called by drivers but may call back into drivers, so drivers had to make sure to call it without any locks held, which frequently lead to more complex code in drivers. Avoid that problem by allowing the function to be called in any context, and queueing the actual work it does. Also update the documentation for it to indicate this. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 3 ++- net/mac80211/ieee80211_i.h | 6 ++++++ net/mac80211/scan.c | 34 ++++++++++++++++++++++++++-------- 3 files changed, 34 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index dcc8c2bf986..8f97548b6d8 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2268,7 +2268,8 @@ void ieee80211_wake_queues(struct ieee80211_hw *hw); * * When hardware scan offload is used (i.e. the hw_scan() callback is * assigned) this function needs to be called by the driver to notify - * mac80211 that the scan finished. + * mac80211 that the scan finished. This function can be called from + * any context, including hardirq context. * * @hw: the hardware that finished the scan * @aborted: set to true if scan was aborted diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 9e225f01497..31713320258 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -596,11 +596,17 @@ enum queue_stop_reason { * determine if we are on the operating channel or not * @SCAN_OFF_CHANNEL: We're off our operating channel for scanning, * gets only set in conjunction with SCAN_SW_SCANNING + * @SCAN_COMPLETED: Set for our scan work function when the driver reported + * that the scan completed. + * @SCAN_ABORTED: Set for our scan work function when the driver reported + * a scan complete for an aborted scan. */ enum { SCAN_SW_SCANNING, SCAN_HW_SCANNING, SCAN_OFF_CHANNEL, + SCAN_COMPLETED, + SCAN_ABORTED, }; /** diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c index 31f233f7f51..d60389ba9b9 100644 --- a/net/mac80211/scan.c +++ b/net/mac80211/scan.c @@ -248,13 +248,11 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local) return true; } -void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) +static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) { struct ieee80211_local *local = hw_to_local(hw); bool was_hw_scan; - trace_api_scan_completed(local, aborted); - mutex_lock(&local->mtx); /* @@ -312,6 +310,18 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) ieee80211_mesh_notify_scan_completed(local); ieee80211_queue_work(&local->hw, &local->work_work); } + +void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted) +{ + struct ieee80211_local *local = hw_to_local(hw); + + trace_api_scan_completed(local, aborted); + + set_bit(SCAN_COMPLETED, &local->scanning); + if (aborted) + set_bit(SCAN_ABORTED, &local->scanning); + ieee80211_queue_delayed_work(&local->hw, &local->scan_work, 0); +} EXPORT_SYMBOL(ieee80211_scan_completed); static int ieee80211_start_sw_scan(struct ieee80211_local *local) @@ -449,7 +459,7 @@ static int ieee80211_scan_state_decision(struct ieee80211_local *local, /* if no more bands/channels left, complete scan and advance to the idle state */ if (local->scan_channel_idx >= local->scan_req->n_channels) { - ieee80211_scan_completed(&local->hw, false); + __ieee80211_scan_completed(&local->hw, false); return 1; } @@ -641,6 +651,14 @@ void ieee80211_scan_work(struct work_struct *work) struct ieee80211_sub_if_data *sdata = local->scan_sdata; unsigned long next_delay = 0; + if (test_and_clear_bit(SCAN_COMPLETED, &local->scanning)) { + bool aborted; + + aborted = test_and_clear_bit(SCAN_ABORTED, &local->scanning); + __ieee80211_scan_completed(&local->hw, aborted); + return; + } + mutex_lock(&local->mtx); if (!sdata || !local->scan_req) { mutex_unlock(&local->mtx); @@ -651,7 +669,7 @@ void ieee80211_scan_work(struct work_struct *work) int rc = drv_hw_scan(local, sdata, local->hw_scan_req); mutex_unlock(&local->mtx); if (rc) - ieee80211_scan_completed(&local->hw, true); + __ieee80211_scan_completed(&local->hw, true); return; } @@ -666,7 +684,7 @@ void ieee80211_scan_work(struct work_struct *work) mutex_unlock(&local->mtx); if (rc) - ieee80211_scan_completed(&local->hw, true); + __ieee80211_scan_completed(&local->hw, true); return; } @@ -676,7 +694,7 @@ void ieee80211_scan_work(struct work_struct *work) * Avoid re-scheduling when the sdata is going away. */ if (!ieee80211_sdata_running(sdata)) { - ieee80211_scan_completed(&local->hw, true); + __ieee80211_scan_completed(&local->hw, true); return; } @@ -783,5 +801,5 @@ void ieee80211_scan_cancel(struct ieee80211_local *local) mutex_unlock(&local->mtx); if (abortscan) - ieee80211_scan_completed(&local->hw, true); + __ieee80211_scan_completed(&local->hw, true); } -- cgit v1.2.3 From c0692b8fe29fb4d4dad33487aabf3ed7e1e880c0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 27 Aug 2010 14:26:53 +0300 Subject: cfg80211: allow changing port control protocol Some vendor specified mechanisms for 802.1X-style functionality use a different protocol than EAP (even if EAP is vendor-extensible). Allow setting the ethertype for the protocol when a driver has support for this. The default if unspecified is EAP, of course. Note: This is suitable only for station mode, not for AP implementation. Signed-off-by: Johannes Berg Signed-off-by: Juuso Oikarinen Signed-off-by: John W. Linville --- include/linux/nl80211.h | 16 +++++++++++++++- include/net/cfg80211.h | 24 +++++++++++++++++------- net/wireless/nl80211.c | 25 ++++++++++++++++++++++--- net/wireless/wext-sme.c | 2 ++ 4 files changed, 56 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index ec1690da784..31603e8b558 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -295,7 +295,9 @@ * auth and assoc steps. For this, you need to specify the SSID in a * %NL80211_ATTR_SSID attribute, and can optionally specify the association * IEs in %NL80211_ATTR_IE, %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_MAC, - * %NL80211_ATTR_WIPHY_FREQ and %NL80211_ATTR_CONTROL_PORT. + * %NL80211_ATTR_WIPHY_FREQ, %NL80211_ATTR_CONTROL_PORT, + * %NL80211_ATTR_CONTROL_PORT_ETHERTYPE and + * %NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT. * It is also sent as an event, with the BSSID and response IEs when the * connection is established or failed to be established. This can be * determined by the STATUS_CODE attribute. @@ -686,6 +688,15 @@ enum nl80211_commands { * request, the driver will assume that the port is unauthorized until * authorized by user space. Otherwise, port is marked authorized by * default in station mode. + * @NL80211_ATTR_CONTROL_PORT_ETHERTYPE: A 16-bit value indicating the + * ethertype that will be used for key negotiation. It can be + * specified with the associate and connect commands. If it is not + * specified, the value defaults to 0x888E (PAE, 802.1X). This + * attribute is also used as a flag in the wiphy information to + * indicate that protocols other than PAE are supported. + * @NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT: When included along with + * %NL80211_ATTR_CONTROL_PORT_ETHERTYPE, indicates that the custom + * ethertype frames used for key negotiation must not be encrypted. * * @NL80211_ATTR_TESTDATA: Testmode data blob, passed through to the driver. * We recommend using nested, driver-specific attributes within this. @@ -951,6 +962,9 @@ enum nl80211_attrs { NL80211_ATTR_RX_FRAME_TYPES, NL80211_ATTR_FRAME_TYPE, + NL80211_ATTR_CONTROL_PORT_ETHERTYPE, + NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f2740537b5d..4c8c727d0cc 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -763,6 +763,10 @@ const u8 *ieee80211_bss_get_ie(struct cfg80211_bss *bss, u8 ie); * sets/clears %NL80211_STA_FLAG_AUTHORIZED. If true, the driver is * required to assume that the port is unauthorized until authorized by * user space. Otherwise, port is marked authorized by default. + * @control_port_ethertype: the control port protocol that should be + * allowed through even on unauthorized ports + * @control_port_no_encrypt: TRUE to prevent encryption of control port + * protocol frames. */ struct cfg80211_crypto_settings { u32 wpa_versions; @@ -772,6 +776,8 @@ struct cfg80211_crypto_settings { int n_akm_suites; u32 akm_suites[NL80211_MAX_NR_AKM_SUITES]; bool control_port; + __be16 control_port_ethertype; + bool control_port_no_encrypt; }; /** @@ -1293,15 +1299,19 @@ struct cfg80211_ops { * @WIPHY_FLAG_4ADDR_AP: supports 4addr mode even on AP (with a single station * on a VLAN interface) * @WIPHY_FLAG_4ADDR_STATION: supports 4addr mode even as a station + * @WIPHY_FLAG_CONTROL_PORT_PROTOCOL: This device supports setting the + * control port protocol ethertype. The device also honours the + * control_port_no_encrypt flag. */ enum wiphy_flags { - WIPHY_FLAG_CUSTOM_REGULATORY = BIT(0), - WIPHY_FLAG_STRICT_REGULATORY = BIT(1), - WIPHY_FLAG_DISABLE_BEACON_HINTS = BIT(2), - WIPHY_FLAG_NETNS_OK = BIT(3), - WIPHY_FLAG_PS_ON_BY_DEFAULT = BIT(4), - WIPHY_FLAG_4ADDR_AP = BIT(5), - WIPHY_FLAG_4ADDR_STATION = BIT(6), + WIPHY_FLAG_CUSTOM_REGULATORY = BIT(0), + WIPHY_FLAG_STRICT_REGULATORY = BIT(1), + WIPHY_FLAG_DISABLE_BEACON_HINTS = BIT(2), + WIPHY_FLAG_NETNS_OK = BIT(3), + WIPHY_FLAG_PS_ON_BY_DEFAULT = BIT(4), + WIPHY_FLAG_4ADDR_AP = BIT(5), + WIPHY_FLAG_4ADDR_STATION = BIT(6), + WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7), }; struct mac_address { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 49f5ca35e78..85a23de7bff 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -136,6 +136,8 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { .len = sizeof(struct nl80211_sta_flag_update), }, [NL80211_ATTR_CONTROL_PORT] = { .type = NLA_FLAG }, + [NL80211_ATTR_CONTROL_PORT_ETHERTYPE] = { .type = NLA_U16 }, + [NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT] = { .type = NLA_FLAG }, [NL80211_ATTR_PRIVACY] = { .type = NLA_FLAG }, [NL80211_ATTR_CIPHER_SUITE_GROUP] = { .type = NLA_U32 }, [NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 }, @@ -474,6 +476,9 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT_U8(msg, NL80211_ATTR_MAX_NUM_PMKIDS, dev->wiphy.max_num_pmkids); + if (dev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) + NLA_PUT_FLAG(msg, NL80211_ATTR_CONTROL_PORT_ETHERTYPE); + nl_modes = nla_nest_start(msg, NL80211_ATTR_SUPPORTED_IFTYPES); if (!nl_modes) goto nla_put_failure; @@ -3691,7 +3696,8 @@ unlock_rtnl: return err; } -static int nl80211_crypto_settings(struct genl_info *info, +static int nl80211_crypto_settings(struct cfg80211_registered_device *rdev, + struct genl_info *info, struct cfg80211_crypto_settings *settings, int cipher_limit) { @@ -3699,6 +3705,19 @@ static int nl80211_crypto_settings(struct genl_info *info, settings->control_port = info->attrs[NL80211_ATTR_CONTROL_PORT]; + if (info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]) { + u16 proto; + proto = nla_get_u16( + info->attrs[NL80211_ATTR_CONTROL_PORT_ETHERTYPE]); + settings->control_port_ethertype = cpu_to_be16(proto); + if (!(rdev->wiphy.flags & WIPHY_FLAG_CONTROL_PORT_PROTOCOL) && + proto != ETH_P_PAE) + return -EINVAL; + if (info->attrs[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT]) + settings->control_port_no_encrypt = true; + } else + settings->control_port_ethertype = cpu_to_be16(ETH_P_PAE); + if (info->attrs[NL80211_ATTR_CIPHER_SUITES_PAIRWISE]) { void *data; int len, i; @@ -3826,7 +3845,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_PREV_BSSID]) prev_bssid = nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]); - err = nl80211_crypto_settings(info, &crypto, 1); + err = nl80211_crypto_settings(rdev, info, &crypto, 1); if (!err) err = cfg80211_mlme_assoc(rdev, dev, chan, bssid, prev_bssid, ssid, ssid_len, ie, ie_len, use_mfp, @@ -4303,7 +4322,7 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info) connect.privacy = info->attrs[NL80211_ATTR_PRIVACY]; - err = nl80211_crypto_settings(info, &connect.crypto, + err = nl80211_crypto_settings(rdev, info, &connect.crypto, NL80211_MAX_NR_CIPHER_SUITES); if (err) return err; diff --git a/net/wireless/wext-sme.c b/net/wireless/wext-sme.c index 9818198add8..6fffe62d7c2 100644 --- a/net/wireless/wext-sme.c +++ b/net/wireless/wext-sme.c @@ -197,6 +197,8 @@ int cfg80211_mgd_wext_siwessid(struct net_device *dev, wdev->wext.connect.ssid_len = len; wdev->wext.connect.crypto.control_port = false; + wdev->wext.connect.crypto.control_port_ethertype = + cpu_to_be16(ETH_P_PAE); err = cfg80211_mgd_wext_connect(rdev, wdev); out: -- cgit v1.2.3 From 34d4bc4d41d282a66dafe1b01a7d46bad468cefb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 27 Aug 2010 12:35:58 +0200 Subject: mac80211: support runtime interface type changes Add support to mac80211 for changing the interface type even when the interface is UP, if the driver supports it. To achieve this * add a new driver callback for switching, * split some of the interface up/down code out into new functions (do_open/do_stop), and * maintain an own __SDATA_RUNNING bit that will not be set during interface type, so that any other code doesn't use the interface. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 9 +++ net/mac80211/cfg.c | 3 - net/mac80211/driver-ops.h | 14 ++++ net/mac80211/driver-trace.h | 25 +++++++ net/mac80211/ieee80211_i.h | 14 +++- net/mac80211/iface.c | 157 ++++++++++++++++++++++++++++++++++---------- 6 files changed, 185 insertions(+), 37 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 8f97548b6d8..f91fc331369 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1537,6 +1537,12 @@ enum ieee80211_ampdu_mlme_action { * negative error code (which will be seen in userspace.) * Must be implemented and can sleep. * + * @change_interface: Called when a netdevice changes type. This callback + * is optional, but only if it is supported can interface types be + * switched while the interface is UP. The callback may sleep. + * Note that while an interface is being switched, it will not be + * found by the interface iteration callbacks. + * * @remove_interface: Notifies a driver that an interface is going down. * The @stop callback is called after this if it is the last interface * and no monitor interfaces are present. @@ -1693,6 +1699,9 @@ struct ieee80211_ops { void (*stop)(struct ieee80211_hw *hw); int (*add_interface)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); + int (*change_interface)(struct ieee80211_hw *hw, + struct ieee80211_vif *vif, + enum nl80211_iftype new_type); void (*remove_interface)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); int (*config)(struct ieee80211_hw *hw, u32 changed); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index f82b18e996b..5de1ca3f17b 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -52,9 +52,6 @@ static int ieee80211_change_iface(struct wiphy *wiphy, struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); int ret; - if (ieee80211_sdata_running(sdata)) - return -EBUSY; - ret = ieee80211_if_change_type(sdata, type); if (ret) return ret; diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 14123dce544..6064b7b09e0 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -54,6 +54,20 @@ static inline int drv_add_interface(struct ieee80211_local *local, return ret; } +static inline int drv_change_interface(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + enum nl80211_iftype type) +{ + int ret; + + might_sleep(); + + trace_drv_change_interface(local, sdata, type); + ret = local->ops->change_interface(&local->hw, &sdata->vif, type); + trace_drv_return_int(local, ret); + return ret; +} + static inline void drv_remove_interface(struct ieee80211_local *local, struct ieee80211_vif *vif) { diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index b5a95582d81..f6f3d89e43f 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -136,6 +136,31 @@ TRACE_EVENT(drv_add_interface, ) ); +TRACE_EVENT(drv_change_interface, + TP_PROTO(struct ieee80211_local *local, + struct ieee80211_sub_if_data *sdata, + enum nl80211_iftype type), + + TP_ARGS(local, sdata, type), + + TP_STRUCT__entry( + LOCAL_ENTRY + VIF_ENTRY + __field(u32, new_type) + ), + + TP_fast_assign( + LOCAL_ASSIGN; + VIF_ASSIGN; + __entry->new_type = type; + ), + + TP_printk( + LOCAL_PR_FMT VIF_PR_FMT " new type:%d", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->new_type + ) +); + TRACE_EVENT(drv_remove_interface, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata), diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f6483778868..d529bd5eab4 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -472,6 +472,16 @@ enum ieee80211_sub_if_data_flags { IEEE80211_SDATA_DONT_BRIDGE_PACKETS = BIT(3), }; +/** + * enum ieee80211_sdata_state_bits - virtual interface state bits + * @SDATA_STATE_RUNNING: virtual interface is up & running; this + * mirrors netif_running() but is separate for interface type + * change handling while the interface is up + */ +enum ieee80211_sdata_state_bits { + SDATA_STATE_RUNNING, +}; + struct ieee80211_sub_if_data { struct list_head list; @@ -485,6 +495,8 @@ struct ieee80211_sub_if_data { unsigned int flags; + unsigned long state; + int drop_unencrypted; char name[IFNAMSIZ]; @@ -1087,7 +1099,7 @@ void ieee80211_recalc_idle(struct ieee80211_local *local); static inline bool ieee80211_sdata_running(struct ieee80211_sub_if_data *sdata) { - return netif_running(sdata->dev); + return test_bit(SDATA_STATE_RUNNING, &sdata->state); } /* tx handling */ diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index cba3d806d72..c1cc200ac81 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -148,7 +148,12 @@ static int ieee80211_check_concurrent_iface(struct ieee80211_sub_if_data *sdata, return 0; } -static int ieee80211_open(struct net_device *dev) +/* + * NOTE: Be very careful when changing this function, it must NOT return + * an error on interface type changes that have been pre-checked, so most + * checks should be in ieee80211_check_concurrent_iface. + */ +static int ieee80211_do_open(struct net_device *dev, bool coming_up) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); struct ieee80211_local *local = sdata->local; @@ -157,15 +162,6 @@ static int ieee80211_open(struct net_device *dev) int res; u32 hw_reconf_flags = 0; - /* fail early if user set an invalid address */ - if (!is_zero_ether_addr(dev->dev_addr) && - !is_valid_ether_addr(dev->dev_addr)) - return -EADDRNOTAVAIL; - - res = ieee80211_check_concurrent_iface(sdata, sdata->vif.type); - if (res) - return res; - switch (sdata->vif.type) { case NL80211_IFTYPE_WDS: if (!is_valid_ether_addr(sdata->u.wds.remote_addr)) @@ -258,9 +254,11 @@ static int ieee80211_open(struct net_device *dev) netif_carrier_on(dev); break; default: - res = drv_add_interface(local, &sdata->vif); - if (res) - goto err_stop; + if (coming_up) { + res = drv_add_interface(local, &sdata->vif); + if (res) + goto err_stop; + } if (ieee80211_vif_is_mesh(&sdata->vif)) { local->fif_other_bss++; @@ -316,7 +314,9 @@ static int ieee80211_open(struct net_device *dev) hw_reconf_flags |= __ieee80211_recalc_idle(local); mutex_unlock(&local->mtx); - local->open_count++; + if (coming_up) + local->open_count++; + if (hw_reconf_flags) { ieee80211_hw_config(local, hw_reconf_flags); /* @@ -331,6 +331,8 @@ static int ieee80211_open(struct net_device *dev) netif_tx_start_all_queues(dev); + set_bit(SDATA_STATE_RUNNING, &sdata->state); + return 0; err_del_interface: drv_remove_interface(local, &sdata->vif); @@ -344,19 +346,38 @@ static int ieee80211_open(struct net_device *dev) return res; } -static int ieee80211_stop(struct net_device *dev) +static int ieee80211_open(struct net_device *dev) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + int err; + + /* fail early if user set an invalid address */ + if (!is_zero_ether_addr(dev->dev_addr) && + !is_valid_ether_addr(dev->dev_addr)) + return -EADDRNOTAVAIL; + + err = ieee80211_check_concurrent_iface(sdata, sdata->vif.type); + if (err) + return err; + + return ieee80211_do_open(dev, true); +} + +static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, + bool going_down) +{ struct ieee80211_local *local = sdata->local; unsigned long flags; struct sk_buff *skb, *tmp; u32 hw_reconf_flags = 0; int i; + clear_bit(SDATA_STATE_RUNNING, &sdata->state); + /* * Stop TX on this interface first. */ - netif_tx_stop_all_queues(dev); + netif_tx_stop_all_queues(sdata->dev); /* * Purge work for this interface. @@ -394,11 +415,12 @@ static int ieee80211_stop(struct net_device *dev) if (sdata->vif.type == NL80211_IFTYPE_AP) local->fif_pspoll--; - netif_addr_lock_bh(dev); + netif_addr_lock_bh(sdata->dev); spin_lock_bh(&local->filter_lock); - __hw_addr_unsync(&local->mc_list, &dev->mc, dev->addr_len); + __hw_addr_unsync(&local->mc_list, &sdata->dev->mc, + sdata->dev->addr_len); spin_unlock_bh(&local->filter_lock); - netif_addr_unlock_bh(dev); + netif_addr_unlock_bh(sdata->dev); ieee80211_configure_filter(local); @@ -432,7 +454,8 @@ static int ieee80211_stop(struct net_device *dev) WARN_ON(!list_empty(&sdata->u.ap.vlans)); } - local->open_count--; + if (going_down) + local->open_count--; switch (sdata->vif.type) { case NL80211_IFTYPE_AP_VLAN: @@ -504,7 +527,8 @@ static int ieee80211_stop(struct net_device *dev) */ ieee80211_free_keys(sdata); - drv_remove_interface(local, &sdata->vif); + if (going_down) + drv_remove_interface(local, &sdata->vif); } sdata->bss = NULL; @@ -540,6 +564,13 @@ static int ieee80211_stop(struct net_device *dev) } } spin_unlock_irqrestore(&local->queue_stop_reason_lock, flags); +} + +static int ieee80211_stop(struct net_device *dev) +{ + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); + + ieee80211_do_stop(sdata, true); return 0; } @@ -857,9 +888,72 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, ieee80211_debugfs_add_netdev(sdata); } +static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata, + enum nl80211_iftype type) +{ + struct ieee80211_local *local = sdata->local; + int ret, err; + + ASSERT_RTNL(); + + if (!local->ops->change_interface) + return -EBUSY; + + switch (sdata->vif.type) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + /* + * Could maybe also all others here? + * Just not sure how that interacts + * with the RX/config path e.g. for + * mesh. + */ + break; + default: + return -EBUSY; + } + + switch (type) { + case NL80211_IFTYPE_AP: + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + /* + * Could probably support everything + * but WDS here (WDS do_open can fail + * under memory pressure, which this + * code isn't prepared to handle). + */ + break; + default: + return -EBUSY; + } + + ret = ieee80211_check_concurrent_iface(sdata, type); + if (ret) + return ret; + + ieee80211_do_stop(sdata, false); + + ieee80211_teardown_sdata(sdata->dev); + + ret = drv_change_interface(local, sdata, type); + if (ret) + type = sdata->vif.type; + + ieee80211_setup_sdata(sdata, type); + + err = ieee80211_do_open(sdata->dev, false); + WARN(err, "type change: do_open returned %d", err); + + return ret; +} + int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, enum nl80211_iftype type) { + int ret; + ASSERT_RTNL(); if (type == sdata->vif.type) @@ -870,18 +964,15 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, type == NL80211_IFTYPE_ADHOC) return -EOPNOTSUPP; - /* - * We could, here, on changes between IBSS/STA/MESH modes, - * invoke an MLME function instead that disassociates etc. - * and goes into the requested mode. - */ - - if (ieee80211_sdata_running(sdata)) - return -EBUSY; - - /* Purge and reset type-dependent state. */ - ieee80211_teardown_sdata(sdata->dev); - ieee80211_setup_sdata(sdata, type); + if (ieee80211_sdata_running(sdata)) { + ret = ieee80211_runtime_change_iftype(sdata, type); + if (ret) + return ret; + } else { + /* Purge and reset type-dependent state. */ + ieee80211_teardown_sdata(sdata->dev); + ieee80211_setup_sdata(sdata, type); + } /* reset some values that shouldn't be kept across type changes */ sdata->vif.bss_conf.basic_rates = -- cgit v1.2.3 From dca43c75e7e545694a9dd6288553f55c53e2a3a3 Mon Sep 17 00:00:00 2001 From: Jerry Chu Date: Fri, 27 Aug 2010 19:13:28 +0000 Subject: tcp: Add TCP_USER_TIMEOUT socket option. This patch provides a "user timeout" support as described in RFC793. The socket option is also needed for the the local half of RFC5482 "TCP User Timeout Option". TCP_USER_TIMEOUT is a TCP level socket option that takes an unsigned int, when > 0, to specify the maximum amount of time in ms that transmitted data may remain unacknowledged before TCP will forcefully close the corresponding connection and return ETIMEDOUT to the application. If 0 is given, TCP will continue to use the system default. Increasing the user timeouts allows a TCP connection to survive extended periods without end-to-end connectivity. Decreasing the user timeouts allows applications to "fail fast" if so desired. Otherwise it may take upto 20 minutes with the current system defaults in a normal WAN environment. The socket option can be made during any state of a TCP connection, but is only effective during the synchronized states of a connection (ESTABLISHED, FIN-WAIT-1, FIN-WAIT-2, CLOSE-WAIT, CLOSING, or LAST-ACK). Moreover, when used with the TCP keepalive (SO_KEEPALIVE) option, TCP_USER_TIMEOUT will overtake keepalive to determine when to close a connection due to keepalive failure. The option does not change in anyway when TCP retransmits a packet, nor when a keepalive probe will be sent. This option, like many others, will be inherited by an acceptor from its listener. Signed-off-by: H.K. Jerry Chu Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + include/net/inet_connection_sock.h | 1 + net/ipv4/tcp.c | 11 ++++++++++- net/ipv4/tcp_timer.c | 40 ++++++++++++++++++++++++-------------- 4 files changed, 37 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index a778ee02459..e64f4c67d0e 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -105,6 +105,7 @@ enum { #define TCP_COOKIE_TRANSACTIONS 15 /* TCP Cookie Transactions */ #define TCP_THIN_LINEAR_TIMEOUTS 16 /* Use linear timeouts for thin streams*/ #define TCP_THIN_DUPACK 17 /* Fast retrans. after 1 dupack */ +#define TCP_USER_TIMEOUT 18 /* How long for loss retry before timeout */ /* for TCP_INFO socket option */ #define TCPI_OPT_TIMESTAMPS 1 diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index b6d3b55da19..e4f494b42e0 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -125,6 +125,7 @@ struct inet_connection_sock { int probe_size; } icsk_mtup; u32 icsk_ca_priv[16]; + u32 icsk_user_timeout; #define ICSK_CA_PRIV_SIZE (16 * sizeof(u32)) }; diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 176e11aaea7..cf325452875 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -2391,7 +2391,12 @@ static int do_tcp_setsockopt(struct sock *sk, int level, err = tp->af_specific->md5_parse(sk, optval, optlen); break; #endif - + case TCP_USER_TIMEOUT: + /* Cap the max timeout in ms TCP will retry/retrans + * before giving up and aborting (ETIMEDOUT) a connection. + */ + icsk->icsk_user_timeout = msecs_to_jiffies(val); + break; default: err = -ENOPROTOOPT; break; @@ -2610,6 +2615,10 @@ static int do_tcp_getsockopt(struct sock *sk, int level, case TCP_THIN_DUPACK: val = tp->thin_dupack; break; + + case TCP_USER_TIMEOUT: + val = jiffies_to_msecs(icsk->icsk_user_timeout); + break; default: return -ENOPROTOOPT; } diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index 808bb920c9f..11569deccbe 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -138,10 +138,10 @@ static void tcp_mtu_probing(struct inet_connection_sock *icsk, struct sock *sk) * retransmissions with an initial RTO of TCP_RTO_MIN. */ static bool retransmits_timed_out(struct sock *sk, - unsigned int boundary) + unsigned int boundary, + unsigned int timeout) { - unsigned int timeout, linear_backoff_thresh; - unsigned int start_ts; + unsigned int linear_backoff_thresh, start_ts; if (!inet_csk(sk)->icsk_retransmits) return false; @@ -151,14 +151,15 @@ static bool retransmits_timed_out(struct sock *sk, else start_ts = tcp_sk(sk)->retrans_stamp; - linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); - - if (boundary <= linear_backoff_thresh) - timeout = ((2 << boundary) - 1) * TCP_RTO_MIN; - else - timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN + - (boundary - linear_backoff_thresh) * TCP_RTO_MAX; + if (likely(timeout == 0)) { + linear_backoff_thresh = ilog2(TCP_RTO_MAX/TCP_RTO_MIN); + if (boundary <= linear_backoff_thresh) + timeout = ((2 << boundary) - 1) * TCP_RTO_MIN; + else + timeout = ((2 << linear_backoff_thresh) - 1) * TCP_RTO_MIN + + (boundary - linear_backoff_thresh) * TCP_RTO_MAX; + } return (tcp_time_stamp - start_ts) >= timeout; } @@ -174,7 +175,7 @@ static int tcp_write_timeout(struct sock *sk) dst_negative_advice(sk); retry_until = icsk->icsk_syn_retries ? : sysctl_tcp_syn_retries; } else { - if (retransmits_timed_out(sk, sysctl_tcp_retries1)) { + if (retransmits_timed_out(sk, sysctl_tcp_retries1, 0)) { /* Black hole detection */ tcp_mtu_probing(icsk, sk); @@ -187,14 +188,16 @@ static int tcp_write_timeout(struct sock *sk) retry_until = tcp_orphan_retries(sk, alive); do_reset = alive || - !retransmits_timed_out(sk, retry_until); + !retransmits_timed_out(sk, retry_until, 0); if (tcp_out_of_resources(sk, do_reset)) return 1; } } - if (retransmits_timed_out(sk, retry_until)) { + if (retransmits_timed_out(sk, retry_until, + (1 << sk->sk_state) & (TCPF_SYN_SENT | TCPF_SYN_RECV) ? 0 : + icsk->icsk_user_timeout)) { /* Has it gone just too far? */ tcp_write_err(sk); return 1; @@ -436,7 +439,7 @@ out_reset_timer: icsk->icsk_rto = min(icsk->icsk_rto << 1, TCP_RTO_MAX); } inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, icsk->icsk_rto, TCP_RTO_MAX); - if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1)) + if (retransmits_timed_out(sk, sysctl_tcp_retries1 + 1, 0)) __sk_dst_reset(sk); out:; @@ -556,7 +559,14 @@ static void tcp_keepalive_timer (unsigned long data) elapsed = keepalive_time_elapsed(tp); if (elapsed >= keepalive_time_when(tp)) { - if (icsk->icsk_probes_out >= keepalive_probes(tp)) { + /* If the TCP_USER_TIMEOUT option is enabled, use that + * to determine when to timeout instead. + */ + if ((icsk->icsk_user_timeout != 0 && + elapsed >= icsk->icsk_user_timeout && + icsk->icsk_probes_out > 0) || + (icsk->icsk_user_timeout == 0 && + icsk->icsk_probes_out >= keepalive_probes(tp))) { tcp_send_active_reset(sk, GFP_ATOMIC); tcp_write_err(sk); goto out; -- cgit v1.2.3 From 22b71c8f4f3db8df92f5e7b081c265bc56c0bd2f Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 29 Aug 2010 19:23:12 +0000 Subject: tcp/dccp: Consolidate common code for RFC 3390 conversion This patch consolidates initial-window code common to TCP and CCID-2: * TCP uses RFC 3390 in a packet-oriented manner (tcp_input.c) and * CCID-2 uses RFC 3390 in packet-oriented manner (RFC 4341). Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- include/net/tcp.h | 15 +++++++++++++++ net/dccp/ccids/ccid2.c | 8 ++------ net/ipv4/tcp_input.c | 17 ++--------------- 3 files changed, 19 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index df6a2eb2019..a64022199b6 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -779,6 +779,21 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) /* Use define here intentionally to get WARN_ON location shown at the caller */ #define tcp_verify_left_out(tp) WARN_ON(tcp_left_out(tp) > tp->packets_out) +/* + * Convert RFC 3390 larger initial window into an equivalent number of packets. + * + * John Heffner states: + * + * The RFC specifies a window of no more than 4380 bytes + * unless 2*MSS > 4380. Reading the pseudocode in the RFC + * is a bit misleading because they use a clamp at 4380 bytes + * rather than a multiplier in the relevant range. + */ +static inline u32 rfc3390_bytes_to_packets(const u32 smss) +{ + return smss <= 1095 ? 4 : (smss > 1460 ? 2 : 3); +} + extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh); extern __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst); diff --git a/net/dccp/ccids/ccid2.c b/net/dccp/ccids/ccid2.c index 8c95813bcc6..b9c942a09c9 100644 --- a/net/dccp/ccids/ccid2.c +++ b/net/dccp/ccids/ccid2.c @@ -641,12 +641,8 @@ static int ccid2_hc_tx_init(struct ccid *ccid, struct sock *sk) /* RFC 4341, 5: initialise ssthresh to arbitrarily high (max) value */ hc->tx_ssthresh = ~0U; - /* - * RFC 4341, 5: "The cwnd parameter is initialized to at most four - * packets for new connections, following the rules from [RFC3390]". - * We need to convert the bytes of RFC3390 into the packets of RFC 4341. - */ - hc->tx_cwnd = clamp(4380U / dp->dccps_mss_cache, 2U, 4U); + /* Use larger initial windows (RFC 4341, section 5). */ + hc->tx_cwnd = rfc3390_bytes_to_packets(dp->dccps_mss_cache); /* Make sure that Ack Ratio is enabled and within bounds. */ max_ratio = DIV_ROUND_UP(hc->tx_cwnd, 2); diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e663b78a2ef..1bc87a05c73 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -805,25 +805,12 @@ void tcp_update_metrics(struct sock *sk) } } -/* Numbers are taken from RFC3390. - * - * John Heffner states: - * - * The RFC specifies a window of no more than 4380 bytes - * unless 2*MSS > 4380. Reading the pseudocode in the RFC - * is a bit misleading because they use a clamp at 4380 bytes - * rather than use a multiplier in the relevant range. - */ __u32 tcp_init_cwnd(struct tcp_sock *tp, struct dst_entry *dst) { __u32 cwnd = (dst ? dst_metric(dst, RTAX_INITCWND) : 0); - if (!cwnd) { - if (tp->mss_cache > 1460) - cwnd = 2; - else - cwnd = (tp->mss_cache > 1095) ? 3 : 4; - } + if (!cwnd) + cwnd = rfc3390_bytes_to_packets(tp->mss_cache); return min_t(__u32, cwnd, tp->snd_cwnd_clamp); } -- cgit v1.2.3 From 3d5b99ae82f8742e3bb1f8634fd11ac36ea19ee1 Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sun, 29 Aug 2010 19:27:34 +0000 Subject: TCP: update initial windows according to RFC 5681 This updates the use of larger initial windows, as originally specified in RFC 3390, to use the newer IW values specified in RFC 5681, section 3.1. The changes made in RFC 5681 are: a) the setting now is more clearly specified in units of segments (as the comments by John Heffner emphasized, this was not very clear in RFC 3390); b) for connections with 1095 < SMSS <= 2190 there is now a change: - RFC 3390 says that IW <= 4380, - RFC 5681 says that IW = 3 * SMSS <= 6570. Since RFC 3390 is older and "only" proposed standard, whereas the newer RFC 5681 is already draft standard, it seems preferable to use the newer IW variant. Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- include/net/tcp.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index a64022199b6..11dbacc886c 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -781,17 +781,11 @@ static inline __u32 tcp_current_ssthresh(const struct sock *sk) /* * Convert RFC 3390 larger initial window into an equivalent number of packets. - * - * John Heffner states: - * - * The RFC specifies a window of no more than 4380 bytes - * unless 2*MSS > 4380. Reading the pseudocode in the RFC - * is a bit misleading because they use a clamp at 4380 bytes - * rather than a multiplier in the relevant range. + * This is based on the numbers specified in RFC 5681, 3.1. */ static inline u32 rfc3390_bytes_to_packets(const u32 smss) { - return smss <= 1095 ? 4 : (smss > 1460 ? 2 : 3); + return smss <= 1095 ? 4 : (smss > 2190 ? 2 : 3); } extern void tcp_enter_cwr(struct sock *sk, const int set_ssthresh); -- cgit v1.2.3 From e3634169bcc0cce33c815865d62ab378739f7389 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 7 Sep 2010 05:55:38 +0000 Subject: include/net/raw.h: Convert raw_seq_private macro to inline Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/net/raw.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/raw.h b/include/net/raw.h index 43c57502659..42ce6fe7a2d 100644 --- a/include/net/raw.h +++ b/include/net/raw.h @@ -45,7 +45,10 @@ struct raw_iter_state { struct raw_hashinfo *h; }; -#define raw_seq_private(seq) ((struct raw_iter_state *)(seq)->private) +static inline struct raw_iter_state *raw_seq_private(struct seq_file *seq) +{ + return seq->private; +} void *raw_seq_start(struct seq_file *seq, loff_t *pos); void *raw_seq_next(struct seq_file *seq, void *v, loff_t *pos); void raw_seq_stop(struct seq_file *seq, void *v); -- cgit v1.2.3 From 2944f45d9db851e186774df7c9cbf075f4a585c6 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Tue, 14 Sep 2010 18:37:20 +0200 Subject: mac80211: add a note about iterating interfaces during add_interface() Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- include/net/mac80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index f91fc331369..19a5cb4a658 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2293,6 +2293,7 @@ void ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted); * This function allows the iterator function to sleep, when the iterator * function is atomic @ieee80211_iterate_active_interfaces_atomic can * be used. + * Does not iterate over a new interface during add_interface() * * @hw: the hardware struct of which the interfaces should be iterated over * @iterator: the iterator function to call @@ -2310,6 +2311,7 @@ void ieee80211_iterate_active_interfaces(struct ieee80211_hw *hw, * hardware that are currently active and calls the callback for them. * This function requires the iterator callback function to be atomic, * if that is not desired, use @ieee80211_iterate_active_interfaces instead. + * Does not iterate over a new interface during add_interface() * * @hw: the hardware struct of which the interfaces should be iterated over * @iterator: the iterator function to call, cannot sleep -- cgit v1.2.3 From 55b1804c678e679e1018671bd40b583eab124689 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 13 Sep 2010 18:24:01 +0000 Subject: net/irda: Use static const char * const where possible Signed-off-by: Joe Perches Signed-off-by: David S. Miller --- include/net/irda/irlan_event.h | 2 +- net/irda/irlan/irlan_event.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/irda/irlan_event.h b/include/net/irda/irlan_event.h index 6d9539f0580..018b5a77e61 100644 --- a/include/net/irda/irlan_event.h +++ b/include/net/irda/irlan_event.h @@ -67,7 +67,7 @@ typedef enum { IRLAN_WATCHDOG_TIMEOUT, } IRLAN_EVENT; -extern char *irlan_state[]; +extern const char * const irlan_state[]; void irlan_do_client_event(struct irlan_cb *self, IRLAN_EVENT event, struct sk_buff *skb); diff --git a/net/irda/irlan/irlan_event.c b/net/irda/irlan/irlan_event.c index cbcb4eb5403..43f16040a6f 100644 --- a/net/irda/irlan/irlan_event.c +++ b/net/irda/irlan/irlan_event.c @@ -24,7 +24,7 @@ #include -char *irlan_state[] = { +const char * const irlan_state[] = { "IRLAN_IDLE", "IRLAN_QUERY", "IRLAN_CONN", -- cgit v1.2.3 From 6482f554e2b9cbe733d63124765104f29cf0c9ad Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Wed, 15 Sep 2010 12:19:53 +0000 Subject: Phonet: remove dangling pipe if an endpoint is closed early MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Closing a pipe endpoint is not normally allowed by the Phonet pipe, other than as a side after-effect of removing the pipe between two endpoints. But there is no way to prevent Linux userspace processes from being killed or suffering from bugs, so this can still happen. We might as well forcefully close Phonet pipe endpoints then. The cellular modem supports only a few existing pipes at a time. So we really should not leak them. This change instructs the modem to destroy the pipe if either of the pipe's endpoint (Linux socket) is closed too early. Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/pep.h | 5 +++++ net/phonet/pep.c | 27 ++++++++++++++++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index 35672b1cf44..37f23dc05de 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -77,6 +77,11 @@ static inline struct pnpipehdr *pnp_hdr(struct sk_buff *skb) #define MAX_PNPIPE_HEADER (MAX_PHONET_HEADER + 4) enum { + PNS_PIPE_CREATE_REQ = 0x00, + PNS_PIPE_CREATE_RESP, + PNS_PIPE_REMOVE_REQ, + PNS_PIPE_REMOVE_RESP, + PNS_PIPE_DATA = 0x20, PNS_PIPE_ALIGNED_DATA, diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 04e34196c9d..d0e7eb24c8b 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -620,6 +620,28 @@ drop: return err; } +static int pipe_do_remove(struct sock *sk) +{ + struct pep_sock *pn = pep_sk(sk); + struct pnpipehdr *ph; + struct sk_buff *skb; + + skb = alloc_skb(MAX_PNPIPE_HEADER, GFP_KERNEL); + if (!skb) + return -ENOMEM; + + skb_reserve(skb, MAX_PNPIPE_HEADER); + __skb_push(skb, sizeof(*ph)); + skb_reset_transport_header(skb); + ph = pnp_hdr(skb); + ph->utid = 0; + ph->message_id = PNS_PIPE_REMOVE_REQ; + ph->pipe_handle = pn->pipe_handle; + ph->data[0] = PAD; + + return pn_skb_send(sk, skb, &pipe_srv); +} + /* associated socket ceases to exist */ static void pep_sock_close(struct sock *sk, long timeout) { @@ -638,7 +660,10 @@ static void pep_sock_close(struct sock *sk, long timeout) sk_for_each_safe(sknode, p, n, &pn->ackq) sk_del_node_init(sknode); sk->sk_state = TCP_CLOSE; - } + } else if ((1 << sk->sk_state) & (TCPF_SYN_RECV|TCPF_ESTABLISHED)) + /* Forcefully remove dangling Phonet pipe */ + pipe_do_remove(sk); + ifindex = pn->ifindex; pn->ifindex = 0; release_sock(sk); -- cgit v1.2.3 From 4e3d16ce5e82648d7f4dfd28b6cf8fe2e9a9efc3 Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Wed, 15 Sep 2010 12:30:11 +0000 Subject: Phonet: resource routing backend MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When both destination device and object are nul, Phonet routes the packet according to the resource field. In fact, this is the most common pattern when sending Phonet "request" packets. In this case, the packet is delivered to whichever endpoint (socket) has registered the resource. This adds a new table so that Linux processes can register their Phonet sockets to Phonet resources, if they have adequate privileges. (Namespace support is not implemented at the moment.) Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/phonet.h | 5 +++ net/phonet/socket.c | 88 +++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 93 insertions(+) (limited to 'include/net') diff --git a/include/net/phonet/phonet.h b/include/net/phonet/phonet.h index 7b114079a51..d5df797f954 100644 --- a/include/net/phonet/phonet.h +++ b/include/net/phonet/phonet.h @@ -54,6 +54,11 @@ void pn_sock_hash(struct sock *sk); void pn_sock_unhash(struct sock *sk); int pn_sock_get_port(struct sock *sk, unsigned short sport); +struct sock *pn_find_sock_by_res(struct net *net, u8 res); +int pn_sock_bind_res(struct sock *sock, u8 res); +int pn_sock_unbind_res(struct sock *sk, u8 res); +void pn_sock_unbind_all_res(struct sock *sk); + int pn_skb_send(struct sock *sk, struct sk_buff *skb, const struct sockaddr_pn *target); diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 7c91f739f13..4c29a23e900 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -565,3 +565,91 @@ const struct file_operations pn_sock_seq_fops = { .release = seq_release_net, }; #endif + +static struct { + struct sock *sk[256]; +} pnres; + +/* + * Find and hold socket based on resource. + */ +struct sock *pn_find_sock_by_res(struct net *net, u8 res) +{ + struct sock *sk; + + if (!net_eq(net, &init_net)) + return NULL; + + rcu_read_lock(); + sk = rcu_dereference(pnres.sk[res]); + if (sk) + sock_hold(sk); + rcu_read_unlock(); + return sk; +} + +static DEFINE_MUTEX(resource_mutex); + +int pn_sock_bind_res(struct sock *sk, u8 res) +{ + int ret = -EADDRINUSE; + + if (!net_eq(sock_net(sk), &init_net)) + return -ENOIOCTLCMD; + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + if (pn_socket_autobind(sk->sk_socket)) + return -EAGAIN; + + mutex_lock(&resource_mutex); + if (pnres.sk[res] == NULL) { + sock_hold(sk); + rcu_assign_pointer(pnres.sk[res], sk); + ret = 0; + } + mutex_unlock(&resource_mutex); + return ret; +} + +int pn_sock_unbind_res(struct sock *sk, u8 res) +{ + int ret = -ENOENT; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + mutex_lock(&resource_mutex); + if (pnres.sk[res] == sk) { + rcu_assign_pointer(pnres.sk[res], NULL); + ret = 0; + } + mutex_unlock(&resource_mutex); + + if (ret == 0) { + synchronize_rcu(); + sock_put(sk); + } + return ret; +} + +void pn_sock_unbind_all_res(struct sock *sk) +{ + unsigned res, match = 0; + + mutex_lock(&resource_mutex); + for (res = 0; res < 256; res++) { + if (pnres.sk[res] == sk) { + rcu_assign_pointer(pnres.sk[res], NULL); + match++; + } + } + mutex_unlock(&resource_mutex); + + if (match == 0) + return; + synchronize_rcu(); + while (match > 0) { + sock_put(sk); + match--; + } +} -- cgit v1.2.3 From 507215f8d04f9e61f38c975e61d93bcafd30815f Mon Sep 17 00:00:00 2001 From: Rémi Denis-Courmont Date: Wed, 15 Sep 2010 12:30:14 +0000 Subject: Phonet: list subscribed resources via proc_fs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/net/phonet/pn_dev.h | 1 + net/phonet/pn_dev.c | 2 + net/phonet/socket.c | 96 +++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 99 insertions(+) (limited to 'include/net') diff --git a/include/net/phonet/pn_dev.h b/include/net/phonet/pn_dev.h index 2d16783d5e2..13649eb5741 100644 --- a/include/net/phonet/pn_dev.h +++ b/include/net/phonet/pn_dev.h @@ -57,5 +57,6 @@ struct net_device *phonet_route_output(struct net *net, u8 daddr); #define PN_NO_ADDR 0xff extern const struct file_operations pn_sock_seq_fops; +extern const struct file_operations pn_res_seq_fops; #endif diff --git a/net/phonet/pn_dev.c b/net/phonet/pn_dev.c index d0a42945937..947038ddd04 100644 --- a/net/phonet/pn_dev.c +++ b/net/phonet/pn_dev.c @@ -373,6 +373,7 @@ int __init phonet_device_init(void) if (err) return err; + proc_net_fops_create(&init_net, "pnresource", 0, &pn_res_seq_fops); register_netdevice_notifier(&phonet_device_notifier); err = phonet_netlink_register(); if (err) @@ -385,6 +386,7 @@ void phonet_device_exit(void) rtnl_unregister_all(PF_PHONET); unregister_netdevice_notifier(&phonet_device_notifier); unregister_pernet_device(&phonet_net_ops); + proc_net_remove(&init_net, "pnresource"); } int phonet_route_add(struct net_device *dev, u8 daddr) diff --git a/net/phonet/socket.c b/net/phonet/socket.c index d4f41afc058..6bf6e3c97d5 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -654,3 +654,99 @@ void pn_sock_unbind_all_res(struct sock *sk) match--; } } + +#ifdef CONFIG_PROC_FS +static struct sock **pn_res_get_idx(struct seq_file *seq, loff_t pos) +{ + struct net *net = seq_file_net(seq); + unsigned i; + + if (!net_eq(net, &init_net)) + return NULL; + + for (i = 0; i < 256; i++) { + if (pnres.sk[i] == NULL) + continue; + if (!pos) + return pnres.sk + i; + pos--; + } + return NULL; +} + +static struct sock **pn_res_get_next(struct seq_file *seq, struct sock **sk) +{ + struct net *net = seq_file_net(seq); + unsigned i; + + BUG_ON(!net_eq(net, &init_net)); + + for (i = (sk - pnres.sk) + 1; i < 256; i++) + if (pnres.sk[i]) + return pnres.sk + i; + return NULL; +} + +static void *pn_res_seq_start(struct seq_file *seq, loff_t *pos) + __acquires(resource_mutex) +{ + mutex_lock(&resource_mutex); + return *pos ? pn_res_get_idx(seq, *pos - 1) : SEQ_START_TOKEN; +} + +static void *pn_res_seq_next(struct seq_file *seq, void *v, loff_t *pos) +{ + struct sock **sk; + + if (v == SEQ_START_TOKEN) + sk = pn_res_get_idx(seq, 0); + else + sk = pn_res_get_next(seq, v); + (*pos)++; + return sk; +} + +static void pn_res_seq_stop(struct seq_file *seq, void *v) + __releases(resource_mutex) +{ + mutex_unlock(&resource_mutex); +} + +static int pn_res_seq_show(struct seq_file *seq, void *v) +{ + int len; + + if (v == SEQ_START_TOKEN) + seq_printf(seq, "%s%n", "rs uid inode", &len); + else { + struct sock **psk = v; + struct sock *sk = *psk; + + seq_printf(seq, "%02X %5d %lu%n", + psk - pnres.sk, sock_i_uid(sk), sock_i_ino(sk), &len); + } + seq_printf(seq, "%*s\n", 63 - len, ""); + return 0; +} + +static const struct seq_operations pn_res_seq_ops = { + .start = pn_res_seq_start, + .next = pn_res_seq_next, + .stop = pn_res_seq_stop, + .show = pn_res_seq_show, +}; + +static int pn_res_open(struct inode *inode, struct file *file) +{ + return seq_open_net(inode, file, &pn_res_seq_ops, + sizeof(struct seq_net_private)); +} + +const struct file_operations pn_res_seq_fops = { + .owner = THIS_MODULE, + .open = pn_res_open, + .read = seq_read, + .llseek = seq_lseek, + .release = seq_release_net, +}; +#endif -- cgit v1.2.3 From 9c376639297d3dd82d40e54c9cdca8da9dfc22f1 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Fri, 20 Aug 2010 15:13:59 -0700 Subject: include/net/cfg80211.h: wiphy_ messages use dev_printk The output becomes: [ 41.261941] ieee80211 phy0: Selected rate control algorithm 'minstrel_ht' Signed-off-by: Joe Perches Signed-off-by: John W. Linville --- include/net/cfg80211.h | 37 ++++++++++++------------------------- net/wireless/core.c | 49 ------------------------------------------------- 2 files changed, 12 insertions(+), 74 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 4c8c727d0cc..a0613ff62c9 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2558,49 +2558,36 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, /* wiphy_printk helpers, similar to dev_printk */ #define wiphy_printk(level, wiphy, format, args...) \ - printk(level "%s: " format, wiphy_name(wiphy), ##args) + dev_printk(level, &(wiphy)->dev, format, ##args) #define wiphy_emerg(wiphy, format, args...) \ - wiphy_printk(KERN_EMERG, wiphy, format, ##args) + dev_emerg(&(wiphy)->dev, format, ##args) #define wiphy_alert(wiphy, format, args...) \ - wiphy_printk(KERN_ALERT, wiphy, format, ##args) + dev_alert(&(wiphy)->dev, format, ##args) #define wiphy_crit(wiphy, format, args...) \ - wiphy_printk(KERN_CRIT, wiphy, format, ##args) + dev_crit(&(wiphy)->dev, format, ##args) #define wiphy_err(wiphy, format, args...) \ - wiphy_printk(KERN_ERR, wiphy, format, ##args) + dev_err(&(wiphy)->dev, format, ##args) #define wiphy_warn(wiphy, format, args...) \ - wiphy_printk(KERN_WARNING, wiphy, format, ##args) + dev_warn(&(wiphy)->dev, format, ##args) #define wiphy_notice(wiphy, format, args...) \ - wiphy_printk(KERN_NOTICE, wiphy, format, ##args) + dev_notice(&(wiphy)->dev, format, ##args) #define wiphy_info(wiphy, format, args...) \ - wiphy_printk(KERN_INFO, wiphy, format, ##args) + dev_info(&(wiphy)->dev, format, ##args) -int wiphy_debug(const struct wiphy *wiphy, const char *format, ...) - __attribute__ ((format (printf, 2, 3))); - -#if defined(DEBUG) -#define wiphy_dbg(wiphy, format, args...) \ +#define wiphy_debug(wiphy, format, args...) \ wiphy_printk(KERN_DEBUG, wiphy, format, ##args) -#elif defined(CONFIG_DYNAMIC_DEBUG) + #define wiphy_dbg(wiphy, format, args...) \ - dynamic_pr_debug("%s: " format, wiphy_name(wiphy), ##args) -#else -#define wiphy_dbg(wiphy, format, args...) \ -({ \ - if (0) \ - wiphy_printk(KERN_DEBUG, wiphy, format, ##args); \ - 0; \ -}) -#endif + dev_dbg(&(wiphy)->dev, format, ##args) #if defined(VERBOSE_DEBUG) #define wiphy_vdbg wiphy_dbg #else - #define wiphy_vdbg(wiphy, format, args...) \ ({ \ if (0) \ wiphy_printk(KERN_DEBUG, wiphy, format, ##args); \ - 0; \ + 0; \ }) #endif diff --git a/net/wireless/core.c b/net/wireless/core.c index d52630bbab0..b8191cf8622 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -912,52 +912,3 @@ static void __exit cfg80211_exit(void) destroy_workqueue(cfg80211_wq); } module_exit(cfg80211_exit); - -static int ___wiphy_printk(const char *level, const struct wiphy *wiphy, - struct va_format *vaf) -{ - if (!wiphy) - return printk("%s(NULL wiphy *): %pV", level, vaf); - - return printk("%s%s: %pV", level, wiphy_name(wiphy), vaf); -} - -int __wiphy_printk(const char *level, const struct wiphy *wiphy, - const char *fmt, ...) -{ - struct va_format vaf; - va_list args; - int r; - - va_start(args, fmt); - - vaf.fmt = fmt; - vaf.va = &args; - - r = ___wiphy_printk(level, wiphy, &vaf); - va_end(args); - - return r; -} -EXPORT_SYMBOL(__wiphy_printk); - -#define define_wiphy_printk_level(func, kern_level) \ -int func(const struct wiphy *wiphy, const char *fmt, ...) \ -{ \ - struct va_format vaf; \ - va_list args; \ - int r; \ - \ - va_start(args, fmt); \ - \ - vaf.fmt = fmt; \ - vaf.va = &args; \ - \ - r = ___wiphy_printk(kern_level, wiphy, &vaf); \ - va_end(args); \ - \ - return r; \ -} \ -EXPORT_SYMBOL(func); - -define_wiphy_printk_level(wiphy_debug, KERN_DEBUG); -- cgit v1.2.3 From 2ca27bcff7127da1aa7dd39cd2a6f7cb187e327f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 16 Sep 2010 14:58:23 +0200 Subject: mac80211: add p2p device type support When a driver advertises p2p device support, mac80211 will handle it, but internally it will rewrite the interface type to STA/AP rather than P2P-STA/GO since otherwise a lot of paths need to be touched that are otherwise identical. A p2p boolean tells drivers whether or not a given interface will be used for p2p or not. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/mac80211_hwsim.c | 15 +++++++++++---- include/net/mac80211.h | 27 ++++++++++++++++++++++++++- net/mac80211/cfg.c | 25 ++++++++++++++++++------- net/mac80211/driver-ops.h | 6 +++--- net/mac80211/driver-trace.h | 21 +++++++++++++-------- net/mac80211/iface.c | 29 ++++++++++++++++++++++++++--- net/mac80211/main.c | 15 +++++++++++++++ net/mac80211/rx.c | 4 +--- net/mac80211/util.c | 18 ++++-------------- 9 files changed, 117 insertions(+), 43 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 92b486d46eb..7eaaa3bab54 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -595,7 +595,8 @@ static int mac80211_hwsim_add_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif) { wiphy_debug(hw->wiphy, "%s (type=%d mac_addr=%pM)\n", - __func__, vif->type, vif->addr); + __func__, ieee80211_vif_type_p2p(vif), + vif->addr); hwsim_set_magic(vif); return 0; } @@ -603,11 +604,14 @@ static int mac80211_hwsim_add_interface(struct ieee80211_hw *hw, static int mac80211_hwsim_change_interface(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - enum nl80211_iftype newtype) + enum nl80211_iftype newtype, + bool newp2p) { + newtype = ieee80211_iftype_p2p(newtype, newp2p); wiphy_debug(hw->wiphy, "%s (old type=%d, new type=%d, mac_addr=%pM)\n", - __func__, vif->type, newtype, vif->addr); + __func__, ieee80211_vif_type_p2p(vif), + newtype, vif->addr); hwsim_check_magic(vif); return 0; @@ -617,7 +621,8 @@ static void mac80211_hwsim_remove_interface( struct ieee80211_hw *hw, struct ieee80211_vif *vif) { wiphy_debug(hw->wiphy, "%s (type=%d mac_addr=%pM)\n", - __func__, vif->type, vif->addr); + __func__, ieee80211_vif_type_p2p(vif), + vif->addr); hwsim_check_magic(vif); hwsim_clear_magic(vif); } @@ -1310,6 +1315,8 @@ static int __init init_mac80211_hwsim(void) hw->wiphy->interface_modes = BIT(NL80211_IFTYPE_STATION) | BIT(NL80211_IFTYPE_AP) | + BIT(NL80211_IFTYPE_P2P_CLIENT) | + BIT(NL80211_IFTYPE_P2P_GO) | BIT(NL80211_IFTYPE_ADHOC) | BIT(NL80211_IFTYPE_MESH_POINT); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 19a5cb4a658..12a49f0ba32 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -769,6 +769,8 @@ struct ieee80211_channel_switch { * @bss_conf: BSS configuration for this interface, either our own * or the BSS we're associated to * @addr: address of this interface + * @p2p: indicates whether this AP or STA interface is a p2p + * interface, i.e. a GO or p2p-sta respectively * @drv_priv: data area for driver use, will always be aligned to * sizeof(void *). */ @@ -776,6 +778,7 @@ struct ieee80211_vif { enum nl80211_iftype type; struct ieee80211_bss_conf bss_conf; u8 addr[ETH_ALEN]; + bool p2p; /* must be last */ u8 drv_priv[0] __attribute__((__aligned__(sizeof(void *)))); }; @@ -1701,7 +1704,7 @@ struct ieee80211_ops { struct ieee80211_vif *vif); int (*change_interface)(struct ieee80211_hw *hw, struct ieee80211_vif *vif, - enum nl80211_iftype new_type); + enum nl80211_iftype new_type, bool p2p); void (*remove_interface)(struct ieee80211_hw *hw, struct ieee80211_vif *vif); int (*config)(struct ieee80211_hw *hw, u32 changed); @@ -2721,4 +2724,26 @@ conf_is_ht(struct ieee80211_conf *conf) return conf->channel_type != NL80211_CHAN_NO_HT; } +static inline enum nl80211_iftype +ieee80211_iftype_p2p(enum nl80211_iftype type, bool p2p) +{ + if (p2p) { + switch (type) { + case NL80211_IFTYPE_STATION: + return NL80211_IFTYPE_P2P_CLIENT; + case NL80211_IFTYPE_AP: + return NL80211_IFTYPE_P2P_GO; + default: + break; + } + } + return type; +} + +static inline enum nl80211_iftype +ieee80211_vif_type_p2p(struct ieee80211_vif *vif) +{ + return ieee80211_iftype_p2p(vif->type, vif->p2p); +} + #endif /* MAC80211_H */ diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 171e8ff8e02..c981604b71e 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1151,15 +1151,26 @@ static int ieee80211_scan(struct wiphy *wiphy, struct net_device *dev, struct cfg80211_scan_request *req) { - struct ieee80211_sub_if_data *sdata; - - sdata = IEEE80211_DEV_TO_SUB_IF(dev); + struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); - if (sdata->vif.type != NL80211_IFTYPE_STATION && - sdata->vif.type != NL80211_IFTYPE_ADHOC && - sdata->vif.type != NL80211_IFTYPE_MESH_POINT && - (sdata->vif.type != NL80211_IFTYPE_AP || sdata->u.ap.beacon)) + switch (ieee80211_vif_type_p2p(&sdata->vif)) { + case NL80211_IFTYPE_STATION: + case NL80211_IFTYPE_ADHOC: + case NL80211_IFTYPE_MESH_POINT: + case NL80211_IFTYPE_P2P_CLIENT: + break; + case NL80211_IFTYPE_P2P_GO: + if (sdata->local->ops->hw_scan) + break; + /* FIXME: implement NoA while scanning in software */ + return -EOPNOTSUPP; + case NL80211_IFTYPE_AP: + if (sdata->u.ap.beacon) + return -EOPNOTSUPP; + break; + default: return -EOPNOTSUPP; + } return ieee80211_request_scan(sdata, req); } diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h index 6064b7b09e0..16983825f8e 100644 --- a/net/mac80211/driver-ops.h +++ b/net/mac80211/driver-ops.h @@ -56,14 +56,14 @@ static inline int drv_add_interface(struct ieee80211_local *local, static inline int drv_change_interface(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - enum nl80211_iftype type) + enum nl80211_iftype type, bool p2p) { int ret; might_sleep(); - trace_drv_change_interface(local, sdata, type); - ret = local->ops->change_interface(&local->hw, &sdata->vif, type); + trace_drv_change_interface(local, sdata, type, p2p); + ret = local->ops->change_interface(&local->hw, &sdata->vif, type, p2p); trace_drv_return_int(local, ret); return ret; } diff --git a/net/mac80211/driver-trace.h b/net/mac80211/driver-trace.h index f6f3d89e43f..6831fb1641c 100644 --- a/net/mac80211/driver-trace.h +++ b/net/mac80211/driver-trace.h @@ -25,12 +25,14 @@ static inline void trace_ ## name(proto) {} #define STA_PR_FMT " sta:%pM" #define STA_PR_ARG __entry->sta_addr -#define VIF_ENTRY __field(enum nl80211_iftype, vif_type) __field(void *, sdata) \ +#define VIF_ENTRY __field(enum nl80211_iftype, vif_type) __field(void *, sdata) \ + __field(bool, p2p) \ __string(vif_name, sdata->dev ? sdata->dev->name : "") -#define VIF_ASSIGN __entry->vif_type = sdata->vif.type; __entry->sdata = sdata; \ +#define VIF_ASSIGN __entry->vif_type = sdata->vif.type; __entry->sdata = sdata; \ + __entry->p2p = sdata->vif.p2p; \ __assign_str(vif_name, sdata->dev ? sdata->dev->name : "") -#define VIF_PR_FMT " vif:%s(%d)" -#define VIF_PR_ARG __get_str(vif_name), __entry->vif_type +#define VIF_PR_FMT " vif:%s(%d%s)" +#define VIF_PR_ARG __get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : "" /* * Tracing for driver callbacks. @@ -139,25 +141,28 @@ TRACE_EVENT(drv_add_interface, TRACE_EVENT(drv_change_interface, TP_PROTO(struct ieee80211_local *local, struct ieee80211_sub_if_data *sdata, - enum nl80211_iftype type), + enum nl80211_iftype type, bool p2p), - TP_ARGS(local, sdata, type), + TP_ARGS(local, sdata, type, p2p), TP_STRUCT__entry( LOCAL_ENTRY VIF_ENTRY __field(u32, new_type) + __field(bool, new_p2p) ), TP_fast_assign( LOCAL_ASSIGN; VIF_ASSIGN; __entry->new_type = type; + __entry->new_p2p = p2p; ), TP_printk( - LOCAL_PR_FMT VIF_PR_FMT " new type:%d", - LOCAL_PR_ARG, VIF_PR_ARG, __entry->new_type + LOCAL_PR_FMT VIF_PR_FMT " new type:%d%s", + LOCAL_PR_ARG, VIF_PR_ARG, __entry->new_type, + __entry->new_p2p ? "/p2p" : "" ) ); diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index 95908aaa8a6..66785739dad 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -188,6 +188,8 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) break; case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_P2P_GO: /* cannot happen */ WARN_ON(1); break; @@ -844,6 +846,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, /* and set some type-dependent values */ sdata->vif.type = type; + sdata->vif.p2p = false; sdata->dev->netdev_ops = &ieee80211_dataif_ops; sdata->wdev.iftype = type; @@ -857,10 +860,20 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata, INIT_WORK(&sdata->work, ieee80211_iface_work); switch (type) { + case NL80211_IFTYPE_P2P_GO: + type = NL80211_IFTYPE_AP; + sdata->vif.type = type; + sdata->vif.p2p = true; + /* fall through */ case NL80211_IFTYPE_AP: skb_queue_head_init(&sdata->u.ap.ps_bc_buf); INIT_LIST_HEAD(&sdata->u.ap.vlans); break; + case NL80211_IFTYPE_P2P_CLIENT: + type = NL80211_IFTYPE_STATION; + sdata->vif.type = type; + sdata->vif.p2p = true; + /* fall through */ case NL80211_IFTYPE_STATION: ieee80211_sta_setup_sdata(sdata); break; @@ -894,6 +907,8 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata, { struct ieee80211_local *local = sdata->local; int ret, err; + enum nl80211_iftype internal_type = type; + bool p2p = false; ASSERT_RTNL(); @@ -926,11 +941,19 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata, * code isn't prepared to handle). */ break; + case NL80211_IFTYPE_P2P_CLIENT: + p2p = true; + internal_type = NL80211_IFTYPE_STATION; + break; + case NL80211_IFTYPE_P2P_GO: + p2p = true; + internal_type = NL80211_IFTYPE_AP; + break; default: return -EBUSY; } - ret = ieee80211_check_concurrent_iface(sdata, type); + ret = ieee80211_check_concurrent_iface(sdata, internal_type); if (ret) return ret; @@ -938,7 +961,7 @@ static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata, ieee80211_teardown_sdata(sdata->dev); - ret = drv_change_interface(local, sdata, type); + ret = drv_change_interface(local, sdata, internal_type, p2p); if (ret) type = sdata->vif.type; @@ -957,7 +980,7 @@ int ieee80211_if_change_type(struct ieee80211_sub_if_data *sdata, ASSERT_RTNL(); - if (type == sdata->vif.type) + if (type == ieee80211_vif_type_p2p(&sdata->vif)) return 0; /* Setting ad-hoc mode on non-IBSS channel is not supported. */ diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 7fb11485697..18fdeca43d9 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -459,6 +459,21 @@ ieee80211_default_mgmt_stypes[NUM_NL80211_IFTYPES] = { BIT(IEEE80211_STYPE_DEAUTH >> 4) | BIT(IEEE80211_STYPE_ACTION >> 4), }, + [NL80211_IFTYPE_P2P_CLIENT] = { + .tx = 0xffff, + .rx = BIT(IEEE80211_STYPE_ACTION >> 4) | + BIT(IEEE80211_STYPE_PROBE_REQ >> 4), + }, + [NL80211_IFTYPE_P2P_GO] = { + .tx = 0xffff, + .rx = BIT(IEEE80211_STYPE_ASSOC_REQ >> 4) | + BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) | + BIT(IEEE80211_STYPE_PROBE_REQ >> 4) | + BIT(IEEE80211_STYPE_DISASSOC >> 4) | + BIT(IEEE80211_STYPE_AUTH >> 4) | + BIT(IEEE80211_STYPE_DEAUTH >> 4) | + BIT(IEEE80211_STYPE_ACTION >> 4), + }, }; struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index ac205a33690..c0368152b72 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -2588,9 +2588,7 @@ static int prepare_for_handlers(struct ieee80211_sub_if_data *sdata, if (compare_ether_addr(sdata->u.wds.remote_addr, hdr->addr2)) return 0; break; - case NL80211_IFTYPE_MONITOR: - case NL80211_IFTYPE_UNSPECIFIED: - case NUM_NL80211_IFTYPES: + default: /* should never get here */ WARN_ON(1); break; diff --git a/net/mac80211/util.c b/net/mac80211/util.c index 9f21a69f091..737f4267c33 100644 --- a/net/mac80211/util.c +++ b/net/mac80211/util.c @@ -474,16 +474,10 @@ void ieee80211_iterate_active_interfaces( list_for_each_entry(sdata, &local->interfaces, list) { switch (sdata->vif.type) { - case NUM_NL80211_IFTYPES: - case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_AP_VLAN: continue; - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_STATION: - case NL80211_IFTYPE_ADHOC: - case NL80211_IFTYPE_WDS: - case NL80211_IFTYPE_MESH_POINT: + default: break; } if (ieee80211_sdata_running(sdata)) @@ -508,16 +502,10 @@ void ieee80211_iterate_active_interfaces_atomic( list_for_each_entry_rcu(sdata, &local->interfaces, list) { switch (sdata->vif.type) { - case NUM_NL80211_IFTYPES: - case NL80211_IFTYPE_UNSPECIFIED: case NL80211_IFTYPE_MONITOR: case NL80211_IFTYPE_AP_VLAN: continue; - case NL80211_IFTYPE_AP: - case NL80211_IFTYPE_STATION: - case NL80211_IFTYPE_ADHOC: - case NL80211_IFTYPE_WDS: - case NL80211_IFTYPE_MESH_POINT: + default: break; } if (ieee80211_sdata_running(sdata)) @@ -1193,6 +1181,8 @@ int ieee80211_reconfig(struct ieee80211_local *local) break; case NL80211_IFTYPE_UNSPECIFIED: case NUM_NL80211_IFTYPES: + case NL80211_IFTYPE_P2P_CLIENT: + case NL80211_IFTYPE_P2P_GO: WARN_ON(1); break; } -- cgit v1.2.3 From 3575792e005dc9994f15ae72c1c6f401d134177d Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Fri, 17 Sep 2010 14:18:16 +0200 Subject: ipvs: extend connection flags to 32 bits - the sync protocol supports 16 bits only, so bits 0..15 should be used only for flags that should go to backup server, bits 16 and above should be allocated for flags not sent to backup. - use IP_VS_CONN_F_DEST_MASK as mask of connection flags in destination that can be changed by user space - allow IP_VS_CONN_F_ONE_PACKET to be set in destination Signed-off-by: Julian Anastasov Signed-off-by: Patrick McHardy --- include/linux/ip_vs.h | 8 ++++++++ include/net/ip_vs.h | 2 +- net/netfilter/ipvs/ip_vs_conn.c | 16 ++++++++++------ net/netfilter/ipvs/ip_vs_core.c | 11 ++++++----- net/netfilter/ipvs/ip_vs_ctl.c | 5 +++-- 5 files changed, 28 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h index 9708de265bb..003d75f6ffe 100644 --- a/include/linux/ip_vs.h +++ b/include/linux/ip_vs.h @@ -70,6 +70,7 @@ /* * IPVS Connection Flags + * Only flags 0..15 are sent to backup server */ #define IP_VS_CONN_F_FWD_MASK 0x0007 /* mask for the fwd methods */ #define IP_VS_CONN_F_MASQ 0x0000 /* masquerading/NAT */ @@ -88,6 +89,13 @@ #define IP_VS_CONN_F_TEMPLATE 0x1000 /* template, not connection */ #define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */ +/* Flags that are not sent to backup server start from bit 16 */ + +/* Connection flags from destination that can be changed by user space */ +#define IP_VS_CONN_F_DEST_MASK (IP_VS_CONN_F_FWD_MASK | \ + IP_VS_CONN_F_ONE_PACKET | \ + 0) + #define IP_VS_SCHEDNAME_MAXLEN 16 #define IP_VS_IFNAME_MAXLEN 16 diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index f976885f686..62698a9c163 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -366,6 +366,7 @@ struct ip_vs_conn { union nf_inet_addr caddr; /* client address */ union nf_inet_addr vaddr; /* virtual address */ union nf_inet_addr daddr; /* destination address */ + volatile __u32 flags; /* status flags */ __be16 cport; __be16 vport; __be16 dport; @@ -378,7 +379,6 @@ struct ip_vs_conn { /* Flags and state transition */ spinlock_t lock; /* lock for state transition */ - volatile __u16 flags; /* status flags */ volatile __u16 state; /* state info */ volatile __u16 old_state; /* old state, to be used for * state transition triggerd diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index b71c69a2db1..9fe1da7bcf1 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -505,6 +505,8 @@ static inline int ip_vs_dest_totalconns(struct ip_vs_dest *dest) static inline void ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) { + unsigned int conn_flags; + /* if dest is NULL, then return directly */ if (!dest) return; @@ -512,16 +514,18 @@ ip_vs_bind_dest(struct ip_vs_conn *cp, struct ip_vs_dest *dest) /* Increase the refcnt counter of the dest */ atomic_inc(&dest->refcnt); + conn_flags = atomic_read(&dest->conn_flags); + if (cp->protocol != IPPROTO_UDP) + conn_flags &= ~IP_VS_CONN_F_ONE_PACKET; /* Bind with the destination and its corresponding transmitter */ - if ((cp->flags & IP_VS_CONN_F_SYNC) && - (!(cp->flags & IP_VS_CONN_F_TEMPLATE))) + if (cp->flags & IP_VS_CONN_F_SYNC) { /* if the connection is not template and is created * by sync, preserve the activity flag. */ - cp->flags |= atomic_read(&dest->conn_flags) & - (~IP_VS_CONN_F_INACTIVE); - else - cp->flags |= atomic_read(&dest->conn_flags); + if (!(cp->flags & IP_VS_CONN_F_TEMPLATE)) + conn_flags &= ~IP_VS_CONN_F_INACTIVE; + } + cp->flags |= conn_flags; cp->dest = dest; IP_VS_DBG_BUF(7, "Bind-dest %s c:%s:%d v:%s:%d " diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 0c043b6ce65..319991d4d25 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -194,7 +194,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, struct ip_vs_dest *dest; struct ip_vs_conn *ct; __be16 dport; /* destination port to forward */ - __be16 flags; + unsigned int flags; union nf_inet_addr snet; /* source network of the client, after masking */ @@ -382,7 +382,8 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) struct ip_vs_conn *cp = NULL; struct ip_vs_iphdr iph; struct ip_vs_dest *dest; - __be16 _ports[2], *pptr, flags; + __be16 _ports[2], *pptr; + unsigned int flags; ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); @@ -473,9 +474,9 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, if (sysctl_ip_vs_cache_bypass && svc->fwmark && unicast) { int ret, cs; struct ip_vs_conn *cp; - __u16 flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && - iph.protocol == IPPROTO_UDP)? - IP_VS_CONN_F_ONE_PACKET : 0; + unsigned int flags = (svc->flags & IP_VS_SVC_F_ONEPACKET && + iph.protocol == IPPROTO_UDP)? + IP_VS_CONN_F_ONE_PACKET : 0; union nf_inet_addr daddr = { .all = { 0, 0, 0, 0 } }; ip_vs_service_put(svc); diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index ca8ec8c4f31..7bd41d28080 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -765,7 +765,8 @@ __ip_vs_update_dest(struct ip_vs_service *svc, /* set the weight and the flags */ atomic_set(&dest->weight, udest->weight); - conn_flags = udest->conn_flags | IP_VS_CONN_F_INACTIVE; + conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK; + conn_flags |= IP_VS_CONN_F_INACTIVE; /* check if local node and update the flags */ #ifdef CONFIG_IP_VS_IPV6 @@ -782,7 +783,7 @@ __ip_vs_update_dest(struct ip_vs_service *svc, } /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ - if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != 0) { + if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) { conn_flags |= IP_VS_CONN_F_NOOUTPUT; } else { /* -- cgit v1.2.3 From f4bc17cdd205ebaa3807c2aa973719bb5ce6a5b2 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 21 Sep 2010 17:35:41 +0200 Subject: ipvs: netfilter connection tracking changes Add more code to IPVS to work with Netfilter connection tracking and fix some problems. - Allow IPVS to be compiled without connection tracking as in 2.6.35 and before. This can avoid keeping conntracks for all IPVS connections because this costs memory. ip_vs_ftp still depends on connection tracking and NAT as implemented for 2.6.36. - Add sysctl var "conntrack" to enable connection tracking for all IPVS connections. For loaded IPVS directors it needs tuning of nf_conntrack_max limit. - Add IP_VS_CONN_F_NFCT connection flag to request the connection to use connection tracking. This allows user space to provide this flag, for example, in dest->conn_flags. This can be useful to request connection tracking per real server instead of forcing it for all connections with the "conntrack" sysctl. This flag is set currently only by ip_vs_ftp and of course by "conntrack" sysctl. - Add ip_vs_nfct.c file to hold all connection tracking code, by this way main code should not depend of netfilter conntrack support. - Return back the ip_vs_post_routing handler as in 2.6.35 and use skb->ipvs_property=1 to allow IPVS to work without connection tracking Connection tracking: - most of the code is already in 2.6.36-rc - alter conntrack reply tuple for LVS-NAT connections when first packet from client is forwarded and conntrack state is NEW or RELATED. Additionally, alter reply for RELATED connections from real server, again for packet in original direction. - add IP_VS_XMIT_TUNNEL to confirm conntrack (without altering reply) for LVS-TUN early because we want to call nf_reset. It is needed because we add IPIP header and the original conntrack should be preserved, not destroyed. The transmitted IPIP packets can reuse same conntrack, so we do not set skb->ipvs_property. - try to destroy conntrack when the IPVS connection is destroyed. It is not fatal if conntrack disappears before that, it depends on the used timers. Fix problems from long time: - add skb->ip_summed = CHECKSUM_NONE for the LVS-TUN transmitters Signed-off-by: Julian Anastasov Signed-off-by: Patrick McHardy --- include/linux/ip_vs.h | 2 + include/net/ip_vs.h | 44 +++++- net/netfilter/ipvs/Kconfig | 13 +- net/netfilter/ipvs/Makefile | 5 +- net/netfilter/ipvs/ip_vs_conn.c | 13 ++ net/netfilter/ipvs/ip_vs_core.c | 46 ++++++- net/netfilter/ipvs/ip_vs_ctl.c | 12 ++ net/netfilter/ipvs/ip_vs_ftp.c | 146 +------------------- net/netfilter/ipvs/ip_vs_nfct.c | 292 ++++++++++++++++++++++++++++++++++++++++ net/netfilter/ipvs/ip_vs_xmit.c | 98 +++++++------- 10 files changed, 475 insertions(+), 196 deletions(-) create mode 100644 net/netfilter/ipvs/ip_vs_nfct.c (limited to 'include/net') diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h index 003d75f6ffe..df772861372 100644 --- a/include/linux/ip_vs.h +++ b/include/linux/ip_vs.h @@ -90,10 +90,12 @@ #define IP_VS_CONN_F_ONE_PACKET 0x2000 /* forward only one packet */ /* Flags that are not sent to backup server start from bit 16 */ +#define IP_VS_CONN_F_NFCT (1 << 16) /* use netfilter conntrack */ /* Connection flags from destination that can be changed by user space */ #define IP_VS_CONN_F_DEST_MASK (IP_VS_CONN_F_FWD_MASK | \ IP_VS_CONN_F_ONE_PACKET | \ + IP_VS_CONN_F_NFCT | \ 0) #define IP_VS_SCHEDNAME_MAXLEN 16 diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 62698a9c163..e8ec5231eae 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -25,7 +25,9 @@ #include #include /* for struct ipv6hdr */ #include /* for ipv6_addr_copy */ - +#ifdef CONFIG_IP_VS_NFCT +#include +#endif /* Connections' size value needed by ip_vs_ctl.c */ extern int ip_vs_conn_tab_size; @@ -798,6 +800,7 @@ extern int sysctl_ip_vs_expire_nodest_conn; extern int sysctl_ip_vs_expire_quiescent_template; extern int sysctl_ip_vs_sync_threshold[2]; extern int sysctl_ip_vs_nat_icmp_send; +extern int sysctl_ip_vs_conntrack; extern struct ip_vs_stats ip_vs_stats; extern const struct ctl_path net_vs_ctl_path[]; @@ -955,8 +958,47 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum) return csum_partial(diff, sizeof(diff), oldsum); } +#ifdef CONFIG_IP_VS_NFCT +/* + * Netfilter connection tracking + * (from ip_vs_nfct.c) + */ +static inline int ip_vs_conntrack_enabled(void) +{ + return sysctl_ip_vs_conntrack; +} + extern void ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin); +extern int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp); +extern void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct, + struct ip_vs_conn *cp, u_int8_t proto, + const __be16 port, int from_rs); +extern void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp); + +#else + +static inline int ip_vs_conntrack_enabled(void) +{ + return 0; +} + +static inline void ip_vs_update_conntrack(struct sk_buff *skb, + struct ip_vs_conn *cp, int outin) +{ +} + +static inline int ip_vs_confirm_conntrack(struct sk_buff *skb, + struct ip_vs_conn *cp) +{ + return NF_ACCEPT; +} + +static inline void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) +{ +} +/* CONFIG_IP_VS_NFCT */ +#endif #endif /* __KERNEL__ */ diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig index 46a77d5c388..af3c9f48f2d 100644 --- a/net/netfilter/ipvs/Kconfig +++ b/net/netfilter/ipvs/Kconfig @@ -3,7 +3,7 @@ # menuconfig IP_VS tristate "IP virtual server support" - depends on NET && INET && NETFILTER && NF_CONNTRACK + depends on NET && INET && NETFILTER ---help--- IP Virtual Server support will let you build a high-performance virtual server based on cluster of two or more real servers. This @@ -235,7 +235,8 @@ comment 'IPVS application helper' config IP_VS_FTP tristate "FTP protocol helper" - depends on IP_VS_PROTO_TCP && NF_NAT + depends on IP_VS_PROTO_TCP && NF_CONNTRACK && NF_NAT + select IP_VS_NFCT ---help--- FTP is a protocol that transfers IP address and/or port number in the payload. In the virtual server via Network Address Translation, @@ -247,4 +248,12 @@ config IP_VS_FTP If you want to compile it in kernel, say Y. To compile it as a module, choose M here. If unsure, say N. +config IP_VS_NFCT + bool "Netfilter connection tracking" + depends on NF_CONNTRACK + ---help--- + The Netfilter connection tracking support allows the IPVS + connection state to be exported to the Netfilter framework + for filtering purposes. + endif # IP_VS diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile index e3baefd7066..349fe8819b8 100644 --- a/net/netfilter/ipvs/Makefile +++ b/net/netfilter/ipvs/Makefile @@ -9,10 +9,13 @@ ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_UDP) += ip_vs_proto_udp.o ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_AH_ESP) += ip_vs_proto_ah_esp.o ip_vs_proto-objs-$(CONFIG_IP_VS_PROTO_SCTP) += ip_vs_proto_sctp.o +ip_vs-extra_objs-y := +ip_vs-extra_objs-$(CONFIG_IP_VS_NFCT) += ip_vs_nfct.o + ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \ ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \ ip_vs_est.o ip_vs_proto.o \ - $(ip_vs_proto-objs-y) + $(ip_vs_proto-objs-y) $(ip_vs-extra_objs-y) # IPVS core diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 9fe1da7bcf1..a970d969149 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -721,6 +721,9 @@ static void ip_vs_conn_expire(unsigned long data) if (cp->control) ip_vs_control_del(cp); + if (cp->flags & IP_VS_CONN_F_NFCT) + ip_vs_conn_drop_conntrack(cp); + if (unlikely(cp->app != NULL)) ip_vs_unbind_app(cp); ip_vs_unbind_dest(cp); @@ -816,6 +819,16 @@ ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport, if (unlikely(pp && atomic_read(&pp->appcnt))) ip_vs_bind_app(cp, pp); + /* + * Allow conntrack to be preserved. By default, conntrack + * is created and destroyed for every packet. + * Sometimes keeping conntrack can be useful for + * IP_VS_CONN_F_ONE_PACKET too. + */ + + if (ip_vs_conntrack_enabled()) + cp->flags |= IP_VS_CONN_F_NFCT; + /* Hash it in the ip_vs_conn_tab finally */ ip_vs_conn_hash(cp); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 319991d4d25..7fbc80d81fe 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -537,6 +537,23 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, return NF_DROP; } +/* + * It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING + * chain and is used to avoid double NAT and confirmation when we do + * not want to keep the conntrack structure + */ +static unsigned int ip_vs_post_routing(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + if (!skb->ipvs_property) + return NF_ACCEPT; + /* The packet was sent from IPVS, exit this chain */ + return NF_STOP; +} + __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) { return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); @@ -695,7 +712,10 @@ static int handle_response_icmp(int af, struct sk_buff *skb, /* do the statistics and put it back */ ip_vs_out_stats(cp, skb); - skb->ipvs_property = 1; + if (!(cp->flags & IP_VS_CONN_F_NFCT)) + skb->ipvs_property = 1; + else + ip_vs_update_conntrack(skb, cp, 0); verdict = NF_ACCEPT; out: @@ -928,17 +948,19 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, ip_vs_out_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); - ip_vs_update_conntrack(skb, cp, 0); + if (!(cp->flags & IP_VS_CONN_F_NFCT)) + skb->ipvs_property = 1; + else + ip_vs_update_conntrack(skb, cp, 0); ip_vs_conn_put(cp); - skb->ipvs_property = 1; - LeaveFunction(11); return NF_ACCEPT; drop: ip_vs_conn_put(cp); kfree_skb(skb); + LeaveFunction(11); return NF_STOLEN; } @@ -1483,6 +1505,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .hooknum = NF_INET_FORWARD, .priority = 99, }, + /* Before the netfilter connection tracking, exit from POST_ROUTING */ + { + .hook = ip_vs_post_routing, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP_PRI_NAT_SRC-1, + }, #ifdef CONFIG_IP_VS_IPV6 /* After packet filtering, forward packet through VS/DR, VS/TUN, * or VS/NAT(change destination), so that filtering rules can be @@ -1511,6 +1541,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .hooknum = NF_INET_FORWARD, .priority = 99, }, + /* Before the netfilter connection tracking, exit from POST_ROUTING */ + { + .hook = ip_vs_post_routing, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP6_PRI_NAT_SRC-1, + }, #endif }; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 7bd41d28080..d2d842f292c 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -88,6 +88,9 @@ int sysctl_ip_vs_expire_nodest_conn = 0; int sysctl_ip_vs_expire_quiescent_template = 0; int sysctl_ip_vs_sync_threshold[2] = { 3, 50 }; int sysctl_ip_vs_nat_icmp_send = 0; +#ifdef CONFIG_IP_VS_NFCT +int sysctl_ip_vs_conntrack; +#endif #ifdef CONFIG_IP_VS_DEBUG @@ -1580,6 +1583,15 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_do_defense_mode, }, +#ifdef CONFIG_IP_VS_NFCT + { + .procname = "conntrack", + .data = &sysctl_ip_vs_conntrack, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, +#endif { .procname = "secure_tcp", .data = &sysctl_ip_vs_secure_tcp, diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 7e9af5b76d9..9cd375f94d6 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -20,17 +20,6 @@ * * Author: Wouter Gadeyne * - * - * Code for ip_vs_expect_related and ip_vs_expect_callback is taken from - * http://www.ssi.bg/~ja/nfct/: - * - * ip_vs_nfct.c: Netfilter connection tracking support for IPVS - * - * Portions Copyright (C) 2001-2002 - * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland. - * - * Portions Copyright (C) 2003-2008 - * Julian Anastasov */ #define KMSG_COMPONENT "IPVS" @@ -58,16 +47,6 @@ #define SERVER_STRING "227 Entering Passive Mode (" #define CLIENT_STRING "PORT " -#define FMT_TUPLE "%pI4:%u->%pI4:%u/%u" -#define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \ - &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \ - (T)->dst.protonum - -#define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u" -#define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \ - &((C)->vaddr.ip), ntohs((C)->vport), \ - &((C)->daddr.ip), ntohs((C)->dport), \ - (C)->protocol, (C)->state /* * List of ports (up to IP_VS_APP_MAX_PORTS) to be handled by helper @@ -85,6 +64,8 @@ static int ip_vs_ftp_pasv; static int ip_vs_ftp_init_conn(struct ip_vs_app *app, struct ip_vs_conn *cp) { + /* We use connection tracking for the command connection */ + cp->flags |= IP_VS_CONN_F_NFCT; return 0; } @@ -148,120 +129,6 @@ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, return 1; } -/* - * Called from init_conntrack() as expectfn handler. - */ -static void -ip_vs_expect_callback(struct nf_conn *ct, - struct nf_conntrack_expect *exp) -{ - struct nf_conntrack_tuple *orig, new_reply; - struct ip_vs_conn *cp; - - if (exp->tuple.src.l3num != PF_INET) - return; - - /* - * We assume that no NF locks are held before this callback. - * ip_vs_conn_out_get and ip_vs_conn_in_get should match their - * expectations even if they use wildcard values, now we provide the - * actual values from the newly created original conntrack direction. - * The conntrack is confirmed when packet reaches IPVS hooks. - */ - - /* RS->CLIENT */ - orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - cp = ip_vs_conn_out_get(exp->tuple.src.l3num, orig->dst.protonum, - &orig->src.u3, orig->src.u.tcp.port, - &orig->dst.u3, orig->dst.u.tcp.port); - if (cp) { - /* Change reply CLIENT->RS to CLIENT->VS */ - new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; - IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " - FMT_TUPLE ", found inout cp=" FMT_CONN "\n", - __func__, ct, ct->status, - ARG_TUPLE(orig), ARG_TUPLE(&new_reply), - ARG_CONN(cp)); - new_reply.dst.u3 = cp->vaddr; - new_reply.dst.u.tcp.port = cp->vport; - IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE - ", inout cp=" FMT_CONN "\n", - __func__, ct, - ARG_TUPLE(orig), ARG_TUPLE(&new_reply), - ARG_CONN(cp)); - goto alter; - } - - /* CLIENT->VS */ - cp = ip_vs_conn_in_get(exp->tuple.src.l3num, orig->dst.protonum, - &orig->src.u3, orig->src.u.tcp.port, - &orig->dst.u3, orig->dst.u.tcp.port); - if (cp) { - /* Change reply VS->CLIENT to RS->CLIENT */ - new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; - IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " - FMT_TUPLE ", found outin cp=" FMT_CONN "\n", - __func__, ct, ct->status, - ARG_TUPLE(orig), ARG_TUPLE(&new_reply), - ARG_CONN(cp)); - new_reply.src.u3 = cp->daddr; - new_reply.src.u.tcp.port = cp->dport; - IP_VS_DBG(7, "%s(): ct=%p, new tuples=" FMT_TUPLE ", " - FMT_TUPLE ", outin cp=" FMT_CONN "\n", - __func__, ct, - ARG_TUPLE(orig), ARG_TUPLE(&new_reply), - ARG_CONN(cp)); - goto alter; - } - - IP_VS_DBG(7, "%s(): ct=%p, status=0x%lX, tuple=" FMT_TUPLE - " - unknown expect\n", - __func__, ct, ct->status, ARG_TUPLE(orig)); - return; - -alter: - /* Never alter conntrack for non-NAT conns */ - if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ) - nf_conntrack_alter_reply(ct, &new_reply); - ip_vs_conn_put(cp); - return; -} - -/* - * Create NF conntrack expectation with wildcard (optional) source port. - * Then the default callback function will alter the reply and will confirm - * the conntrack entry when the first packet comes. - */ -static void -ip_vs_expect_related(struct sk_buff *skb, struct nf_conn *ct, - struct ip_vs_conn *cp, u_int8_t proto, - const __be16 *port, int from_rs) -{ - struct nf_conntrack_expect *exp; - - BUG_ON(!ct || ct == &nf_conntrack_untracked); - - exp = nf_ct_expect_alloc(ct); - if (!exp) - return; - - if (from_rs) - nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, - nf_ct_l3num(ct), &cp->daddr, &cp->caddr, - proto, port, &cp->cport); - else - nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, - nf_ct_l3num(ct), &cp->caddr, &cp->vaddr, - proto, port, &cp->vport); - - exp->expectfn = ip_vs_expect_callback; - - IP_VS_DBG(7, "%s(): ct=%p, expect tuple=" FMT_TUPLE "\n", - __func__, ct, ARG_TUPLE(&exp->tuple)); - nf_ct_expect_related(exp); - nf_ct_expect_put(exp); -} - /* * Look at outgoing ftp packets to catch the response to a PASV command * from the server (inside-to-outside). @@ -335,7 +202,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, &cp->caddr, 0, &cp->vaddr, port, &from, port, - IP_VS_CONN_F_NO_CPORT, + IP_VS_CONN_F_NO_CPORT | + IP_VS_CONN_F_NFCT, cp->dest); if (!n_cp) return 0; @@ -371,8 +239,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, start-data, end-start, buf, buf_len); if (ret) - ip_vs_expect_related(skb, ct, n_cp, - IPPROTO_TCP, NULL, 0); + ip_vs_nfct_expect_related(skb, ct, n_cp, + IPPROTO_TCP, 0, 0); } /* @@ -487,7 +355,7 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, &to, port, &cp->vaddr, htons(ntohs(cp->vport)-1), &cp->daddr, htons(ntohs(cp->dport)-1), - 0, + IP_VS_CONN_F_NFCT, cp->dest); if (!n_cp) return 0; diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c new file mode 100644 index 00000000000..c038458d029 --- /dev/null +++ b/net/netfilter/ipvs/ip_vs_nfct.c @@ -0,0 +1,292 @@ +/* + * ip_vs_nfct.c: Netfilter connection tracking support for IPVS + * + * Portions Copyright (C) 2001-2002 + * Antefacto Ltd, 181 Parnell St, Dublin 1, Ireland. + * + * Portions Copyright (C) 2003-2010 + * Julian Anastasov + * + * + * This code is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + * + * Authors: + * Ben North + * Julian Anastasov Reorganize and sync with latest kernels + * Hannes Eder Extend NFCT support for FTP, ipvs match + * + * + * Current status: + * + * - provide conntrack confirmation for new and related connections, by + * this way we can see their proper conntrack state in all hooks + * - support for all forwarding methods, not only NAT + * - FTP support (NAT), ability to support other NAT apps with expectations + * - to correctly create expectations for related NAT connections the proper + * NF conntrack support must be already installed, eg. ip_vs_ftp requires + * nf_conntrack_ftp ... iptables_nat for the same ports (but no iptables + * NAT rules are needed) + * - alter reply for NAT when forwarding packet in original direction: + * conntrack from client in NEW or RELATED (Passive FTP DATA) state or + * when RELATED conntrack is created from real server (Active FTP DATA) + * - if iptables_nat is not loaded the Passive FTP will not work (the + * PASV response can not be NAT-ed) but Active FTP should work + * + */ + +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + + +#define FMT_TUPLE "%pI4:%u->%pI4:%u/%u" +#define ARG_TUPLE(T) &(T)->src.u3.ip, ntohs((T)->src.u.all), \ + &(T)->dst.u3.ip, ntohs((T)->dst.u.all), \ + (T)->dst.protonum + +#define FMT_CONN "%pI4:%u->%pI4:%u->%pI4:%u/%u:%u" +#define ARG_CONN(C) &((C)->caddr.ip), ntohs((C)->cport), \ + &((C)->vaddr.ip), ntohs((C)->vport), \ + &((C)->daddr.ip), ntohs((C)->dport), \ + (C)->protocol, (C)->state + +void +ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) +{ + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); + struct nf_conntrack_tuple new_tuple; + + if (ct == NULL || nf_ct_is_confirmed(ct) || nf_ct_is_untracked(ct) || + nf_ct_is_dying(ct)) + return; + + /* Never alter conntrack for non-NAT conns */ + if (IP_VS_FWD_METHOD(cp) != IP_VS_CONN_F_MASQ) + return; + + /* Alter reply only in original direction */ + if (CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) + return; + + /* + * The connection is not yet in the hashtable, so we update it. + * CIP->VIP will remain the same, so leave the tuple in + * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the + * real-server we will see RIP->DIP. + */ + new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; + /* + * This will also take care of UDP and other protocols. + */ + if (outin) { + new_tuple.src.u3 = cp->daddr; + if (new_tuple.dst.protonum != IPPROTO_ICMP && + new_tuple.dst.protonum != IPPROTO_ICMPV6) + new_tuple.src.u.tcp.port = cp->dport; + } else { + new_tuple.dst.u3 = cp->vaddr; + if (new_tuple.dst.protonum != IPPROTO_ICMP && + new_tuple.dst.protonum != IPPROTO_ICMPV6) + new_tuple.dst.u.tcp.port = cp->vport; + } + IP_VS_DBG(7, "%s: Updating conntrack ct=%p, status=0x%lX, " + "ctinfo=%d, old reply=" FMT_TUPLE + ", new reply=" FMT_TUPLE ", cp=" FMT_CONN "\n", + __func__, ct, ct->status, ctinfo, + ARG_TUPLE(&ct->tuplehash[IP_CT_DIR_REPLY].tuple), + ARG_TUPLE(&new_tuple), ARG_CONN(cp)); + nf_conntrack_alter_reply(ct, &new_tuple); +} + +int ip_vs_confirm_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp) +{ + return nf_conntrack_confirm(skb); +} + +/* + * Called from init_conntrack() as expectfn handler. + */ +static void ip_vs_nfct_expect_callback(struct nf_conn *ct, + struct nf_conntrack_expect *exp) +{ + struct nf_conntrack_tuple *orig, new_reply; + struct ip_vs_conn *cp; + + if (exp->tuple.src.l3num != PF_INET) + return; + + /* + * We assume that no NF locks are held before this callback. + * ip_vs_conn_out_get and ip_vs_conn_in_get should match their + * expectations even if they use wildcard values, now we provide the + * actual values from the newly created original conntrack direction. + * The conntrack is confirmed when packet reaches IPVS hooks. + */ + + /* RS->CLIENT */ + orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; + cp = ip_vs_conn_out_get(exp->tuple.src.l3num, orig->dst.protonum, + &orig->src.u3, orig->src.u.tcp.port, + &orig->dst.u3, orig->dst.u.tcp.port); + if (cp) { + /* Change reply CLIENT->RS to CLIENT->VS */ + new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; + IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " + FMT_TUPLE ", found inout cp=" FMT_CONN "\n", + __func__, ct, ct->status, + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), + ARG_CONN(cp)); + new_reply.dst.u3 = cp->vaddr; + new_reply.dst.u.tcp.port = cp->vport; + IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " FMT_TUPLE + ", inout cp=" FMT_CONN "\n", + __func__, ct, + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), + ARG_CONN(cp)); + goto alter; + } + + /* CLIENT->VS */ + cp = ip_vs_conn_in_get(exp->tuple.src.l3num, orig->dst.protonum, + &orig->src.u3, orig->src.u.tcp.port, + &orig->dst.u3, orig->dst.u.tcp.port); + if (cp) { + /* Change reply VS->CLIENT to RS->CLIENT */ + new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; + IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuples=" FMT_TUPLE ", " + FMT_TUPLE ", found outin cp=" FMT_CONN "\n", + __func__, ct, ct->status, + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), + ARG_CONN(cp)); + new_reply.src.u3 = cp->daddr; + new_reply.src.u.tcp.port = cp->dport; + IP_VS_DBG(7, "%s: ct=%p, new tuples=" FMT_TUPLE ", " + FMT_TUPLE ", outin cp=" FMT_CONN "\n", + __func__, ct, + ARG_TUPLE(orig), ARG_TUPLE(&new_reply), + ARG_CONN(cp)); + goto alter; + } + + IP_VS_DBG(7, "%s: ct=%p, status=0x%lX, tuple=" FMT_TUPLE + " - unknown expect\n", + __func__, ct, ct->status, ARG_TUPLE(orig)); + return; + +alter: + /* Never alter conntrack for non-NAT conns */ + if (IP_VS_FWD_METHOD(cp) == IP_VS_CONN_F_MASQ) + nf_conntrack_alter_reply(ct, &new_reply); + ip_vs_conn_put(cp); + return; +} + +/* + * Create NF conntrack expectation with wildcard (optional) source port. + * Then the default callback function will alter the reply and will confirm + * the conntrack entry when the first packet comes. + * Use port 0 to expect connection from any port. + */ +void ip_vs_nfct_expect_related(struct sk_buff *skb, struct nf_conn *ct, + struct ip_vs_conn *cp, u_int8_t proto, + const __be16 port, int from_rs) +{ + struct nf_conntrack_expect *exp; + + if (ct == NULL || nf_ct_is_untracked(ct)) + return; + + exp = nf_ct_expect_alloc(ct); + if (!exp) + return; + + nf_ct_expect_init(exp, NF_CT_EXPECT_CLASS_DEFAULT, nf_ct_l3num(ct), + from_rs ? &cp->daddr : &cp->caddr, + from_rs ? &cp->caddr : &cp->vaddr, + proto, port ? &port : NULL, + from_rs ? &cp->cport : &cp->vport); + + exp->expectfn = ip_vs_nfct_expect_callback; + + IP_VS_DBG(7, "%s: ct=%p, expect tuple=" FMT_TUPLE "\n", + __func__, ct, ARG_TUPLE(&exp->tuple)); + nf_ct_expect_related(exp); + nf_ct_expect_put(exp); +} +EXPORT_SYMBOL(ip_vs_nfct_expect_related); + +/* + * Our connection was terminated, try to drop the conntrack immediately + */ +void ip_vs_conn_drop_conntrack(struct ip_vs_conn *cp) +{ + struct nf_conntrack_tuple_hash *h; + struct nf_conn *ct; + struct nf_conntrack_tuple tuple; + + if (!cp->cport) + return; + + tuple = (struct nf_conntrack_tuple) { + .dst = { .protonum = cp->protocol, .dir = IP_CT_DIR_ORIGINAL } }; + tuple.src.u3 = cp->caddr; + tuple.src.u.all = cp->cport; + tuple.src.l3num = cp->af; + tuple.dst.u3 = cp->vaddr; + tuple.dst.u.all = cp->vport; + + IP_VS_DBG(7, "%s: dropping conntrack with tuple=" FMT_TUPLE + " for conn " FMT_CONN "\n", + __func__, ARG_TUPLE(&tuple), ARG_CONN(cp)); + + h = nf_conntrack_find_get(&init_net, NF_CT_DEFAULT_ZONE, &tuple); + if (h) { + ct = nf_ct_tuplehash_to_ctrack(h); + /* Show what happens instead of calling nf_ct_kill() */ + if (del_timer(&ct->timeout)) { + IP_VS_DBG(7, "%s: ct=%p, deleted conntrack timer for tuple=" + FMT_TUPLE "\n", + __func__, ct, ARG_TUPLE(&tuple)); + if (ct->timeout.function) + ct->timeout.function(ct->timeout.data); + } else { + IP_VS_DBG(7, "%s: ct=%p, no conntrack timer for tuple=" + FMT_TUPLE "\n", + __func__, ct, ARG_TUPLE(&tuple)); + } + nf_ct_put(ct); + } else { + IP_VS_DBG(7, "%s: no conntrack for tuple=" FMT_TUPLE "\n", + __func__, ARG_TUPLE(&tuple)); + } +} + diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 49df6bea6a2..8817afa34e6 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -28,7 +28,6 @@ #include #include #include -#include #include #include @@ -194,12 +193,37 @@ ip_vs_dst_reset(struct ip_vs_dest *dest) dst_release(old_dst); } -#define IP_VS_XMIT(pf, skb, rt) \ +#define IP_VS_XMIT_TUNNEL(skb, cp) \ +({ \ + int __ret = NF_ACCEPT; \ + \ + if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT)) \ + __ret = ip_vs_confirm_conntrack(skb, cp); \ + if (__ret == NF_ACCEPT) { \ + nf_reset(skb); \ + (skb)->ip_summed = CHECKSUM_NONE; \ + } \ + __ret; \ +}) + +#define IP_VS_XMIT_NAT(pf, skb, cp) \ do { \ - (skb)->ipvs_property = 1; \ + if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ + (skb)->ipvs_property = 1; \ + else \ + ip_vs_update_conntrack(skb, cp, 1); \ skb_forward_csum(skb); \ NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ - (rt)->dst.dev, dst_output); \ + skb_dst(skb)->dev, dst_output); \ +} while (0) + +#define IP_VS_XMIT(pf, skb, cp) \ +do { \ + if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ + (skb)->ipvs_property = 1; \ + skb_forward_csum(skb); \ + NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ + skb_dst(skb)->dev, dst_output); \ } while (0) @@ -271,7 +295,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(NFPROTO_IPV4, skb, rt); + IP_VS_XMIT(NFPROTO_IPV4, skb, cp); LeaveFunction(10); return NF_STOLEN; @@ -335,7 +359,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(NFPROTO_IPV6, skb, rt); + IP_VS_XMIT(NFPROTO_IPV6, skb, cp); LeaveFunction(10); return NF_STOLEN; @@ -349,36 +373,6 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, } #endif -void -ip_vs_update_conntrack(struct sk_buff *skb, struct ip_vs_conn *cp, int outin) -{ - struct nf_conn *ct = (struct nf_conn *)skb->nfct; - struct nf_conntrack_tuple new_tuple; - - if (ct == NULL || nf_ct_is_untracked(ct) || nf_ct_is_confirmed(ct)) - return; - - /* - * The connection is not yet in the hashtable, so we update it. - * CIP->VIP will remain the same, so leave the tuple in - * IP_CT_DIR_ORIGINAL untouched. When the reply comes back from the - * real-server we will see RIP->DIP. - */ - new_tuple = ct->tuplehash[IP_CT_DIR_REPLY].tuple; - if (outin) - new_tuple.src.u3 = cp->daddr; - else - new_tuple.dst.u3 = cp->vaddr; - /* - * This will also take care of UDP and other protocols. - */ - if (outin) - new_tuple.src.u.tcp.port = cp->dport; - else - new_tuple.dst.u.tcp.port = cp->vport; - nf_conntrack_alter_reply(ct, &new_tuple); -} - /* * NAT transmitter (only for outside-to-inside nat forwarding) * Not used for related ICMP @@ -434,8 +428,6 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); - ip_vs_update_conntrack(skb, cp, 1); - /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still MTU problem. */ @@ -443,7 +435,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(NFPROTO_IPV4, skb, rt); + IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp); LeaveFunction(10); return NF_STOLEN; @@ -451,8 +443,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, tx_error_icmp: dst_link_failure(skb); tx_error: - LeaveFunction(10); kfree_skb(skb); + LeaveFunction(10); return NF_STOLEN; tx_error_put: ip_rt_put(rt); @@ -512,8 +504,6 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); - ip_vs_update_conntrack(skb, cp, 1); - /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still MTU problem. */ @@ -521,7 +511,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(NFPROTO_IPV6, skb, rt); + IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp); LeaveFunction(10); return NF_STOLEN; @@ -571,6 +561,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ int mtu; + int ret; EnterFunction(10); @@ -655,7 +646,11 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - ip_local_out(skb); + ret = IP_VS_XMIT_TUNNEL(skb, cp); + if (ret == NF_ACCEPT) + ip_local_out(skb); + else if (ret == NF_DROP) + kfree_skb(skb); LeaveFunction(10); @@ -681,6 +676,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ipv6hdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ int mtu; + int ret; EnterFunction(10); @@ -761,7 +757,11 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - ip6_local_out(skb); + ret = IP_VS_XMIT_TUNNEL(skb, cp); + if (ret == NF_ACCEPT) + ip6_local_out(skb); + else if (ret == NF_DROP) + kfree_skb(skb); LeaveFunction(10); @@ -820,7 +820,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(NFPROTO_IPV4, skb, rt); + IP_VS_XMIT(NFPROTO_IPV4, skb, cp); LeaveFunction(10); return NF_STOLEN; @@ -873,7 +873,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(NFPROTO_IPV6, skb, rt); + IP_VS_XMIT(NFPROTO_IPV6, skb, cp); LeaveFunction(10); return NF_STOLEN; @@ -947,7 +947,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(NFPROTO_IPV4, skb, rt); + IP_VS_XMIT(NFPROTO_IPV4, skb, cp); rc = NF_STOLEN; goto out; @@ -1022,7 +1022,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(NFPROTO_IPV6, skb, rt); + IP_VS_XMIT(NFPROTO_IPV6, skb, cp); rc = NF_STOLEN; goto out; -- cgit v1.2.3 From 8a8030407f55a6aaedb51167c1a2383311fcd707 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Tue, 21 Sep 2010 17:38:57 +0200 Subject: ipvs: make rerouting optional with snat_reroute Add new sysctl flag "snat_reroute". Recent kernels use ip_route_me_harder() to route LVS-NAT responses properly by VIP when there are multiple paths to client. But setups that do not have alternative default routes can skip this routing lookup by using snat_reroute=0. Signed-off-by: Julian Anastasov Signed-off-by: Patrick McHardy --- include/net/ip_vs.h | 1 + net/netfilter/ipvs/ip_vs_core.c | 37 +++++++++++++++++++++++++++++-------- net/netfilter/ipvs/ip_vs_ctl.c | 8 ++++++++ 3 files changed, 38 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index e8ec5231eae..3915a4f4cd3 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -801,6 +801,7 @@ extern int sysctl_ip_vs_expire_quiescent_template; extern int sysctl_ip_vs_sync_threshold[2]; extern int sysctl_ip_vs_nat_icmp_send; extern int sysctl_ip_vs_conntrack; +extern int sysctl_ip_vs_snat_reroute; extern struct ip_vs_stats ip_vs_stats; extern const struct ctl_path net_vs_ctl_path[]; diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 7fbc80d81fe..06c388bf4e3 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -929,20 +929,31 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, ip_send_check(ip_hdr(skb)); } + /* + * nf_iterate does not expect change in the skb->dst->dev. + * It looks like it is not fatal to enable this code for hooks + * where our handlers are at the end of the chain list and + * when all next handlers use skb->dst->dev and not outdev. + * It will definitely route properly the inout NAT traffic + * when multiple paths are used. + */ + /* For policy routing, packets originating from this * machine itself may be routed differently to packets * passing through. We want this packet to be routed as * if it came from this machine itself. So re-compute * the routing information. */ + if (sysctl_ip_vs_snat_reroute) { #ifdef CONFIG_IP_VS_IPV6 - if (af == AF_INET6) { - if (ip6_route_me_harder(skb) != 0) - goto drop; - } else + if (af == AF_INET6) { + if (ip6_route_me_harder(skb) != 0) + goto drop; + } else #endif - if (ip_route_me_harder(skb, RTN_LOCAL) != 0) - goto drop; + if (ip_route_me_harder(skb, RTN_LOCAL) != 0) + goto drop; + } IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); @@ -991,8 +1002,13 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, if (unlikely(iph.protocol == IPPROTO_ICMPV6)) { int related, verdict = ip_vs_out_icmp_v6(skb, &related); - if (related) + if (related) { + if (sysctl_ip_vs_snat_reroute && + NF_ACCEPT == verdict && + ip6_route_me_harder(skb)) + verdict = NF_DROP; return verdict; + } ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); } } else @@ -1000,8 +1016,13 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, if (unlikely(iph.protocol == IPPROTO_ICMP)) { int related, verdict = ip_vs_out_icmp(skb, &related); - if (related) + if (related) { + if (sysctl_ip_vs_snat_reroute && + NF_ACCEPT == verdict && + ip_route_me_harder(skb, RTN_LOCAL)) + verdict = NF_DROP; return verdict; + } ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); } diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index d2d842f292c..e637cd0384b 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -91,6 +91,7 @@ int sysctl_ip_vs_nat_icmp_send = 0; #ifdef CONFIG_IP_VS_NFCT int sysctl_ip_vs_conntrack; #endif +int sysctl_ip_vs_snat_reroute = 1; #ifdef CONFIG_IP_VS_DEBUG @@ -1599,6 +1600,13 @@ static struct ctl_table vs_vars[] = { .mode = 0644, .proc_handler = proc_do_defense_mode, }, + { + .procname = "snat_reroute", + .data = &sysctl_ip_vs_snat_reroute, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = &proc_dointvec, + }, #if 0 { .procname = "timeout_established", -- cgit v1.2.3 From 48daa3bb84d547828871534caa51427a3fe90748 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 21 Sep 2010 06:57:39 +0000 Subject: ipv6: addrconf.h cleanups - Use rcu_dereference_rtnl() in __in6_dev_get - kerneldoc for __in6_dev_get() and in6_dev_get() - Use inline functions instead of macros Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/addrconf.h | 63 ++++++++++++++++++++++++++++++++++---------------- 1 file changed, 43 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 45375b41a2a..7d178a758ac 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -174,20 +174,32 @@ extern int ipv6_chk_acast_addr(struct net *net, struct net_device *dev, extern int register_inet6addr_notifier(struct notifier_block *nb); extern int unregister_inet6addr_notifier(struct notifier_block *nb); -static inline struct inet6_dev * -__in6_dev_get(struct net_device *dev) +/** + * __in6_dev_get - get inet6_dev pointer from netdevice + * @dev: network device + * + * Caller must hold rcu_read_lock or RTNL, because this function + * does not take a reference on the inet6_dev. + */ +static inline struct inet6_dev *__in6_dev_get(const struct net_device *dev) { - return rcu_dereference_check(dev->ip6_ptr, - rcu_read_lock_held() || - lockdep_rtnl_is_held()); + return rcu_dereference_rtnl(dev->ip6_ptr); } -static inline struct inet6_dev * -in6_dev_get(struct net_device *dev) +/** + * in6_dev_get - get inet6_dev pointer from netdevice + * @dev: network device + * + * This version can be used in any context, and takes a reference + * on the inet6_dev. Callers must use in6_dev_put() later to + * release this reference. + */ +static inline struct inet6_dev *in6_dev_get(const struct net_device *dev) { - struct inet6_dev *idev = NULL; + struct inet6_dev *idev; + rcu_read_lock(); - idev = __in6_dev_get(dev); + idev = rcu_dereference(dev->ip6_ptr); if (idev) atomic_inc(&idev->refcnt); rcu_read_unlock(); @@ -196,16 +208,21 @@ in6_dev_get(struct net_device *dev) extern void in6_dev_finish_destroy(struct inet6_dev *idev); -static inline void -in6_dev_put(struct inet6_dev *idev) +static inline void in6_dev_put(struct inet6_dev *idev) { if (atomic_dec_and_test(&idev->refcnt)) in6_dev_finish_destroy(idev); } -#define __in6_dev_put(idev) atomic_dec(&(idev)->refcnt) -#define in6_dev_hold(idev) atomic_inc(&(idev)->refcnt) +static inline void __in6_dev_put(struct inet6_dev *idev) +{ + atomic_dec(&idev->refcnt); +} +static inline void in6_dev_hold(struct inet6_dev *idev) +{ + atomic_inc(&idev->refcnt); +} extern void inet6_ifa_finish_destroy(struct inet6_ifaddr *ifp); @@ -215,9 +232,15 @@ static inline void in6_ifa_put(struct inet6_ifaddr *ifp) inet6_ifa_finish_destroy(ifp); } -#define __in6_ifa_put(ifp) atomic_dec(&(ifp)->refcnt) -#define in6_ifa_hold(ifp) atomic_inc(&(ifp)->refcnt) +static inline void __in6_ifa_put(struct inet6_ifaddr *ifp) +{ + atomic_dec(&ifp->refcnt); +} +static inline void in6_ifa_hold(struct inet6_ifaddr *ifp) +{ + atomic_inc(&ifp->refcnt); +} /* @@ -240,23 +263,23 @@ static inline int ipv6_addr_is_multicast(const struct in6_addr *addr) static inline int ipv6_addr_is_ll_all_nodes(const struct in6_addr *addr) { - return (((addr->s6_addr32[0] ^ htonl(0xff020000)) | + return ((addr->s6_addr32[0] ^ htonl(0xff020000)) | addr->s6_addr32[1] | addr->s6_addr32[2] | - (addr->s6_addr32[3] ^ htonl(0x00000001))) == 0); + (addr->s6_addr32[3] ^ htonl(0x00000001))) == 0; } static inline int ipv6_addr_is_ll_all_routers(const struct in6_addr *addr) { - return (((addr->s6_addr32[0] ^ htonl(0xff020000)) | + return ((addr->s6_addr32[0] ^ htonl(0xff020000)) | addr->s6_addr32[1] | addr->s6_addr32[2] | - (addr->s6_addr32[3] ^ htonl(0x00000002))) == 0); + (addr->s6_addr32[3] ^ htonl(0x00000002))) == 0; } extern int __ipv6_isatap_ifid(u8 *eui, __be32 addr); static inline int ipv6_addr_is_isatap(const struct in6_addr *addr) { - return ((addr->s6_addr32[2] | htonl(0x02000000)) == htonl(0x02005EFE)); + return (addr->s6_addr32[2] | htonl(0x02000000)) == htonl(0x02005EFE); } #ifdef CONFIG_PROC_FS -- cgit v1.2.3 From 8b008faf92ac8f7eeb65e8cd36077601af7c46db Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 22 Sep 2010 08:36:59 +0200 Subject: netfilter: ctnetlink: allow to specify the expectation flags With this patch, you can specify the expectation flags for user-space created expectations. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_common.h | 4 ++++ include/linux/netfilter/nfnetlink_conntrack.h | 1 + include/net/netfilter/nf_conntrack_expect.h | 3 --- net/netfilter/nf_conntrack_netlink.c | 8 +++++++- 4 files changed, 12 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 1afd18c855e..fdc50cae861 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -100,6 +100,10 @@ enum ip_conntrack_expect_events { IPEXP_NEW, /* new expectation */ }; +/* expectation flags */ +#define NF_CT_EXPECT_PERMANENT 0x1 +#define NF_CT_EXPECT_INACTIVE 0x2 + #ifdef __KERNEL__ struct ip_conntrack_stat { unsigned int searched; diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 9ed534c991b..455f0ce4f43 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -161,6 +161,7 @@ enum ctattr_expect { CTA_EXPECT_ID, CTA_EXPECT_HELP_NAME, CTA_EXPECT_ZONE, + CTA_EXPECT_FLAGS, __CTA_EXPECT_MAX }; #define CTA_EXPECT_MAX (__CTA_EXPECT_MAX - 1) diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 11e815084fc..96bb42af5fa 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -67,9 +67,6 @@ struct nf_conntrack_expect_policy { #define NF_CT_EXPECT_CLASS_DEFAULT 0 -#define NF_CT_EXPECT_PERMANENT 0x1 -#define NF_CT_EXPECT_INACTIVE 0x2 - int nf_conntrack_expect_init(struct net *net); void nf_conntrack_expect_fini(struct net *net); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 37533a30413..0804e0ef650 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1577,6 +1577,7 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, NLA_PUT_BE32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)); NLA_PUT_BE32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp)); + NLA_PUT_BE32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)); helper = rcu_dereference(nfct_help(master)->helper); if (helper) NLA_PUT_STRING(skb, CTA_EXPECT_HELP_NAME, helper->name); @@ -1734,6 +1735,7 @@ static const struct nla_policy exp_nla_policy[CTA_EXPECT_MAX+1] = { [CTA_EXPECT_ID] = { .type = NLA_U32 }, [CTA_EXPECT_HELP_NAME] = { .type = NLA_NUL_STRING }, [CTA_EXPECT_ZONE] = { .type = NLA_U16 }, + [CTA_EXPECT_FLAGS] = { .type = NLA_U32 }, }; static int @@ -1933,9 +1935,13 @@ ctnetlink_create_expect(struct net *net, u16 zone, goto out; } + if (cda[CTA_EXPECT_FLAGS]) + exp->flags = ntohl(nla_get_be32(cda[CTA_EXPECT_FLAGS])); + else + exp->flags = 0; + exp->class = 0; exp->expectfn = NULL; - exp->flags = 0; exp->master = ct; exp->helper = NULL; memcpy(&exp->tuple, &tuple, sizeof(struct nf_conntrack_tuple)); -- cgit v1.2.3 From a02cec2155fbea457eca8881870fd2de1a4c4c76 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Sep 2010 20:43:57 +0000 Subject: net: return operator cleanup Change "return (EXPR);" to "return EXPR;" return is not a function, parentheses are not required. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/atmdev.h | 2 +- include/linux/etherdevice.h | 4 +-- include/linux/netdevice.h | 2 +- include/linux/skbuff.h | 6 ++--- include/net/bluetooth/hci_core.h | 2 +- include/net/bluetooth/l2cap.h | 2 +- include/net/inet_ecn.h | 2 +- include/net/ip.h | 4 +-- include/net/ipv6.h | 35 +++++++++++++------------- include/net/irda/irlap.h | 2 +- include/net/irda/irlmp.h | 2 +- include/net/irda/irttp.h | 2 +- include/net/sch_generic.h | 2 +- include/net/sctp/sctp.h | 12 ++++----- include/net/sctp/sm.h | 10 ++++---- include/net/sctp/structs.h | 2 +- include/net/sctp/tsnmap.h | 2 +- include/net/tipc/tipc_msg.h | 10 ++++---- net/802/fc.c | 2 +- net/802/fddi.c | 12 ++++----- net/802/hippi.c | 2 +- net/802/tr.c | 2 +- net/8021q/vlan_core.c | 2 +- net/9p/client.c | 4 +-- net/bluetooth/rfcomm/core.c | 4 +-- net/core/flow.c | 4 +-- net/core/neighbour.c | 6 ++--- net/core/utils.c | 2 +- net/dccp/ccids/lib/loss_interval.c | 2 +- net/econet/af_econet.c | 4 +-- net/ethernet/eth.c | 2 +- net/ipv4/arp.c | 2 +- net/ipv4/datagram.c | 2 +- net/ipv4/inet_diag.c | 2 +- net/ipv4/ip_fragment.c | 4 +-- net/ipv4/ip_gre.c | 2 +- net/ipv4/netfilter/arp_tables.c | 2 +- net/ipv4/route.c | 2 +- net/ipv4/tcp_input.c | 10 ++++---- net/ipv4/tcp_minisocks.c | 2 +- net/ipv4/tcp_output.c | 8 +++--- net/ipv4/tcp_westwood.c | 2 +- net/ipv6/addrconf.c | 2 +- net/ipv6/addrlabel.c | 5 ++-- net/ipv6/af_inet6.c | 6 ++--- net/ipv6/exthdrs_core.c | 4 +-- net/ipv6/ip6_output.c | 4 +-- net/ipv6/ndisc.c | 8 +++--- net/ipv6/netfilter/ip6_tables.c | 14 +++++------ net/ipv6/raw.c | 12 ++++----- net/ipv6/route.c | 14 +++++------ net/ipv6/tcp_ipv6.c | 2 +- net/ipv6/xfrm6_policy.c | 2 +- net/irda/af_irda.c | 14 +++++------ net/irda/discovery.c | 2 +- net/irda/ircomm/ircomm_tty.c | 4 +-- net/irda/irlmp.c | 2 +- net/irda/irlmp_frame.c | 2 +- net/irda/irnet/irnet_irda.c | 22 ++++++++--------- net/irda/irnet/irnet_ppp.c | 8 +++--- net/key/af_key.c | 4 +-- net/mac80211/rate.c | 2 +- net/rfkill/input.c | 2 +- net/rose/rose_link.c | 4 +-- net/sctp/protocol.c | 2 +- net/sctp/socket.c | 6 ++--- net/sunrpc/auth_gss/auth_gss.c | 2 +- net/sunrpc/auth_gss/gss_generic_token.c | 44 ++++++++++++++++----------------- net/sunrpc/auth_gss/gss_krb5_seqnum.c | 2 +- net/sunrpc/auth_gss/gss_mech_switch.c | 2 +- net/sunrpc/sched.c | 2 +- net/tipc/addr.c | 2 +- net/tipc/bcast.c | 2 +- net/tipc/bearer.c | 2 +- net/tipc/dbg.c | 4 +-- net/tipc/link.c | 6 ++--- net/tipc/link.h | 16 ++++++------ net/tipc/msg.h | 6 ++--- net/tipc/name_table.c | 2 +- net/tipc/node.c | 6 ++--- net/tipc/port.h | 2 +- net/tipc/socket.c | 2 +- net/tipc/subscr.c | 2 +- net/wireless/core.h | 2 +- 84 files changed, 220 insertions(+), 222 deletions(-) (limited to 'include/net') diff --git a/include/linux/atmdev.h b/include/linux/atmdev.h index f6481daf6e5..a8e4e832cdb 100644 --- a/include/linux/atmdev.h +++ b/include/linux/atmdev.h @@ -449,7 +449,7 @@ void vcc_insert_socket(struct sock *sk); static inline int atm_guess_pdu2truesize(int size) { - return (SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info)); + return SKB_DATA_ALIGN(size) + sizeof(struct skb_shared_info); } diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index fb6aa607092..f16a01081e1 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -71,7 +71,7 @@ static inline int is_zero_ether_addr(const u8 *addr) */ static inline int is_multicast_ether_addr(const u8 *addr) { - return (0x01 & addr[0]); + return 0x01 & addr[0]; } /** @@ -82,7 +82,7 @@ static inline int is_multicast_ether_addr(const u8 *addr) */ static inline int is_local_ether_addr(const u8 *addr) { - return (0x02 & addr[0]); + return 0x02 & addr[0]; } /** diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f7f1302138a..45dcda5bfda 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1676,7 +1676,7 @@ static inline void netif_wake_subqueue(struct net_device *dev, u16 queue_index) */ static inline int netif_is_multiqueue(const struct net_device *dev) { - return (dev->num_tx_queues > 1); + return dev->num_tx_queues > 1; } extern void netif_set_real_num_tx_queues(struct net_device *dev, diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 9e8085a8958..b2c41d19735 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -601,7 +601,7 @@ static inline int skb_queue_empty(const struct sk_buff_head *list) static inline bool skb_queue_is_last(const struct sk_buff_head *list, const struct sk_buff *skb) { - return (skb->next == (struct sk_buff *) list); + return skb->next == (struct sk_buff *)list; } /** @@ -614,7 +614,7 @@ static inline bool skb_queue_is_last(const struct sk_buff_head *list, static inline bool skb_queue_is_first(const struct sk_buff_head *list, const struct sk_buff *skb) { - return (skb->prev == (struct sk_buff *) list); + return skb->prev == (struct sk_buff *)list; } /** @@ -2156,7 +2156,7 @@ static inline u16 skb_get_rx_queue(const struct sk_buff *skb) static inline bool skb_rx_queue_recorded(const struct sk_buff *skb) { - return (skb->queue_mapping != 0); + return skb->queue_mapping != 0; } extern u16 skb_tx_hash(const struct net_device *dev, diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 4568b938ca3..ebec8c9a929 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -233,7 +233,7 @@ static inline void inquiry_cache_init(struct hci_dev *hdev) static inline int inquiry_cache_empty(struct hci_dev *hdev) { struct inquiry_cache *c = &hdev->inq_cache; - return (c->list == NULL); + return c->list == NULL; } static inline long inquiry_cache_age(struct hci_dev *hdev) diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h index 6c241444f90..c819c8bf9b6 100644 --- a/include/net/bluetooth/l2cap.h +++ b/include/net/bluetooth/l2cap.h @@ -414,7 +414,7 @@ static inline int l2cap_tx_window_full(struct sock *sk) if (sub < 0) sub += 64; - return (sub == pi->remote_tx_win); + return sub == pi->remote_tx_win; } #define __get_txseq(ctrl) ((ctrl) & L2CAP_CTRL_TXSEQ) >> 1 diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index 9b5d08f4f6e..88bdd010d65 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -27,7 +27,7 @@ static inline int INET_ECN_is_not_ect(__u8 dsfield) static inline int INET_ECN_is_capable(__u8 dsfield) { - return (dsfield & INET_ECN_ECT_0); + return dsfield & INET_ECN_ECT_0; } static inline __u8 INET_ECN_encapsulate(__u8 outer, __u8 inner) diff --git a/include/net/ip.h b/include/net/ip.h index 7691aca133d..dbee3fe260e 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -238,9 +238,9 @@ int ip_decrease_ttl(struct iphdr *iph) static inline int ip_dont_fragment(struct sock *sk, struct dst_entry *dst) { - return (inet_sk(sk)->pmtudisc == IP_PMTUDISC_DO || + return inet_sk(sk)->pmtudisc == IP_PMTUDISC_DO || (inet_sk(sk)->pmtudisc == IP_PMTUDISC_WANT && - !(dst_metric_locked(dst, RTAX_MTU)))); + !(dst_metric_locked(dst, RTAX_MTU))); } extern void __ip_select_ident(struct iphdr *iph, struct dst_entry *dst, int more); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 1f841241099..4a3cd2cd2f5 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -262,7 +262,7 @@ static inline int ipv6_addr_scope(const struct in6_addr *addr) static inline int __ipv6_addr_src_scope(int type) { - return (type == IPV6_ADDR_ANY ? __IPV6_ADDR_SCOPE_INVALID : (type >> 16)); + return (type == IPV6_ADDR_ANY) ? __IPV6_ADDR_SCOPE_INVALID : (type >> 16); } static inline int ipv6_addr_src_scope(const struct in6_addr *addr) @@ -279,10 +279,10 @@ static inline int ipv6_masked_addr_cmp(const struct in6_addr *a1, const struct in6_addr *m, const struct in6_addr *a2) { - return (!!(((a1->s6_addr32[0] ^ a2->s6_addr32[0]) & m->s6_addr32[0]) | - ((a1->s6_addr32[1] ^ a2->s6_addr32[1]) & m->s6_addr32[1]) | - ((a1->s6_addr32[2] ^ a2->s6_addr32[2]) & m->s6_addr32[2]) | - ((a1->s6_addr32[3] ^ a2->s6_addr32[3]) & m->s6_addr32[3]))); + return !!(((a1->s6_addr32[0] ^ a2->s6_addr32[0]) & m->s6_addr32[0]) | + ((a1->s6_addr32[1] ^ a2->s6_addr32[1]) & m->s6_addr32[1]) | + ((a1->s6_addr32[2] ^ a2->s6_addr32[2]) & m->s6_addr32[2]) | + ((a1->s6_addr32[3] ^ a2->s6_addr32[3]) & m->s6_addr32[3])); } static inline void ipv6_addr_copy(struct in6_addr *a1, const struct in6_addr *a2) @@ -317,10 +317,10 @@ static inline void ipv6_addr_set(struct in6_addr *addr, static inline int ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2) { - return (((a1->s6_addr32[0] ^ a2->s6_addr32[0]) | - (a1->s6_addr32[1] ^ a2->s6_addr32[1]) | - (a1->s6_addr32[2] ^ a2->s6_addr32[2]) | - (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0); + return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) | + (a1->s6_addr32[1] ^ a2->s6_addr32[1]) | + (a1->s6_addr32[2] ^ a2->s6_addr32[2]) | + (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0; } static inline int __ipv6_prefix_equal(const __be32 *a1, const __be32 *a2, @@ -373,20 +373,20 @@ int ip6_frag_match(struct inet_frag_queue *q, void *a); static inline int ipv6_addr_any(const struct in6_addr *a) { - return ((a->s6_addr32[0] | a->s6_addr32[1] | - a->s6_addr32[2] | a->s6_addr32[3] ) == 0); + return (a->s6_addr32[0] | a->s6_addr32[1] | + a->s6_addr32[2] | a->s6_addr32[3]) == 0; } static inline int ipv6_addr_loopback(const struct in6_addr *a) { - return ((a->s6_addr32[0] | a->s6_addr32[1] | - a->s6_addr32[2] | (a->s6_addr32[3] ^ htonl(1))) == 0); + return (a->s6_addr32[0] | a->s6_addr32[1] | + a->s6_addr32[2] | (a->s6_addr32[3] ^ htonl(1))) == 0; } static inline int ipv6_addr_v4mapped(const struct in6_addr *a) { - return ((a->s6_addr32[0] | a->s6_addr32[1] | - (a->s6_addr32[2] ^ htonl(0x0000ffff))) == 0); + return (a->s6_addr32[0] | a->s6_addr32[1] | + (a->s6_addr32[2] ^ htonl(0x0000ffff))) == 0; } /* @@ -395,8 +395,7 @@ static inline int ipv6_addr_v4mapped(const struct in6_addr *a) */ static inline int ipv6_addr_orchid(const struct in6_addr *a) { - return ((a->s6_addr32[0] & htonl(0xfffffff0)) - == htonl(0x20010010)); + return (a->s6_addr32[0] & htonl(0xfffffff0)) == htonl(0x20010010); } static inline void ipv6_addr_set_v4mapped(const __be32 addr, @@ -441,7 +440,7 @@ static inline int __ipv6_addr_diff(const void *token1, const void *token2, int a * if returned value is greater than prefix length. * --ANK (980803) */ - return (addrlen << 5); + return addrlen << 5; } static inline int ipv6_addr_diff(const struct in6_addr *a1, const struct in6_addr *a2) diff --git a/include/net/irda/irlap.h b/include/net/irda/irlap.h index 9d0c78ea92f..17fcd964f9d 100644 --- a/include/net/irda/irlap.h +++ b/include/net/irda/irlap.h @@ -282,7 +282,7 @@ static inline int irlap_is_primary(struct irlap_cb *self) default: ret = -1; } - return(ret); + return ret; } /* Clear a pending IrLAP disconnect. - Jean II */ diff --git a/include/net/irda/irlmp.h b/include/net/irda/irlmp.h index 3ffc1d0f93d..fff11b7fe8a 100644 --- a/include/net/irda/irlmp.h +++ b/include/net/irda/irlmp.h @@ -274,7 +274,7 @@ static inline int irlmp_lap_tx_queue_full(struct lsap_cb *self) if (self->lap->irlap == NULL) return 0; - return(IRLAP_GET_TX_QUEUE_LEN(self->lap->irlap) >= LAP_HIGH_THRESHOLD); + return IRLAP_GET_TX_QUEUE_LEN(self->lap->irlap) >= LAP_HIGH_THRESHOLD; } /* After doing a irlmp_dup(), this get one of the two socket back into diff --git a/include/net/irda/irttp.h b/include/net/irda/irttp.h index 11aee7a2972..af4b87721d1 100644 --- a/include/net/irda/irttp.h +++ b/include/net/irda/irttp.h @@ -204,7 +204,7 @@ static inline int irttp_is_primary(struct tsap_cb *self) (self->lsap->lap == NULL) || (self->lsap->lap->irlap == NULL)) return -2; - return(irlap_is_primary(self->lsap->lap->irlap)); + return irlap_is_primary(self->lsap->lap->irlap); } #endif /* IRTTP_H */ diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 3c8728aaab4..eda8808fdac 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -601,7 +601,7 @@ static inline u32 qdisc_l2t(struct qdisc_rate_table* rtab, unsigned int pktlen) slot = 0; slot >>= rtab->rate.cell_log; if (slot > 255) - return (rtab->data[255]*(slot >> 8) + rtab->data[slot & 0xFF]); + return rtab->data[255]*(slot >> 8) + rtab->data[slot & 0xFF]; return rtab->data[slot]; } diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 2cb3980b161..505845ddb0b 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -405,7 +405,7 @@ static inline void sctp_v6_del_protocol(void) { return; } /* Map an association to an assoc_id. */ static inline sctp_assoc_t sctp_assoc2id(const struct sctp_association *asoc) { - return (asoc?asoc->assoc_id:0); + return asoc ? asoc->assoc_id : 0; } /* Look up the association by its id. */ @@ -473,7 +473,7 @@ static inline void sctp_skb_set_owner_r(struct sk_buff *skb, struct sock *sk) /* Tests if the list has one and only one entry. */ static inline int sctp_list_single_entry(struct list_head *head) { - return ((head->next != head) && (head->next == head->prev)); + return (head->next != head) && (head->next == head->prev); } /* Generate a random jitter in the range of -50% ~ +50% of input RTO. */ @@ -631,13 +631,13 @@ static inline int sctp_sanity_check(void) /* This is the hash function for the SCTP port hash table. */ static inline int sctp_phashfn(__u16 lport) { - return (lport & (sctp_port_hashsize - 1)); + return lport & (sctp_port_hashsize - 1); } /* This is the hash function for the endpoint hash table. */ static inline int sctp_ep_hashfn(__u16 lport) { - return (lport & (sctp_ep_hashsize - 1)); + return lport & (sctp_ep_hashsize - 1); } /* This is the hash function for the association hash table. */ @@ -645,7 +645,7 @@ static inline int sctp_assoc_hashfn(__u16 lport, __u16 rport) { int h = (lport << 16) + rport; h ^= h>>8; - return (h & (sctp_assoc_hashsize - 1)); + return h & (sctp_assoc_hashsize - 1); } /* This is the hash function for the association hash table. This is @@ -656,7 +656,7 @@ static inline int sctp_vtag_hashfn(__u16 lport, __u16 rport, __u32 vtag) { int h = (lport << 16) + rport; h ^= vtag; - return (h & (sctp_assoc_hashsize-1)); + return h & (sctp_assoc_hashsize - 1); } #define sctp_for_each_hentry(epb, node, head) \ diff --git a/include/net/sctp/sm.h b/include/net/sctp/sm.h index 4088c89a905..9352d12f02d 100644 --- a/include/net/sctp/sm.h +++ b/include/net/sctp/sm.h @@ -345,12 +345,12 @@ enum { static inline int TSN_lt(__u32 s, __u32 t) { - return (((s) - (t)) & TSN_SIGN_BIT); + return ((s) - (t)) & TSN_SIGN_BIT; } static inline int TSN_lte(__u32 s, __u32 t) { - return (((s) == (t)) || (((s) - (t)) & TSN_SIGN_BIT)); + return ((s) == (t)) || (((s) - (t)) & TSN_SIGN_BIT); } /* Compare two SSNs */ @@ -369,12 +369,12 @@ enum { static inline int SSN_lt(__u16 s, __u16 t) { - return (((s) - (t)) & SSN_SIGN_BIT); + return ((s) - (t)) & SSN_SIGN_BIT; } static inline int SSN_lte(__u16 s, __u16 t) { - return (((s) == (t)) || (((s) - (t)) & SSN_SIGN_BIT)); + return ((s) == (t)) || (((s) - (t)) & SSN_SIGN_BIT); } /* @@ -388,7 +388,7 @@ enum { static inline int ADDIP_SERIAL_gte(__u16 s, __u16 t) { - return (((s) == (t)) || (((t) - (s)) & ADDIP_SERIAL_SIGN_BIT)); + return ((s) == (t)) || (((t) - (s)) & ADDIP_SERIAL_SIGN_BIT); } /* Check VTAG of the packet matches the sender's own tag. */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index f9e7473613b..69fef4fb79c 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -847,7 +847,7 @@ void sctp_packet_free(struct sctp_packet *); static inline int sctp_packet_empty(struct sctp_packet *packet) { - return (packet->size == packet->overhead); + return packet->size == packet->overhead; } /* This represents a remote transport address. diff --git a/include/net/sctp/tsnmap.h b/include/net/sctp/tsnmap.h index 4aabc5a96cf..e7728bc14cc 100644 --- a/include/net/sctp/tsnmap.h +++ b/include/net/sctp/tsnmap.h @@ -157,7 +157,7 @@ __u16 sctp_tsnmap_pending(struct sctp_tsnmap *map); /* Is there a gap in the TSN map? */ static inline int sctp_tsnmap_has_gap(const struct sctp_tsnmap *map) { - return (map->cumulative_tsn_ack_point != map->max_tsn_seen); + return map->cumulative_tsn_ack_point != map->max_tsn_seen; } /* Mark a duplicate TSN. Note: limit the storage of duplicate TSN diff --git a/include/net/tipc/tipc_msg.h b/include/net/tipc/tipc_msg.h index 2e159a812f8..ffe50b4e7b9 100644 --- a/include/net/tipc/tipc_msg.h +++ b/include/net/tipc/tipc_msg.h @@ -107,7 +107,7 @@ static inline u32 msg_hdr_sz(struct tipc_msg *m) static inline int msg_short(struct tipc_msg *m) { - return (msg_hdr_sz(m) == 24); + return msg_hdr_sz(m) == 24; } static inline u32 msg_size(struct tipc_msg *m) @@ -117,7 +117,7 @@ static inline u32 msg_size(struct tipc_msg *m) static inline u32 msg_data_sz(struct tipc_msg *m) { - return (msg_size(m) - msg_hdr_sz(m)); + return msg_size(m) - msg_hdr_sz(m); } static inline unchar *msg_data(struct tipc_msg *m) @@ -132,17 +132,17 @@ static inline u32 msg_type(struct tipc_msg *m) static inline u32 msg_named(struct tipc_msg *m) { - return (msg_type(m) == TIPC_NAMED_MSG); + return msg_type(m) == TIPC_NAMED_MSG; } static inline u32 msg_mcast(struct tipc_msg *m) { - return (msg_type(m) == TIPC_MCAST_MSG); + return msg_type(m) == TIPC_MCAST_MSG; } static inline u32 msg_connected(struct tipc_msg *m) { - return (msg_type(m) == TIPC_CONN_MSG); + return msg_type(m) == TIPC_CONN_MSG; } static inline u32 msg_errcode(struct tipc_msg *m) diff --git a/net/802/fc.c b/net/802/fc.c index 34cf1ee014b..1e49f2d4ea9 100644 --- a/net/802/fc.c +++ b/net/802/fc.c @@ -70,7 +70,7 @@ static int fc_header(struct sk_buff *skb, struct net_device *dev, if(daddr) { memcpy(fch->daddr,daddr,dev->addr_len); - return(hdr_len); + return hdr_len; } return -hdr_len; } diff --git a/net/802/fddi.c b/net/802/fddi.c index 3ef0ab0a543..94b3ad08f39 100644 --- a/net/802/fddi.c +++ b/net/802/fddi.c @@ -82,10 +82,10 @@ static int fddi_header(struct sk_buff *skb, struct net_device *dev, if (daddr != NULL) { memcpy(fddi->daddr, daddr, dev->addr_len); - return(hl); + return hl; } - return(-hl); + return -hl; } @@ -108,7 +108,7 @@ static int fddi_rebuild_header(struct sk_buff *skb) { printk("%s: Don't know how to resolve type %04X addresses.\n", skb->dev->name, ntohs(fddi->hdr.llc_snap.ethertype)); - return(0); + return 0; } } @@ -162,7 +162,7 @@ __be16 fddi_type_trans(struct sk_buff *skb, struct net_device *dev) /* Assume 802.2 SNAP frames, for now */ - return(type); + return type; } EXPORT_SYMBOL(fddi_type_trans); @@ -170,9 +170,9 @@ EXPORT_SYMBOL(fddi_type_trans); int fddi_change_mtu(struct net_device *dev, int new_mtu) { if ((new_mtu < FDDI_K_SNAP_HLEN) || (new_mtu > FDDI_K_SNAP_DLEN)) - return(-EINVAL); + return -EINVAL; dev->mtu = new_mtu; - return(0); + return 0; } EXPORT_SYMBOL(fddi_change_mtu); diff --git a/net/802/hippi.c b/net/802/hippi.c index cd3e8e92952..91aca8780fd 100644 --- a/net/802/hippi.c +++ b/net/802/hippi.c @@ -152,7 +152,7 @@ int hippi_change_mtu(struct net_device *dev, int new_mtu) if ((new_mtu < 68) || (new_mtu > 65280)) return -EINVAL; dev->mtu = new_mtu; - return(0); + return 0; } EXPORT_SYMBOL(hippi_change_mtu); diff --git a/net/802/tr.c b/net/802/tr.c index 1c6e596074d..5e20cf8a074 100644 --- a/net/802/tr.c +++ b/net/802/tr.c @@ -145,7 +145,7 @@ static int tr_header(struct sk_buff *skb, struct net_device *dev, { memcpy(trh->daddr,daddr,dev->addr_len); tr_source_route(skb, trh, dev); - return(hdr_len); + return hdr_len; } return -hdr_len; diff --git a/net/8021q/vlan_core.c b/net/8021q/vlan_core.c index 889f4ac4459..0eb486d342d 100644 --- a/net/8021q/vlan_core.c +++ b/net/8021q/vlan_core.c @@ -27,7 +27,7 @@ int __vlan_hwaccel_rx(struct sk_buff *skb, struct vlan_group *grp, else if (vlan_id) goto drop; - return (polling ? netif_receive_skb(skb) : netif_rx(skb)); + return polling ? netif_receive_skb(skb) : netif_rx(skb); drop: dev_kfree_skb_any(skb); diff --git a/net/9p/client.c b/net/9p/client.c index dc6f2f26d02..f34b9f51081 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -61,13 +61,13 @@ static const match_table_t tokens = { inline int p9_is_proto_dotl(struct p9_client *clnt) { - return (clnt->proto_version == p9_proto_2000L); + return clnt->proto_version == p9_proto_2000L; } EXPORT_SYMBOL(p9_is_proto_dotl); inline int p9_is_proto_dotu(struct p9_client *clnt) { - return (clnt->proto_version == p9_proto_2000u); + return clnt->proto_version == p9_proto_2000u; } EXPORT_SYMBOL(p9_is_proto_dotu); diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 7dca91bb8c5..15ea84ba344 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -179,13 +179,13 @@ static unsigned char rfcomm_crc_table[256] = { /* FCS on 2 bytes */ static inline u8 __fcs(u8 *data) { - return (0xff - __crc(data)); + return 0xff - __crc(data); } /* FCS on 3 bytes */ static inline u8 __fcs2(u8 *data) { - return (0xff - rfcomm_crc_table[__crc(data) ^ data[2]]); + return 0xff - rfcomm_crc_table[__crc(data) ^ data[2]]; } /* Check FCS */ diff --git a/net/core/flow.c b/net/core/flow.c index b143b86b1f2..127c8a7ffd6 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -176,8 +176,8 @@ static u32 flow_hash_code(struct flow_cache *fc, { u32 *k = (u32 *) key; - return (jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) - & (flow_cache_hash_size(fc) - 1)); + return jhash2(k, (sizeof(*key) / sizeof(u32)), fcp->hash_rnd) + & (flow_cache_hash_size(fc) - 1); } typedef unsigned long flow_compare_t; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index a4e0a7482c2..96b1a749abb 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -122,7 +122,7 @@ static void neigh_cleanup_and_release(struct neighbour *neigh) unsigned long neigh_rand_reach_time(unsigned long base) { - return (base ? (net_random() % base) + (base >> 1) : 0); + return base ? (net_random() % base) + (base >> 1) : 0; } EXPORT_SYMBOL(neigh_rand_reach_time); @@ -766,9 +766,9 @@ next_elt: static __inline__ int neigh_max_probes(struct neighbour *n) { struct neigh_parms *p = n->parms; - return (n->nud_state & NUD_PROBE ? + return (n->nud_state & NUD_PROBE) ? p->ucast_probes : - p->ucast_probes + p->app_probes + p->mcast_probes); + p->ucast_probes + p->app_probes + p->mcast_probes; } static void neigh_invalidate(struct neighbour *neigh) diff --git a/net/core/utils.c b/net/core/utils.c index ec6bb322f37..5fea0ab2190 100644 --- a/net/core/utils.c +++ b/net/core/utils.c @@ -75,7 +75,7 @@ __be32 in_aton(const char *str) str++; } } - return(htonl(l)); + return htonl(l); } EXPORT_SYMBOL(in_aton); diff --git a/net/dccp/ccids/lib/loss_interval.c b/net/dccp/ccids/lib/loss_interval.c index 8fc3cbf7907..497723c4d4b 100644 --- a/net/dccp/ccids/lib/loss_interval.c +++ b/net/dccp/ccids/lib/loss_interval.c @@ -116,7 +116,7 @@ u8 tfrc_lh_update_i_mean(struct tfrc_loss_hist *lh, struct sk_buff *skb) cur->li_length = len; tfrc_lh_calc_i_mean(lh); - return (lh->i_mean < old_i_mean); + return lh->i_mean < old_i_mean; } /* Determine if `new_loss' does begin a new loss interval [RFC 4342, 10.2] */ diff --git a/net/econet/af_econet.c b/net/econet/af_econet.c index baa98fb8355..f8c1ae4b41f 100644 --- a/net/econet/af_econet.c +++ b/net/econet/af_econet.c @@ -392,7 +392,7 @@ static int econet_sendmsg(struct kiocb *iocb, struct socket *sock, dev_queue_xmit(skb); dev_put(dev); mutex_unlock(&econet_mutex); - return(len); + return len; out_free: kfree_skb(skb); @@ -637,7 +637,7 @@ static int econet_create(struct net *net, struct socket *sock, int protocol, eo->num = protocol; econet_insert_socket(&econet_sklist, sk); - return(0); + return 0; out: return err; } diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 85e7b455132..f00ef2f1d81 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -387,6 +387,6 @@ ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len) l = _format_mac_addr(buf, PAGE_SIZE, addr, len); l += scnprintf(buf + l, PAGE_SIZE - l, "\n"); - return ((ssize_t) l); + return (ssize_t)l; } EXPORT_SYMBOL(sysfs_format_mac); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index dcfe7e961c1..4083c186fd3 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -567,7 +567,7 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, if (out_dev) omi = IN_DEV_MEDIUM_ID(out_dev); - return (omi != imi && omi != -1); + return omi != imi && omi != -1; } /* diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index 721a8a37b45..174be6caa5c 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -73,6 +73,6 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) inet->inet_id = jiffies; sk_dst_set(sk, &rt->dst); - return(0); + return 0; } EXPORT_SYMBOL(ip4_datagram_connect); diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index e5fa2ddce32..ba804266584 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -425,7 +425,7 @@ static int inet_diag_bc_run(const void *bc, int len, bc += op->no; } } - return (len == 0); + return len == 0; } static int valid_cc(const void *bc, int len, int cc) diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index f4dc879e258..168440834ad 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -116,11 +116,11 @@ static int ip4_frag_match(struct inet_frag_queue *q, void *a) struct ip4_create_arg *arg = a; qp = container_of(q, struct ipq, q); - return (qp->id == arg->iph->id && + return qp->id == arg->iph->id && qp->saddr == arg->iph->saddr && qp->daddr == arg->iph->daddr && qp->protocol == arg->iph->protocol && - qp->user == arg->user); + qp->user == arg->user; } /* Memory Tracking Functions. */ diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 714b6a80361..0967d02fefd 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -659,7 +659,7 @@ drop: rcu_read_unlock(); drop_nolock: kfree_skb(skb); - return(0); + return 0; } static netdev_tx_t ipgre_tunnel_xmit(struct sk_buff *skb, struct net_device *dev) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index e8f4f9a57f1..8b642f15246 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -72,7 +72,7 @@ static inline int arp_devaddr_compare(const struct arpt_devaddr_info *ap, for (i = 0; i < len; i++) ret |= (hdr_addr[i] ^ ap->addr[i]) & ap->mask[i]; - return (ret != 0); + return ret != 0; } /* diff --git a/net/ipv4/route.c b/net/ipv4/route.c index e24d48dd99d..ae1d4a41f1c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2791,7 +2791,7 @@ static int ipv4_dst_blackhole(struct net *net, struct rtable **rp, struct flowi dst_release(&(*rp)->dst); *rp = rt; - return (rt ? 0 : -ENOMEM); + return rt ? 0 : -ENOMEM; } int ip_route_output_flow(struct net *net, struct rtable **rp, struct flowi *flp, diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 1bc87a05c73..51966b3f971 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -2301,7 +2301,7 @@ static inline int tcp_dupack_heuristics(struct tcp_sock *tp) static inline int tcp_skb_timedout(struct sock *sk, struct sk_buff *skb) { - return (tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto); + return tcp_time_stamp - TCP_SKB_CB(skb)->when > inet_csk(sk)->icsk_rto; } static inline int tcp_head_timedout(struct sock *sk) @@ -3398,8 +3398,8 @@ static void tcp_ack_probe(struct sock *sk) static inline int tcp_ack_is_dubious(const struct sock *sk, const int flag) { - return (!(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) || - inet_csk(sk)->icsk_ca_state != TCP_CA_Open); + return !(flag & FLAG_NOT_DUP) || (flag & FLAG_CA_ALERT) || + inet_csk(sk)->icsk_ca_state != TCP_CA_Open; } static inline int tcp_may_raise_cwnd(const struct sock *sk, const int flag) @@ -3416,9 +3416,9 @@ static inline int tcp_may_update_window(const struct tcp_sock *tp, const u32 ack, const u32 ack_seq, const u32 nwin) { - return (after(ack, tp->snd_una) || + return after(ack, tp->snd_una) || after(ack_seq, tp->snd_wl1) || - (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd)); + (ack_seq == tp->snd_wl1 && nwin > tp->snd_wnd); } /* Update our send window. diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index f25b56cb85c..43cf901d765 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -55,7 +55,7 @@ static __inline__ int tcp_in_window(u32 seq, u32 end_seq, u32 s_win, u32 e_win) return 1; if (after(end_seq, s_win) && before(seq, e_win)) return 1; - return (seq == e_win && seq == end_seq); + return seq == e_win && seq == end_seq; } /* diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index ea09d2fd50c..05b1ecf3676 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1370,9 +1370,9 @@ static inline int tcp_nagle_check(const struct tcp_sock *tp, const struct sk_buff *skb, unsigned mss_now, int nonagle) { - return (skb->len < mss_now && + return skb->len < mss_now && ((nonagle & TCP_NAGLE_CORK) || - (!nonagle && tp->packets_out && tcp_minshall_check(tp)))); + (!nonagle && tp->packets_out && tcp_minshall_check(tp))); } /* Return non-zero if the Nagle test allows this packet to be @@ -1443,10 +1443,10 @@ int tcp_may_send_now(struct sock *sk) struct tcp_sock *tp = tcp_sk(sk); struct sk_buff *skb = tcp_send_head(sk); - return (skb && + return skb && tcp_snd_test(sk, skb, tcp_current_mss(sk), (tcp_skb_is_last(sk, skb) ? - tp->nonagle : TCP_NAGLE_PUSH))); + tp->nonagle : TCP_NAGLE_PUSH)); } /* Trim TSO SKB to LEN bytes, put the remaining data into a new packet diff --git a/net/ipv4/tcp_westwood.c b/net/ipv4/tcp_westwood.c index 20151d6a624..a534dda5456 100644 --- a/net/ipv4/tcp_westwood.c +++ b/net/ipv4/tcp_westwood.c @@ -80,7 +80,7 @@ static void tcp_westwood_init(struct sock *sk) */ static inline u32 westwood_do_filter(u32 a, u32 b) { - return (((7 * a) + b) >> 3); + return ((7 * a) + b) >> 3; } static void westwood_filter(struct westwood *w, u32 delta) diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5bc893e2800..89aa54394a0 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -243,7 +243,7 @@ static inline bool addrconf_qdisc_ok(const struct net_device *dev) /* Check if a route is valid prefix route */ static inline int addrconf_is_prefix_route(const struct rt6_info *rt) { - return ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0); + return (rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0; } static void addrconf_del_timer(struct inet6_ifaddr *ifp) diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index f0e774cea38..921dcf6c271 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -513,10 +513,9 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) static inline int ip6addrlbl_msgsize(void) { - return (NLMSG_ALIGN(sizeof(struct ifaddrlblmsg)) + return NLMSG_ALIGN(sizeof(struct ifaddrlblmsg)) + nla_total_size(16) /* IFAL_ADDRESS */ - + nla_total_size(4) /* IFAL_LABEL */ - ); + + nla_total_size(4); /* IFAL_LABEL */ } static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 56b9bf2516f..60220985bb8 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -467,7 +467,7 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr, if (ipv6_addr_type(&sin->sin6_addr) & IPV6_ADDR_LINKLOCAL) sin->sin6_scope_id = sk->sk_bound_dev_if; *uaddr_len = sizeof(*sin); - return(0); + return 0; } EXPORT_SYMBOL(inet6_getname); @@ -488,7 +488,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) case SIOCADDRT: case SIOCDELRT: - return(ipv6_route_ioctl(net, cmd, (void __user *)arg)); + return ipv6_route_ioctl(net, cmd, (void __user *)arg); case SIOCSIFADDR: return addrconf_add_ifaddr(net, (void __user *) arg); @@ -502,7 +502,7 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg) return sk->sk_prot->ioctl(sk, cmd, arg); } /*NOTREACHED*/ - return(0); + return 0; } EXPORT_SYMBOL(inet6_ioctl); diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index e1caa5d526c..14ed0a955b5 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -13,12 +13,12 @@ int ipv6_ext_hdr(u8 nexthdr) /* * find out if nexthdr is an extension header or a protocol */ - return ( (nexthdr == NEXTHDR_HOP) || + return (nexthdr == NEXTHDR_HOP) || (nexthdr == NEXTHDR_ROUTING) || (nexthdr == NEXTHDR_FRAGMENT) || (nexthdr == NEXTHDR_AUTH) || (nexthdr == NEXTHDR_NONE) || - (nexthdr == NEXTHDR_DEST) ); + (nexthdr == NEXTHDR_DEST); } /* diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 1838927a224..efbbbce68f9 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -870,8 +870,8 @@ static inline int ip6_rt_check(struct rt6key *rt_key, struct in6_addr *fl_addr, struct in6_addr *addr_cache) { - return ((rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && - (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache))); + return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && + (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)); } static struct dst_entry *ip6_sk_dst_check(struct sock *sk, diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 69a0051cea6..b3dd844cd34 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -228,12 +228,12 @@ static struct nd_opt_hdr *ndisc_next_option(struct nd_opt_hdr *cur, do { cur = ((void *)cur) + (cur->nd_opt_len << 3); } while(cur < end && cur->nd_opt_type != type); - return (cur <= end && cur->nd_opt_type == type ? cur : NULL); + return cur <= end && cur->nd_opt_type == type ? cur : NULL; } static inline int ndisc_is_useropt(struct nd_opt_hdr *opt) { - return (opt->nd_opt_type == ND_OPT_RDNSS); + return opt->nd_opt_type == ND_OPT_RDNSS; } static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur, @@ -244,7 +244,7 @@ static struct nd_opt_hdr *ndisc_next_useropt(struct nd_opt_hdr *cur, do { cur = ((void *)cur) + (cur->nd_opt_len << 3); } while(cur < end && !ndisc_is_useropt(cur)); - return (cur <= end && ndisc_is_useropt(cur) ? cur : NULL); + return cur <= end && ndisc_is_useropt(cur) ? cur : NULL; } static struct ndisc_options *ndisc_parse_options(u8 *opt, int opt_len, @@ -319,7 +319,7 @@ static inline u8 *ndisc_opt_addr_data(struct nd_opt_hdr *p, int prepad = ndisc_addr_option_pad(dev->type); if (lladdrlen != NDISC_OPT_SPACE(dev->addr_len + prepad)) return NULL; - return (lladdr + prepad); + return lladdr + prepad; } int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int dir) diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index 8e754be92c2..6b331e9b570 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -82,13 +82,13 @@ EXPORT_SYMBOL_GPL(ip6t_alloc_initial_table); int ip6t_ext_hdr(u8 nexthdr) { - return ( (nexthdr == IPPROTO_HOPOPTS) || - (nexthdr == IPPROTO_ROUTING) || - (nexthdr == IPPROTO_FRAGMENT) || - (nexthdr == IPPROTO_ESP) || - (nexthdr == IPPROTO_AH) || - (nexthdr == IPPROTO_NONE) || - (nexthdr == IPPROTO_DSTOPTS) ); + return (nexthdr == IPPROTO_HOPOPTS) || + (nexthdr == IPPROTO_ROUTING) || + (nexthdr == IPPROTO_FRAGMENT) || + (nexthdr == IPPROTO_ESP) || + (nexthdr == IPPROTO_AH) || + (nexthdr == IPPROTO_NONE) || + (nexthdr == IPPROTO_DSTOPTS); } /* Returns whether matches rule or not. */ diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index e677937a07f..45e6efb7f17 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -764,7 +764,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, return -EINVAL; if (sin6->sin6_family && sin6->sin6_family != AF_INET6) - return(-EAFNOSUPPORT); + return -EAFNOSUPPORT; /* port is the proto value [0..255] carried in nexthdr */ proto = ntohs(sin6->sin6_port); @@ -772,10 +772,10 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (!proto) proto = inet->inet_num; else if (proto != inet->inet_num) - return(-EINVAL); + return -EINVAL; if (proto > 255) - return(-EINVAL); + return -EINVAL; daddr = &sin6->sin6_addr; if (np->sndflow) { @@ -985,7 +985,7 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname, /* You may get strange result with a positive odd offset; RFC2292bis agrees with me. */ if (val > 0 && (val&1)) - return(-EINVAL); + return -EINVAL; if (val < 0) { rp->checksum = 0; } else { @@ -997,7 +997,7 @@ static int do_rawv6_setsockopt(struct sock *sk, int level, int optname, break; default: - return(-ENOPROTOOPT); + return -ENOPROTOOPT; } } @@ -1190,7 +1190,7 @@ static int rawv6_init_sk(struct sock *sk) default: break; } - return(0); + return 0; } struct proto rawv6_prot = { diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d126365ac04..25b0beda433 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -217,14 +217,14 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, static __inline__ int rt6_check_expired(const struct rt6_info *rt) { - return (rt->rt6i_flags & RTF_EXPIRES && - time_after(jiffies, rt->rt6i_expires)); + return (rt->rt6i_flags & RTF_EXPIRES) && + time_after(jiffies, rt->rt6i_expires); } static inline int rt6_need_strict(struct in6_addr *daddr) { - return (ipv6_addr_type(daddr) & - (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK)); + return ipv6_addr_type(daddr) & + (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL | IPV6_ADDR_LOOPBACK); } /* @@ -440,7 +440,7 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) __func__, match); net = dev_net(rt0->rt6i_dev); - return (match ? match : net->ipv6.ip6_null_entry); + return match ? match : net->ipv6.ip6_null_entry; } #ifdef CONFIG_IPV6_ROUTE_INFO @@ -859,7 +859,7 @@ int ip6_dst_blackhole(struct sock *sk, struct dst_entry **dstp, struct flowi *fl dst_release(*dstp); *dstp = new; - return (new ? 0 : -ENOMEM); + return new ? 0 : -ENOMEM; } EXPORT_SYMBOL_GPL(ip6_dst_blackhole); @@ -1070,7 +1070,7 @@ static int ip6_dst_gc(struct dst_ops *ops) net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; out: net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; - return (atomic_read(&ops->entries) > rt_max_size); + return atomic_read(&ops->entries) > rt_max_size; } /* Clean host part of a prefix. Not necessary in radix tree, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index fe6d40418c0..8d93f6d8197 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -139,7 +139,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, return -EINVAL; if (usin->sin6_family != AF_INET6) - return(-EAFNOSUPPORT); + return -EAFNOSUPPORT; memset(&fl, 0, sizeof(fl)); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 6baeabbbca8..39676eac3a3 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -199,7 +199,7 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops) struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops); xfrm6_policy_afinfo.garbage_collect(net); - return (atomic_read(&ops->entries) > ops->gc_thresh * 2); + return atomic_read(&ops->entries) > ops->gc_thresh * 2; } static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index fd55b5135de..bf3635129b1 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -573,9 +573,9 @@ static int irda_find_lsap_sel(struct irda_sock *self, char *name) /* Requested object/attribute doesn't exist */ if((self->errno == IAS_CLASS_UNKNOWN) || (self->errno == IAS_ATTRIB_UNKNOWN)) - return (-EADDRNOTAVAIL); + return -EADDRNOTAVAIL; else - return (-EHOSTUNREACH); + return -EHOSTUNREACH; } /* Get the remote TSAP selector */ @@ -663,7 +663,7 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name) __func__, name); self->daddr = DEV_ADDR_ANY; kfree(discoveries); - return(-ENOTUNIQ); + return -ENOTUNIQ; } /* First time we found that one, save it ! */ daddr = self->daddr; @@ -677,7 +677,7 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name) IRDA_DEBUG(0, "%s(), unexpected IAS query failure\n", __func__); self->daddr = DEV_ADDR_ANY; kfree(discoveries); - return(-EHOSTUNREACH); + return -EHOSTUNREACH; break; } } @@ -689,7 +689,7 @@ static int irda_discover_daddr_and_lsap_sel(struct irda_sock *self, char *name) IRDA_DEBUG(1, "%s(), cannot discover service ''%s'' in any device !!!\n", __func__, name); self->daddr = DEV_ADDR_ANY; - return(-EADDRNOTAVAIL); + return -EADDRNOTAVAIL; } /* Revert back to discovered device & service */ @@ -2465,9 +2465,9 @@ bed: /* Requested object/attribute doesn't exist */ if((self->errno == IAS_CLASS_UNKNOWN) || (self->errno == IAS_ATTRIB_UNKNOWN)) - return (-EADDRNOTAVAIL); + return -EADDRNOTAVAIL; else - return (-EHOSTUNREACH); + return -EHOSTUNREACH; } /* Translate from internal to user structure */ diff --git a/net/irda/discovery.c b/net/irda/discovery.c index c1c8ae93912..36c3f037f17 100644 --- a/net/irda/discovery.c +++ b/net/irda/discovery.c @@ -315,7 +315,7 @@ struct irda_device_info *irlmp_copy_discoveries(hashbin_t *log, int *pn, /* Get the actual number of device in the buffer and return */ *pn = i; - return(buffer); + return buffer; } #ifdef CONFIG_PROC_FS diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index faa82ca2dfd..a39cca8331d 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -449,8 +449,8 @@ static int ircomm_tty_open(struct tty_struct *tty, struct file *filp) } #ifdef SERIAL_DO_RESTART - return ((self->flags & ASYNC_HUP_NOTIFY) ? - -EAGAIN : -ERESTARTSYS); + return (self->flags & ASYNC_HUP_NOTIFY) ? + -EAGAIN : -ERESTARTSYS; #else return -EAGAIN; #endif diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index 0e7d8bde145..6115a44c0a2 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -939,7 +939,7 @@ struct irda_device_info *irlmp_get_discoveries(int *pn, __u16 mask, int nslots) } /* Return current cached discovery log */ - return(irlmp_copy_discoveries(irlmp->cachelog, pn, mask, TRUE)); + return irlmp_copy_discoveries(irlmp->cachelog, pn, mask, TRUE); } EXPORT_SYMBOL(irlmp_get_discoveries); diff --git a/net/irda/irlmp_frame.c b/net/irda/irlmp_frame.c index 3750884094d..062e63b1c5c 100644 --- a/net/irda/irlmp_frame.c +++ b/net/irda/irlmp_frame.c @@ -448,7 +448,7 @@ static struct lsap_cb *irlmp_find_lsap(struct lap_cb *self, __u8 dlsap_sel, (self->cache.slsap_sel == slsap_sel) && (self->cache.dlsap_sel == dlsap_sel)) { - return (self->cache.lsap); + return self->cache.lsap; } #endif diff --git a/net/irda/irnet/irnet_irda.c b/net/irda/irnet/irnet_irda.c index e98e40d76f4..7f17a8020e8 100644 --- a/net/irda/irnet/irnet_irda.c +++ b/net/irda/irnet/irnet_irda.c @@ -238,7 +238,7 @@ irnet_ias_to_tsap(irnet_socket * self, DEXIT(IRDA_SR_TRACE, "\n"); /* Return the TSAP */ - return(dtsap_sel); + return dtsap_sel; } /*------------------------------------------------------------------*/ @@ -301,7 +301,7 @@ irnet_connect_tsap(irnet_socket * self) { clear_bit(0, &self->ttp_connect); DERROR(IRDA_SR_ERROR, "connect aborted!\n"); - return(err); + return err; } /* Connect to remote device */ @@ -312,7 +312,7 @@ irnet_connect_tsap(irnet_socket * self) { clear_bit(0, &self->ttp_connect); DERROR(IRDA_SR_ERROR, "connect aborted!\n"); - return(err); + return err; } /* The above call is non-blocking. @@ -321,7 +321,7 @@ irnet_connect_tsap(irnet_socket * self) * See you there ;-) */ DEXIT(IRDA_SR_TRACE, "\n"); - return(err); + return err; } /*------------------------------------------------------------------*/ @@ -362,10 +362,10 @@ irnet_discover_next_daddr(irnet_socket * self) /* The above request is non-blocking. * After a while, IrDA will call us back in irnet_discovervalue_confirm() * We will then call irnet_ias_to_tsap() and come back here again... */ - return(0); + return 0; } else - return(1); + return 1; } /*------------------------------------------------------------------*/ @@ -436,7 +436,7 @@ irnet_discover_daddr_and_lsap_sel(irnet_socket * self) /* Follow me in irnet_discovervalue_confirm() */ DEXIT(IRDA_SR_TRACE, "\n"); - return(0); + return 0; } /*------------------------------------------------------------------*/ @@ -485,7 +485,7 @@ irnet_dname_to_daddr(irnet_socket * self) /* No luck ! */ DEBUG(IRDA_SR_INFO, "cannot discover device ``%s'' !!!\n", self->rname); kfree(discoveries); - return(-EADDRNOTAVAIL); + return -EADDRNOTAVAIL; } @@ -527,7 +527,7 @@ irda_irnet_create(irnet_socket * self) INIT_WORK(&self->disconnect_work, irnet_ppp_disconnect); DEXIT(IRDA_SOCK_TRACE, "\n"); - return(0); + return 0; } /*------------------------------------------------------------------*/ @@ -601,7 +601,7 @@ irda_irnet_connect(irnet_socket * self) * We will finish the connection procedure in irnet_connect_tsap(). */ DEXIT(IRDA_SOCK_TRACE, "\n"); - return(0); + return 0; } /*------------------------------------------------------------------*/ @@ -733,7 +733,7 @@ irnet_daddr_to_dname(irnet_socket * self) /* No luck ! */ DEXIT(IRDA_SERV_INFO, ": cannot discover device 0x%08x !!!\n", self->daddr); kfree(discoveries); - return(-EADDRNOTAVAIL); + return -EADDRNOTAVAIL; } /*------------------------------------------------------------------*/ diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index dfe7b38dd4a..69f1fa64994 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -166,7 +166,7 @@ irnet_ctrl_write(irnet_socket * ap, } /* Success : we have parsed all commands successfully */ - return(count); + return count; } #ifdef INITIAL_DISCOVERY @@ -300,7 +300,7 @@ irnet_ctrl_read(irnet_socket * ap, } DEXIT(CTRL_TRACE, "\n"); - return(strlen(event)); + return strlen(event); } #endif /* INITIAL_DISCOVERY */ @@ -409,7 +409,7 @@ irnet_ctrl_read(irnet_socket * ap, } DEXIT(CTRL_TRACE, "\n"); - return(strlen(event)); + return strlen(event); } /*------------------------------------------------------------------*/ @@ -623,7 +623,7 @@ dev_irnet_poll(struct file * file, mask |= irnet_ctrl_poll(ap, file, wait); DEXIT(FS_TRACE, " - mask=0x%X\n", mask); - return(mask); + return mask; } /*------------------------------------------------------------------*/ diff --git a/net/key/af_key.c b/net/key/af_key.c index 43040e97c47..d87c22df6f1 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -565,12 +565,12 @@ pfkey_proto2satype(uint16_t proto) static uint8_t pfkey_proto_to_xfrm(uint8_t proto) { - return (proto == IPSEC_PROTO_ANY ? 0 : proto); + return proto == IPSEC_PROTO_ANY ? 0 : proto; } static uint8_t pfkey_proto_from_xfrm(uint8_t proto) { - return (proto ? proto : IPSEC_PROTO_ANY); + return proto ? proto : IPSEC_PROTO_ANY; } static inline int pfkey_sockaddr_len(sa_family_t family) diff --git a/net/mac80211/rate.c b/net/mac80211/rate.c index 4f772de2f21..b0cc385bf98 100644 --- a/net/mac80211/rate.c +++ b/net/mac80211/rate.c @@ -207,7 +207,7 @@ static bool rc_no_data_or_no_ack(struct ieee80211_tx_rate_control *txrc) fc = hdr->frame_control; - return ((info->flags & IEEE80211_TX_CTL_NO_ACK) || !ieee80211_is_data(fc)); + return (info->flags & IEEE80211_TX_CTL_NO_ACK) || !ieee80211_is_data(fc); } static void rc_send_low_broadcast(s8 *idx, u32 basic_rates, u8 max_rate_idx) diff --git a/net/rfkill/input.c b/net/rfkill/input.c index 3713d7ecab9..1bca6d49ec9 100644 --- a/net/rfkill/input.c +++ b/net/rfkill/input.c @@ -142,7 +142,7 @@ static unsigned long rfkill_last_scheduled; static unsigned long rfkill_ratelimit(const unsigned long last) { const unsigned long delay = msecs_to_jiffies(RFKILL_OPS_DELAY); - return (time_after(jiffies, last + delay)) ? 0 : delay; + return time_after(jiffies, last + delay) ? 0 : delay; } static void rfkill_schedule_ratelimited(void) diff --git a/net/rose/rose_link.c b/net/rose/rose_link.c index a750a28e022..fa5f5641a2c 100644 --- a/net/rose/rose_link.c +++ b/net/rose/rose_link.c @@ -114,7 +114,7 @@ static int rose_send_frame(struct sk_buff *skb, struct rose_neigh *neigh) if (ax25s) ax25_cb_put(ax25s); - return (neigh->ax25 != NULL); + return neigh->ax25 != NULL; } /* @@ -137,7 +137,7 @@ static int rose_link_up(struct rose_neigh *neigh) if (ax25s) ax25_cb_put(ax25s); - return (neigh->ax25 != NULL); + return neigh->ax25 != NULL; } /* diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index f774e657641..1ef29c74d85 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -799,7 +799,7 @@ static void sctp_inet_skb_msgname(struct sk_buff *skb, char *msgname, int *len) static int sctp_inet_af_supported(sa_family_t family, struct sctp_sock *sp) { /* PF_INET only supports AF_INET addresses. */ - return (AF_INET == family); + return AF_INET == family; } /* Address matching with wildcards allowed. */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 6a691d84aef..535659fdbaa 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3884,7 +3884,7 @@ static int sctp_getsockopt_sctp_status(struct sock *sk, int len, } out: - return (retval); + return retval; } @@ -3940,7 +3940,7 @@ static int sctp_getsockopt_peer_addr_info(struct sock *sk, int len, } out: - return (retval); + return retval; } /* 7.1.12 Enable/Disable message fragmentation (SCTP_DISABLE_FRAGMENTS) @@ -5594,7 +5594,7 @@ static int sctp_get_port(struct sock *sk, unsigned short snum) /* Note: sk->sk_num gets filled in if ephemeral port request. */ ret = sctp_get_port_local(sk, &addr); - return (ret ? 1 : 0); + return ret ? 1 : 0; } /* diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index dcfc66bab2b..597c493392a 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -1049,7 +1049,7 @@ gss_match(struct auth_cred *acred, struct rpc_cred *rc, int flags) out: if (acred->machine_cred != gss_cred->gc_machine_cred) return 0; - return (rc->cr_uid == acred->uid); + return rc->cr_uid == acred->uid; } /* diff --git a/net/sunrpc/auth_gss/gss_generic_token.c b/net/sunrpc/auth_gss/gss_generic_token.c index 310b78e9945..c586e92bcf7 100644 --- a/net/sunrpc/auth_gss/gss_generic_token.c +++ b/net/sunrpc/auth_gss/gss_generic_token.c @@ -76,19 +76,19 @@ static int der_length_size( int length) { if (length < (1<<7)) - return(1); + return 1; else if (length < (1<<8)) - return(2); + return 2; #if (SIZEOF_INT == 2) else - return(3); + return 3; #else else if (length < (1<<16)) - return(3); + return 3; else if (length < (1<<24)) - return(4); + return 4; else - return(5); + return 5; #endif } @@ -121,14 +121,14 @@ der_read_length(unsigned char **buf, int *bufsize) int ret; if (*bufsize < 1) - return(-1); + return -1; sf = *(*buf)++; (*bufsize)--; if (sf & 0x80) { if ((sf &= 0x7f) > ((*bufsize)-1)) - return(-1); + return -1; if (sf > SIZEOF_INT) - return (-1); + return -1; ret = 0; for (; sf; sf--) { ret = (ret<<8) + (*(*buf)++); @@ -138,7 +138,7 @@ der_read_length(unsigned char **buf, int *bufsize) ret = sf; } - return(ret); + return ret; } /* returns the length of a token, given the mech oid and the body size */ @@ -148,7 +148,7 @@ g_token_size(struct xdr_netobj *mech, unsigned int body_size) { /* set body_size to sequence contents size */ body_size += 2 + (int) mech->len; /* NEED overflow check */ - return(1 + der_length_size(body_size) + body_size); + return 1 + der_length_size(body_size) + body_size; } EXPORT_SYMBOL_GPL(g_token_size); @@ -186,27 +186,27 @@ g_verify_token_header(struct xdr_netobj *mech, int *body_size, int ret = 0; if ((toksize-=1) < 0) - return(G_BAD_TOK_HEADER); + return G_BAD_TOK_HEADER; if (*buf++ != 0x60) - return(G_BAD_TOK_HEADER); + return G_BAD_TOK_HEADER; if ((seqsize = der_read_length(&buf, &toksize)) < 0) - return(G_BAD_TOK_HEADER); + return G_BAD_TOK_HEADER; if (seqsize != toksize) - return(G_BAD_TOK_HEADER); + return G_BAD_TOK_HEADER; if ((toksize-=1) < 0) - return(G_BAD_TOK_HEADER); + return G_BAD_TOK_HEADER; if (*buf++ != 0x06) - return(G_BAD_TOK_HEADER); + return G_BAD_TOK_HEADER; if ((toksize-=1) < 0) - return(G_BAD_TOK_HEADER); + return G_BAD_TOK_HEADER; toid.len = *buf++; if ((toksize-=toid.len) < 0) - return(G_BAD_TOK_HEADER); + return G_BAD_TOK_HEADER; toid.data = buf; buf+=toid.len; @@ -217,17 +217,17 @@ g_verify_token_header(struct xdr_netobj *mech, int *body_size, to return G_BAD_TOK_HEADER if the token header is in fact bad */ if ((toksize-=2) < 0) - return(G_BAD_TOK_HEADER); + return G_BAD_TOK_HEADER; if (ret) - return(ret); + return ret; if (!ret) { *buf_in = buf; *body_size = toksize; } - return(ret); + return ret; } EXPORT_SYMBOL_GPL(g_verify_token_header); diff --git a/net/sunrpc/auth_gss/gss_krb5_seqnum.c b/net/sunrpc/auth_gss/gss_krb5_seqnum.c index 415c013ba38..62ac90c62cb 100644 --- a/net/sunrpc/auth_gss/gss_krb5_seqnum.c +++ b/net/sunrpc/auth_gss/gss_krb5_seqnum.c @@ -162,5 +162,5 @@ krb5_get_seq_num(struct krb5_ctx *kctx, *seqnum = ((plain[0]) | (plain[1] << 8) | (plain[2] << 16) | (plain[3] << 24)); - return (0); + return 0; } diff --git a/net/sunrpc/auth_gss/gss_mech_switch.c b/net/sunrpc/auth_gss/gss_mech_switch.c index 2689de39dc7..8b4061049d7 100644 --- a/net/sunrpc/auth_gss/gss_mech_switch.c +++ b/net/sunrpc/auth_gss/gss_mech_switch.c @@ -331,7 +331,7 @@ gss_delete_sec_context(struct gss_ctx **context_handle) *context_handle); if (!*context_handle) - return(GSS_S_NO_CONTEXT); + return GSS_S_NO_CONTEXT; if ((*context_handle)->internal_ctx_id) (*context_handle)->mech_type->gm_ops ->gss_delete_sec_context((*context_handle) diff --git a/net/sunrpc/sched.c b/net/sunrpc/sched.c index cace6049e4a..aa5dbda6608 100644 --- a/net/sunrpc/sched.c +++ b/net/sunrpc/sched.c @@ -376,7 +376,7 @@ int rpc_queue_empty(struct rpc_wait_queue *queue) spin_lock_bh(&queue->lock); res = queue->qlen; spin_unlock_bh(&queue->lock); - return (res == 0); + return res == 0; } EXPORT_SYMBOL_GPL(rpc_queue_empty); diff --git a/net/tipc/addr.c b/net/tipc/addr.c index c048543ffbe..2ddc351b3be 100644 --- a/net/tipc/addr.c +++ b/net/tipc/addr.c @@ -89,7 +89,7 @@ int tipc_addr_domain_valid(u32 addr) int tipc_addr_node_valid(u32 addr) { - return (tipc_addr_domain_valid(addr) && tipc_node(addr)); + return tipc_addr_domain_valid(addr) && tipc_node(addr); } int tipc_in_scope(u32 domain, u32 addr) diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index b11248c2d78..ecfaac10d0b 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -184,7 +184,7 @@ static void bclink_set_gap(struct tipc_node *n_ptr) static int bclink_ack_allowed(u32 n) { - return((n % TIPC_MIN_LINK_WIN) == tipc_own_tag); + return (n % TIPC_MIN_LINK_WIN) == tipc_own_tag; } diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c index 52ae17b2583..9c10c6b7c12 100644 --- a/net/tipc/bearer.c +++ b/net/tipc/bearer.c @@ -63,7 +63,7 @@ static int media_name_valid(const char *name) len = strlen(name); if ((len + 1) > TIPC_MAX_MEDIA_NAME) return 0; - return (strspn(name, tipc_alphabet) == len); + return strspn(name, tipc_alphabet) == len; } /** diff --git a/net/tipc/dbg.c b/net/tipc/dbg.c index 1885a7edb0c..6569d45bfb9 100644 --- a/net/tipc/dbg.c +++ b/net/tipc/dbg.c @@ -134,7 +134,7 @@ void tipc_printbuf_reset(struct print_buf *pb) int tipc_printbuf_empty(struct print_buf *pb) { - return (!pb->buf || (pb->crs == pb->buf)); + return !pb->buf || (pb->crs == pb->buf); } /** @@ -169,7 +169,7 @@ int tipc_printbuf_validate(struct print_buf *pb) tipc_printf(pb, err); } } - return (pb->crs - pb->buf + 1); + return pb->crs - pb->buf + 1; } /** diff --git a/net/tipc/link.c b/net/tipc/link.c index a6a3102bb4d..b8cf1e9d0b8 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -239,13 +239,13 @@ int tipc_link_is_up(struct link *l_ptr) { if (!l_ptr) return 0; - return (link_working_working(l_ptr) || link_working_unknown(l_ptr)); + return link_working_working(l_ptr) || link_working_unknown(l_ptr); } int tipc_link_is_active(struct link *l_ptr) { - return ((l_ptr->owner->active_links[0] == l_ptr) || - (l_ptr->owner->active_links[1] == l_ptr)); + return (l_ptr->owner->active_links[0] == l_ptr) || + (l_ptr->owner->active_links[1] == l_ptr); } /** diff --git a/net/tipc/link.h b/net/tipc/link.h index 2e5385c47d3..26151d30589 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -279,12 +279,12 @@ static inline int between(u32 lower, u32 upper, u32 n) static inline int less_eq(u32 left, u32 right) { - return (mod(right - left) < 32768u); + return mod(right - left) < 32768u; } static inline int less(u32 left, u32 right) { - return (less_eq(left, right) && (mod(right) != mod(left))); + return less_eq(left, right) && (mod(right) != mod(left)); } static inline u32 lesser(u32 left, u32 right) @@ -299,32 +299,32 @@ static inline u32 lesser(u32 left, u32 right) static inline int link_working_working(struct link *l_ptr) { - return (l_ptr->state == WORKING_WORKING); + return l_ptr->state == WORKING_WORKING; } static inline int link_working_unknown(struct link *l_ptr) { - return (l_ptr->state == WORKING_UNKNOWN); + return l_ptr->state == WORKING_UNKNOWN; } static inline int link_reset_unknown(struct link *l_ptr) { - return (l_ptr->state == RESET_UNKNOWN); + return l_ptr->state == RESET_UNKNOWN; } static inline int link_reset_reset(struct link *l_ptr) { - return (l_ptr->state == RESET_RESET); + return l_ptr->state == RESET_RESET; } static inline int link_blocked(struct link *l_ptr) { - return (l_ptr->exp_msg_count || l_ptr->blocked); + return l_ptr->exp_msg_count || l_ptr->blocked; } static inline int link_congested(struct link *l_ptr) { - return (l_ptr->out_queue_size >= l_ptr->queue_limit[0]); + return l_ptr->out_queue_size >= l_ptr->queue_limit[0]; } #endif diff --git a/net/tipc/msg.h b/net/tipc/msg.h index 995d2da35b0..031aad18efc 100644 --- a/net/tipc/msg.h +++ b/net/tipc/msg.h @@ -104,7 +104,7 @@ static inline u32 msg_user(struct tipc_msg *m) static inline u32 msg_isdata(struct tipc_msg *m) { - return (msg_user(m) <= TIPC_CRITICAL_IMPORTANCE); + return msg_user(m) <= TIPC_CRITICAL_IMPORTANCE; } static inline void msg_set_user(struct tipc_msg *m, u32 n) @@ -289,7 +289,7 @@ static inline void msg_set_destnode(struct tipc_msg *m, u32 a) static inline int msg_is_dest(struct tipc_msg *m, u32 d) { - return(msg_short(m) || (msg_destnode(m) == d)); + return msg_short(m) || (msg_destnode(m) == d); } static inline u32 msg_routed(struct tipc_msg *m) @@ -632,7 +632,7 @@ static inline void msg_set_bcast_tag(struct tipc_msg *m, u32 n) static inline u32 msg_max_pkt(struct tipc_msg *m) { - return (msg_bits(m, 9, 16, 0xffff) * 4); + return msg_bits(m, 9, 16, 0xffff) * 4; } static inline void msg_set_max_pkt(struct tipc_msg *m, u32 n) diff --git a/net/tipc/name_table.c b/net/tipc/name_table.c index c13c2c7c4b5..9ca4b068923 100644 --- a/net/tipc/name_table.c +++ b/net/tipc/name_table.c @@ -116,7 +116,7 @@ DEFINE_RWLOCK(tipc_nametbl_lock); static int hash(int x) { - return(x & (tipc_nametbl_size - 1)); + return x & (tipc_nametbl_size - 1); } /** diff --git a/net/tipc/node.c b/net/tipc/node.c index b702c7bf580..7c49cd056df 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -242,17 +242,17 @@ int tipc_node_has_active_links(struct tipc_node *n_ptr) int tipc_node_has_redundant_links(struct tipc_node *n_ptr) { - return (n_ptr->working_links > 1); + return n_ptr->working_links > 1; } static int tipc_node_has_active_routes(struct tipc_node *n_ptr) { - return (n_ptr && (n_ptr->last_router >= 0)); + return n_ptr && (n_ptr->last_router >= 0); } int tipc_node_is_up(struct tipc_node *n_ptr) { - return (tipc_node_has_active_links(n_ptr) || tipc_node_has_active_routes(n_ptr)); + return tipc_node_has_active_links(n_ptr) || tipc_node_has_active_routes(n_ptr); } struct tipc_node *tipc_node_attach_link(struct link *l_ptr) diff --git a/net/tipc/port.h b/net/tipc/port.h index 8d1652aab29..e74bd956373 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -157,7 +157,7 @@ static inline u32 tipc_peer_node(struct port *p_ptr) static inline int tipc_port_congested(struct port *p_ptr) { - return((p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2)); + return (p_ptr->sent - p_ptr->acked) >= (TIPC_FLOW_CONTROL_WIN * 2); } /** diff --git a/net/tipc/socket.c b/net/tipc/socket.c index f7ac94de24f..33217fc3d69 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -1195,7 +1195,7 @@ static int rx_queue_full(struct tipc_msg *msg, u32 queue_size, u32 base) if (msg_connected(msg)) threshold *= 4; - return (queue_size >= threshold); + return queue_size >= threshold; } /** diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index ab6eab4c45e..1a5b9a6bd12 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -604,6 +604,6 @@ int tipc_ispublished(struct tipc_name const *name) { u32 domain = 0; - return(tipc_nametbl_translate(name->type, name->instance,&domain) != 0); + return tipc_nametbl_translate(name->type, name->instance, &domain) != 0; } diff --git a/net/wireless/core.h b/net/wireless/core.h index 37580e090a3..5d89310b358 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -86,7 +86,7 @@ struct cfg80211_registered_device *wiphy_to_dev(struct wiphy *wiphy) static inline bool wiphy_idx_valid(int wiphy_idx) { - return (wiphy_idx >= 0); + return wiphy_idx >= 0; } -- cgit v1.2.3 From eb7d3066cf864342e8ae6a5c1126a1602c4d06c0 Mon Sep 17 00:00:00 2001 From: Christian Lamparter Date: Tue, 21 Sep 2010 21:36:18 +0200 Subject: mac80211: clear txflags for ps-filtered frames This patch fixes stale mac80211_tx_control_flags for filtered / retried frames. Because ieee80211_handle_filtered_frame feeds skbs back into the tx path, they have to be stripped of some tx flags so they won't confuse the stack, driver or device. Cc: Acked-by: Johannes Berg Signed-off-by: Christian Lamparter Signed-off-by: John W. Linville --- include/net/mac80211.h | 16 ++++++++++++++++ net/mac80211/status.c | 1 + 2 files changed, 17 insertions(+) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 12a49f0ba32..5d1187d7c5e 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -321,6 +321,9 @@ struct ieee80211_bss_conf { * @IEEE80211_TX_CTL_LDPC: tells the driver to use LDPC for this frame * @IEEE80211_TX_CTL_STBC: Enables Space-Time Block Coding (STBC) for this * frame and selects the maximum number of streams that it can use. + * + * Note: If you have to add new flags to the enumeration, then don't + * forget to update %IEEE80211_TX_TEMPORARY_FLAGS when necessary. */ enum mac80211_tx_control_flags { IEEE80211_TX_CTL_REQ_TX_STATUS = BIT(0), @@ -350,6 +353,19 @@ enum mac80211_tx_control_flags { #define IEEE80211_TX_CTL_STBC_SHIFT 23 +/* + * This definition is used as a mask to clear all temporary flags, which are + * set by the tx handlers for each transmission attempt by the mac80211 stack. + */ +#define IEEE80211_TX_TEMPORARY_FLAGS (IEEE80211_TX_CTL_NO_ACK | \ + IEEE80211_TX_CTL_CLEAR_PS_FILT | IEEE80211_TX_CTL_FIRST_FRAGMENT | \ + IEEE80211_TX_CTL_SEND_AFTER_DTIM | IEEE80211_TX_CTL_AMPDU | \ + IEEE80211_TX_STAT_TX_FILTERED | IEEE80211_TX_STAT_ACK | \ + IEEE80211_TX_STAT_AMPDU | IEEE80211_TX_STAT_AMPDU_NO_BACK | \ + IEEE80211_TX_CTL_RATE_CTRL_PROBE | IEEE80211_TX_CTL_PSPOLL_RESPONSE | \ + IEEE80211_TX_CTL_MORE_FRAMES | IEEE80211_TX_CTL_LDPC | \ + IEEE80211_TX_CTL_STBC) + /** * enum mac80211_rate_control_flags - per-rate flags set by the * Rate Control algorithm. diff --git a/net/mac80211/status.c b/net/mac80211/status.c index 571b32bfc54..dd85006c4fe 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -58,6 +58,7 @@ static void ieee80211_handle_filtered_frame(struct ieee80211_local *local, info->control.vif = &sta->sdata->vif; info->flags |= IEEE80211_TX_INTFL_NEED_TXPROCESSING | IEEE80211_TX_INTFL_RETRANSMISSION; + info->flags &= ~IEEE80211_TX_TEMPORARY_FLAGS; sta->tx_filtered_count++; -- cgit v1.2.3 From 7a91b434e2bad554b709265db7603b1aa52dd92e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 26 Sep 2010 18:53:07 -0700 Subject: net: update SOCK_MIN_RCVBUF SOCK_MIN_RCVBUF current value is 256 bytes It doesnt permit to receive the smallest possible frame, considering socket sk_rmem_alloc/sk_rcvbuf account skb truesizes. On 64bit arches, sizeof(struct sk_buff) is 240 bytes. Add the typical 64 bytes of headroom, and we go over the limit. With old kernels and 32bit arches, we were under the limit, if netdriver was doing copybreak. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/sock.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 8ae97c4970d..73a4f9702a6 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -1558,7 +1558,11 @@ static inline void sk_wake_async(struct sock *sk, int how, int band) } #define SOCK_MIN_SNDBUF 2048 -#define SOCK_MIN_RCVBUF 256 +/* + * Since sk_rmem_alloc sums skb->truesize, even a small frame might need + * sizeof(sk_buff) + MTU + padding, unless net driver perform copybreak + */ +#define SOCK_MIN_RCVBUF (2048 + sizeof(struct sk_buff)) static inline void sk_stream_moderate_sndbuf(struct sock *sk) { -- cgit v1.2.3 From 686b9cb994f5f74be790df4cd12873dfdc8a6984 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Thu, 23 Sep 2010 09:44:36 -0700 Subject: mac80211/ath9k: Support AMPDU with multiple VIFs. The old ieee80211_find_sta_by_hw method didn't properly find VIFS when there was more than one per AP. This caused AMPDU logic in ath9k to get the wrong VIF when trying to account for transmitted SKBs. This patch changes ieee80211_find_sta_by_hw to take a localaddr argument to distinguish between VIFs with the same AP but different local addresses. The method name is changed to ieee80211_find_sta_by_ifaddr. Signed-off-by: Ben Greear Acked-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/ath/ath9k/recv.c | 6 +++++- drivers/net/wireless/ath/ath9k/xmit.c | 3 +-- include/net/mac80211.h | 25 ++++++++++++++----------- net/mac80211/sta_info.c | 15 +++++++++++---- 4 files changed, 31 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c index 00140489bec..9c166f3804a 100644 --- a/drivers/net/wireless/ath/ath9k/recv.c +++ b/drivers/net/wireless/ath/ath9k/recv.c @@ -977,7 +977,11 @@ static void ath9k_process_rssi(struct ath_common *common, * at least one sdata of a wiphy on mac80211 but with ath9k virtual * wiphy you'd have to iterate over every wiphy and each sdata. */ - sta = ieee80211_find_sta_by_hw(hw, hdr->addr2); + if (is_multicast_ether_addr(hdr->addr1)) + sta = ieee80211_find_sta_by_ifaddr(hw, hdr->addr2, NULL); + else + sta = ieee80211_find_sta_by_ifaddr(hw, hdr->addr2, hdr->addr1); + if (sta) { an = (struct ath_node *) sta->drv_priv; if (rx_stats->rs_rssi != ATH9K_RSSI_BAD && diff --git a/drivers/net/wireless/ath/ath9k/xmit.c b/drivers/net/wireless/ath/ath9k/xmit.c index 85a7323a04e..f7da6b20a92 100644 --- a/drivers/net/wireless/ath/ath9k/xmit.c +++ b/drivers/net/wireless/ath/ath9k/xmit.c @@ -328,8 +328,7 @@ static void ath_tx_complete_aggr(struct ath_softc *sc, struct ath_txq *txq, rcu_read_lock(); - /* XXX: use ieee80211_find_sta! */ - sta = ieee80211_find_sta_by_hw(hw, hdr->addr1); + sta = ieee80211_find_sta_by_ifaddr(hw, hdr->addr1, hdr->addr2); if (!sta) { rcu_read_unlock(); diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 5d1187d7c5e..73469d8b64b 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -2432,25 +2432,28 @@ struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif, const u8 *addr); /** - * ieee80211_find_sta_by_hw - find a station on hardware + * ieee80211_find_sta_by_ifaddr - find a station on hardware * * @hw: pointer as obtained from ieee80211_alloc_hw() - * @addr: station's address + * @addr: remote station's address + * @localaddr: local address (vif->sdata->vif.addr). Use NULL for 'any'. * * This function must be called under RCU lock and the * resulting pointer is only valid under RCU lock as well. * - * NOTE: This function should not be used! When mac80211 is converted - * internally to properly keep track of stations on multiple - * virtual interfaces, it will not always know which station to - * return here since a single address might be used by multiple - * logical stations (e.g. consider a station connecting to another - * BSSID on the same AP hardware without disconnecting first). + * NOTE: You may pass NULL for localaddr, but then you will just get + * the first STA that matches the remote address 'addr'. + * We can have multiple STA associated with multiple + * logical stations (e.g. consider a station connecting to another + * BSSID on the same AP hardware without disconnecting first). + * In this case, the result of this method with localaddr NULL + * is not reliable. * - * DO NOT USE THIS FUNCTION. + * DO NOT USE THIS FUNCTION with localaddr NULL if at all possible. */ -struct ieee80211_sta *ieee80211_find_sta_by_hw(struct ieee80211_hw *hw, - const u8 *addr); +struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw, + const u8 *addr, + const u8 *localaddr); /** * ieee80211_sta_block_awake - block station from waking up diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 44e10a9de0a..ca2cba9cea8 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -838,13 +838,20 @@ void ieee80211_sta_expire(struct ieee80211_sub_if_data *sdata, mutex_unlock(&local->sta_mtx); } -struct ieee80211_sta *ieee80211_find_sta_by_hw(struct ieee80211_hw *hw, - const u8 *addr) +struct ieee80211_sta *ieee80211_find_sta_by_ifaddr(struct ieee80211_hw *hw, + const u8 *addr, + const u8 *localaddr) { struct sta_info *sta, *nxt; - /* Just return a random station ... first in list ... */ + /* + * Just return a random station if localaddr is NULL + * ... first in list. + */ for_each_sta_info(hw_to_local(hw), addr, sta, nxt) { + if (localaddr && + compare_ether_addr(sta->sdata->vif.addr, localaddr) != 0) + continue; if (!sta->uploaded) return NULL; return &sta->sta; @@ -852,7 +859,7 @@ struct ieee80211_sta *ieee80211_find_sta_by_hw(struct ieee80211_hw *hw, return NULL; } -EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_hw); +EXPORT_SYMBOL_GPL(ieee80211_find_sta_by_ifaddr); struct ieee80211_sta *ieee80211_find_sta(struct ieee80211_vif *vif, const u8 *addr) -- cgit v1.2.3 From 554891e63a29af35cc6bb403ef34e319518114d0 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 24 Sep 2010 12:38:25 +0200 Subject: mac80211: move packet flags into packet commit 8c0c709eea5cbab97fb464cd68b06f24acc58ee1 Author: Johannes Berg Date: Wed Nov 25 17:46:15 2009 +0100 mac80211: move cmntr flag out of rx flags moved the CMNTR flag into the skb RX flags for some aggregation cleanups, but this was wrong since the optimisation this flag tried to make requires that it is kept across the processing of multiple interfaces -- which isn't true for flags in the skb. The patch not only broke the optimisation, it also introduced a bug: under some (common!) circumstances the flag will be set on an already freed skb! However, investigating this in more detail, I found that most of the flags that we set should be per packet, _except_ for this one, due to a-MPDU processing. Additionally, the flags used for processing (currently just this one) need to be reset before processing a new packet. Since we haven't actually seen bugs reported as a result of the wrong flags handling (which is not too surprising -- the only real bug case I can come up with is an a-MSDU contained in an a-MPDU), I'll make a different fix for rc. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 6 +-- net/mac80211/ieee80211_i.h | 38 +++++++++++++---- net/mac80211/rx.c | 102 +++++++++++++++++++++++++-------------------- net/mac80211/wpa.c | 2 +- 4 files changed, 91 insertions(+), 57 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 73469d8b64b..fe8b9dae4de 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -581,9 +581,6 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * @RX_FLAG_HT: HT MCS was used and rate_idx is MCS index * @RX_FLAG_40MHZ: HT40 (40 MHz) was used * @RX_FLAG_SHORT_GI: Short guard interval was used - * @RX_FLAG_INTERNAL_CMTR: set internally after frame was reported - * on cooked monitor to avoid double-reporting it for multiple - * virtual interfaces */ enum mac80211_rx_flags { RX_FLAG_MMIC_ERROR = 1<<0, @@ -597,7 +594,6 @@ enum mac80211_rx_flags { RX_FLAG_HT = 1<<9, RX_FLAG_40MHZ = 1<<10, RX_FLAG_SHORT_GI = 1<<11, - RX_FLAG_INTERNAL_CMTR = 1<<12, }; /** @@ -618,6 +614,7 @@ enum mac80211_rx_flags { * @rate_idx: index of data rate into band's supported rates or MCS index if * HT rates are use (RX_FLAG_HT) * @flag: %RX_FLAG_* + * @rx_flags: internal RX flags for mac80211 */ struct ieee80211_rx_status { u64 mactime; @@ -627,6 +624,7 @@ struct ieee80211_rx_status { int antenna; int rate_idx; int flag; + unsigned int rx_flags; }; /** diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 40f74727338..945fbf29719 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -159,13 +159,37 @@ typedef unsigned __bitwise__ ieee80211_rx_result; #define RX_DROP_MONITOR ((__force ieee80211_rx_result) 2u) #define RX_QUEUED ((__force ieee80211_rx_result) 3u) -#define IEEE80211_RX_IN_SCAN BIT(0) -/* frame is destined to interface currently processed (incl. multicast frames) */ -#define IEEE80211_RX_RA_MATCH BIT(1) -#define IEEE80211_RX_AMSDU BIT(2) -#define IEEE80211_RX_FRAGMENTED BIT(3) -#define IEEE80211_MALFORMED_ACTION_FRM BIT(4) -/* only add flags here that do not change with subframes of an aMPDU */ +/** + * enum ieee80211_packet_rx_flags - packet RX flags + * @IEEE80211_RX_RA_MATCH: frame is destined to interface currently processed + * (incl. multicast frames) + * @IEEE80211_RX_IN_SCAN: received while scanning + * @IEEE80211_RX_FRAGMENTED: fragmented frame + * @IEEE80211_RX_AMSDU: a-MSDU packet + * @IEEE80211_RX_MALFORMED_ACTION_FRM: action frame is malformed + * + * These are per-frame flags that are attached to a frame in the + * @rx_flags field of &struct ieee80211_rx_status. + */ +enum ieee80211_packet_rx_flags { + IEEE80211_RX_IN_SCAN = BIT(0), + IEEE80211_RX_RA_MATCH = BIT(1), + IEEE80211_RX_FRAGMENTED = BIT(2), + IEEE80211_RX_AMSDU = BIT(3), + IEEE80211_RX_MALFORMED_ACTION_FRM = BIT(4), +}; + +/** + * enum ieee80211_rx_flags - RX data flags + * + * @IEEE80211_RX_CMNTR: received on cooked monitor already + * + * These flags are used across handling multiple interfaces + * for a single frame. + */ +enum ieee80211_rx_flags { + IEEE80211_RX_CMNTR = BIT(0), +}; struct ieee80211_rx_data { struct sk_buff *skb; diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index 8c666e9e8fb..0b0e83ebe3d 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -315,6 +315,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb, static void ieee80211_parse_qos(struct ieee80211_rx_data *rx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); int tid; /* does the frame have a qos control field? */ @@ -323,9 +324,7 @@ static void ieee80211_parse_qos(struct ieee80211_rx_data *rx) /* frame has qos control */ tid = *qc & IEEE80211_QOS_CTL_TID_MASK; if (*qc & IEEE80211_QOS_CONTROL_A_MSDU_PRESENT) - rx->flags |= IEEE80211_RX_AMSDU; - else - rx->flags &= ~IEEE80211_RX_AMSDU; + status->rx_flags |= IEEE80211_RX_AMSDU; } else { /* * IEEE 802.11-2007, 7.1.3.4.1 ("Sequence Number field"): @@ -387,9 +386,10 @@ static ieee80211_rx_result debug_noinline ieee80211_rx_h_passive_scan(struct ieee80211_rx_data *rx) { struct ieee80211_local *local = rx->local; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); struct sk_buff *skb = rx->skb; - if (likely(!(rx->flags & IEEE80211_RX_IN_SCAN))) + if (likely(!(status->rx_flags & IEEE80211_RX_IN_SCAN))) return RX_CONTINUE; if (test_bit(SCAN_HW_SCANNING, &local->scanning)) @@ -783,13 +783,14 @@ static ieee80211_rx_result debug_noinline ieee80211_rx_h_check(struct ieee80211_rx_data *rx) { struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)rx->skb->data; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); /* Drop duplicate 802.11 retransmissions (IEEE 802.11 Chap. 9.2.9) */ if (rx->sta && !is_multicast_ether_addr(hdr->addr1)) { if (unlikely(ieee80211_has_retry(hdr->frame_control) && rx->sta->last_seq_ctrl[rx->queue] == hdr->seq_ctrl)) { - if (rx->flags & IEEE80211_RX_RA_MATCH) { + if (status->rx_flags & IEEE80211_RX_RA_MATCH) { rx->local->dot11FrameDuplicateCount++; rx->sta->num_duplicates++; } @@ -822,7 +823,7 @@ ieee80211_rx_h_check(struct ieee80211_rx_data *rx) if ((!ieee80211_has_fromds(hdr->frame_control) && !ieee80211_has_tods(hdr->frame_control) && ieee80211_is_data(hdr->frame_control)) || - !(rx->flags & IEEE80211_RX_RA_MATCH)) { + !(status->rx_flags & IEEE80211_RX_RA_MATCH)) { /* Drop IBSS frames and frames for other hosts * silently. */ return RX_DROP_MONITOR; @@ -879,7 +880,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) * No point in finding a key and decrypting if the frame is neither * addressed to us nor a multicast frame. */ - if (!(rx->flags & IEEE80211_RX_RA_MATCH)) + if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) return RX_CONTINUE; /* start without a key */ @@ -1112,7 +1113,7 @@ ieee80211_rx_h_sta_process(struct ieee80211_rx_data *rx) sta->last_rx = jiffies; } - if (!(rx->flags & IEEE80211_RX_RA_MATCH)) + if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) return RX_CONTINUE; if (rx->sdata->vif.type == NL80211_IFTYPE_STATION) @@ -1269,6 +1270,7 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) unsigned int frag, seq; struct ieee80211_fragment_entry *entry; struct sk_buff *skb; + struct ieee80211_rx_status *status; hdr = (struct ieee80211_hdr *)rx->skb->data; fc = hdr->frame_control; @@ -1368,7 +1370,8 @@ ieee80211_rx_h_defragment(struct ieee80211_rx_data *rx) } /* Complete frame has been reassembled - process it now */ - rx->flags |= IEEE80211_RX_FRAGMENTED; + status = IEEE80211_SKB_RXCB(rx->skb); + status->rx_flags |= IEEE80211_RX_FRAGMENTED; out: if (rx->sta) @@ -1385,9 +1388,10 @@ ieee80211_rx_h_ps_poll(struct ieee80211_rx_data *rx) { struct ieee80211_sub_if_data *sdata = rx->sdata; __le16 fc = ((struct ieee80211_hdr *)rx->skb->data)->frame_control; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); if (likely(!rx->sta || !ieee80211_is_pspoll(fc) || - !(rx->flags & IEEE80211_RX_RA_MATCH))) + !(status->rx_flags & IEEE80211_RX_RA_MATCH))) return RX_CONTINUE; if ((sdata->vif.type != NL80211_IFTYPE_AP) && @@ -1548,6 +1552,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) struct sk_buff *skb, *xmit_skb; struct ethhdr *ehdr = (struct ethhdr *) rx->skb->data; struct sta_info *dsta; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); skb = rx->skb; xmit_skb = NULL; @@ -1555,7 +1560,7 @@ ieee80211_deliver_skb(struct ieee80211_rx_data *rx) if ((sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_AP_VLAN) && !(sdata->flags & IEEE80211_SDATA_DONT_BRIDGE_PACKETS) && - (rx->flags & IEEE80211_RX_RA_MATCH) && + (status->rx_flags & IEEE80211_RX_RA_MATCH) && (sdata->vif.type != NL80211_IFTYPE_AP_VLAN || !sdata->u.vlan.sta)) { if (is_multicast_ether_addr(ehdr->h_dest)) { /* @@ -1632,6 +1637,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data; __le16 fc = hdr->frame_control; struct sk_buff_head frame_list; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); if (unlikely(!ieee80211_is_data(fc))) return RX_CONTINUE; @@ -1639,7 +1645,7 @@ ieee80211_rx_h_amsdu(struct ieee80211_rx_data *rx) if (unlikely(!ieee80211_is_data_present(fc))) return RX_DROP_MONITOR; - if (!(rx->flags & IEEE80211_RX_AMSDU)) + if (!(status->rx_flags & IEEE80211_RX_AMSDU)) return RX_CONTINUE; if (ieee80211_has_a4(hdr->frame_control) && @@ -1690,6 +1696,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) struct sk_buff *skb = rx->skb, *fwd_skb; struct ieee80211_local *local = rx->local; struct ieee80211_sub_if_data *sdata = rx->sdata; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); hdr = (struct ieee80211_hdr *) skb->data; hdrlen = ieee80211_hdrlen(hdr->frame_control); @@ -1735,7 +1742,7 @@ ieee80211_rx_h_mesh_fwding(struct ieee80211_rx_data *rx) mesh_hdr->ttl--; - if (rx->flags & IEEE80211_RX_RA_MATCH) { + if (status->rx_flags & IEEE80211_RX_RA_MATCH) { if (!mesh_hdr->ttl) IEEE80211_IFSTA_MESH_CTR_INC(&rx->sdata->u.mesh, dropped_frames_ttl); @@ -1945,6 +1952,7 @@ static ieee80211_rx_result debug_noinline ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) { struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); /* * From here on, look only at management frames. @@ -1957,7 +1965,7 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx) if (!ieee80211_is_mgmt(mgmt->frame_control)) return RX_DROP_MONITOR; - if (!(rx->flags & IEEE80211_RX_RA_MATCH)) + if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) return RX_DROP_MONITOR; if (ieee80211_drop_unencrypted_mgmt(rx)) @@ -1972,6 +1980,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) struct ieee80211_local *local = rx->local; struct ieee80211_sub_if_data *sdata = rx->sdata; struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); int len = rx->skb->len; if (!ieee80211_is_action(mgmt->frame_control)) @@ -1984,7 +1993,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) if (!rx->sta && mgmt->u.action.category != WLAN_CATEGORY_PUBLIC) return RX_DROP_UNUSABLE; - if (!(rx->flags & IEEE80211_RX_RA_MATCH)) + if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) return RX_DROP_UNUSABLE; switch (mgmt->u.action.category) { @@ -2080,7 +2089,7 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) return RX_CONTINUE; invalid: - rx->flags |= IEEE80211_MALFORMED_ACTION_FRM; + status->rx_flags |= IEEE80211_RX_MALFORMED_ACTION_FRM; /* will return in the next handlers */ return RX_CONTINUE; @@ -2102,10 +2111,10 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx) static ieee80211_rx_result debug_noinline ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx) { - struct ieee80211_rx_status *status; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); /* skip known-bad action frames and return them in the next handler */ - if (rx->flags & IEEE80211_MALFORMED_ACTION_FRM) + if (status->rx_flags & IEEE80211_RX_MALFORMED_ACTION_FRM) return RX_CONTINUE; /* @@ -2114,7 +2123,6 @@ ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx) * so userspace can register for those to know whether ones * it transmitted were processed or returned. */ - status = IEEE80211_SKB_RXCB(rx->skb); if (cfg80211_rx_mgmt(rx->sdata->dev, status->freq, rx->skb->data, rx->skb->len, @@ -2136,6 +2144,7 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx) struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data; struct sk_buff *nskb; struct ieee80211_sub_if_data *sdata = rx->sdata; + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(rx->skb); if (!ieee80211_is_action(mgmt->frame_control)) return RX_CONTINUE; @@ -2150,7 +2159,7 @@ ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx) * registration mechanisms, but older ones still use cooked * monitor interfaces so push all frames there. */ - if (!(rx->flags & IEEE80211_MALFORMED_ACTION_FRM) && + if (!(status->rx_flags & IEEE80211_RX_MALFORMED_ACTION_FRM) && (sdata->vif.type == NL80211_IFTYPE_AP || sdata->vif.type == NL80211_IFTYPE_AP_VLAN)) return RX_DROP_MONITOR; @@ -2284,8 +2293,13 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, struct net_device *prev_dev = NULL; struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); - if (status->flag & RX_FLAG_INTERNAL_CMTR) + /* + * If cooked monitor has been processed already, then + * don't do it again. If not, set the flag. + */ + if (rx->flags & IEEE80211_RX_CMNTR) goto out_free_skb; + rx->flags |= IEEE80211_RX_CMNTR; if (skb_headroom(skb) < sizeof(*rthdr) && pskb_expand_head(skb, sizeof(*rthdr), 0, GFP_ATOMIC)) @@ -2341,12 +2355,8 @@ static void ieee80211_rx_cooked_monitor(struct ieee80211_rx_data *rx, if (prev_dev) { skb->dev = prev_dev; netif_receive_skb(skb); - skb = NULL; - } else - goto out_free_skb; - - status->flag |= RX_FLAG_INTERNAL_CMTR; - return; + return; + } out_free_skb: dev_kfree_skb(skb); @@ -2407,6 +2417,7 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx, * same TID from the same station */ rx->skb = skb; + rx->flags = 0; CALL_RXH(ieee80211_rx_h_decrypt) CALL_RXH(ieee80211_rx_h_check_more_data) @@ -2477,7 +2488,12 @@ static void ieee80211_invoke_rx_handlers(struct ieee80211_rx_data *rx) void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid) { struct sk_buff_head frames; - struct ieee80211_rx_data rx = { }; + struct ieee80211_rx_data rx = { + .sta = sta, + .sdata = sta->sdata, + .local = sta->local, + .queue = tid, + }; struct tid_ampdu_rx *tid_agg_rx; tid_agg_rx = rcu_dereference(sta->ampdu_mlme.tid_rx[tid]); @@ -2486,13 +2502,6 @@ void ieee80211_release_reorder_timeout(struct sta_info *sta, int tid) __skb_queue_head_init(&frames); - /* construct rx struct */ - rx.sta = sta; - rx.sdata = sta->sdata; - rx.local = sta->local; - rx.queue = tid; - rx.flags |= IEEE80211_RX_RA_MATCH; - spin_lock(&tid_agg_rx->reorder_lock); ieee80211_sta_reorder_release(&sta->local->hw, tid_agg_rx, &frames); spin_unlock(&tid_agg_rx->reorder_lock); @@ -2519,7 +2528,7 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, compare_ether_addr(sdata->vif.addr, hdr->addr1) != 0) { if (!(sdata->dev->flags & IFF_PROMISC)) return 0; - rx->flags &= ~IEEE80211_RX_RA_MATCH; + status->rx_flags &= ~IEEE80211_RX_RA_MATCH; } break; case NL80211_IFTYPE_ADHOC: @@ -2529,15 +2538,15 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, return 1; } else if (!ieee80211_bssid_match(bssid, sdata->u.ibss.bssid)) { - if (!(rx->flags & IEEE80211_RX_IN_SCAN)) + if (!(status->rx_flags & IEEE80211_RX_IN_SCAN)) return 0; - rx->flags &= ~IEEE80211_RX_RA_MATCH; + status->rx_flags &= ~IEEE80211_RX_RA_MATCH; } else if (!multicast && compare_ether_addr(sdata->vif.addr, hdr->addr1) != 0) { if (!(sdata->dev->flags & IFF_PROMISC)) return 0; - rx->flags &= ~IEEE80211_RX_RA_MATCH; + status->rx_flags &= ~IEEE80211_RX_RA_MATCH; } else if (!rx->sta) { int rate_idx; if (status->flag & RX_FLAG_HT) @@ -2555,7 +2564,7 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, if (!(sdata->dev->flags & IFF_PROMISC)) return 0; - rx->flags &= ~IEEE80211_RX_RA_MATCH; + status->rx_flags &= ~IEEE80211_RX_RA_MATCH; } break; case NL80211_IFTYPE_AP_VLAN: @@ -2566,9 +2575,9 @@ static int prepare_for_handlers(struct ieee80211_rx_data *rx, return 0; } else if (!ieee80211_bssid_match(bssid, sdata->vif.addr)) { - if (!(rx->flags & IEEE80211_RX_IN_SCAN)) + if (!(status->rx_flags & IEEE80211_RX_IN_SCAN)) return 0; - rx->flags &= ~IEEE80211_RX_RA_MATCH; + status->rx_flags &= ~IEEE80211_RX_RA_MATCH; } break; case NL80211_IFTYPE_WDS: @@ -2602,14 +2611,14 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, int prepares; rx->skb = skb; - rx->flags |= IEEE80211_RX_RA_MATCH; + status->rx_flags |= IEEE80211_RX_RA_MATCH; prepares = prepare_for_handlers(rx, hdr); if (!prepares) return false; if (status->flag & RX_FLAG_MMIC_ERROR) { - if (rx->flags & IEEE80211_RX_RA_MATCH) + if (status->rx_flags & IEEE80211_RX_RA_MATCH) ieee80211_rx_michael_mic_report(hdr, rx); return false; } @@ -2638,6 +2647,7 @@ static bool ieee80211_prepare_and_rx_handle(struct ieee80211_rx_data *rx, static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, struct sk_buff *skb) { + struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb); struct ieee80211_local *local = hw_to_local(hw); struct ieee80211_sub_if_data *sdata; struct ieee80211_hdr *hdr; @@ -2657,7 +2667,7 @@ static void __ieee80211_rx_handle_packet(struct ieee80211_hw *hw, if (unlikely(test_bit(SCAN_HW_SCANNING, &local->scanning) || test_bit(SCAN_OFF_CHANNEL, &local->scanning))) - rx.flags |= IEEE80211_RX_IN_SCAN; + status->rx_flags |= IEEE80211_RX_IN_SCAN; if (ieee80211_is_mgmt(fc)) err = skb_linearize(skb); @@ -2808,6 +2818,8 @@ void ieee80211_rx(struct ieee80211_hw *hw, struct sk_buff *skb) } } + status->rx_flags = 0; + /* * key references and virtual interfaces are protected using RCU * and this requires that we are in a read-side RCU section during diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 43882b36da5..bee230d8fd1 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -117,7 +117,7 @@ ieee80211_rx_h_michael_mic_verify(struct ieee80211_rx_data *rx) key = &rx->key->conf.key[key_offset]; michael_mic(key, hdr, data, data_len, mic); if (memcmp(mic, data + data_len, MICHAEL_MIC_LEN) != 0 || wpa_test) { - if (!(rx->flags & IEEE80211_RX_RA_MATCH)) + if (!(status->rx_flags & IEEE80211_RX_RA_MATCH)) return RX_DROP_UNUSABLE; mac80211_ev_michael_mic_failure(rx->sdata, rx->key->conf.keyidx, -- cgit v1.2.3 From 8d98efa84b790bdd62248eb0dfff17e9baf5c844 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Sun, 26 Sep 2010 19:07:59 +0000 Subject: Phonet: Implement Pipe Controller to support Nokia Slim Modems Phonet stack assumes the presence of Pipe Controller, either in Modem or on Application Processing Engine user-space for the Pipe data. Nokia Slim Modems like WG2.5 used in ST-Ericsson U8500 platform do not implement Pipe controller in them. This patch adds Pipe Controller implemenation to Phonet stack to support Pipe data over Phonet stack for Nokia Slim Modems. Signed-off-by: Kumar Sanghvi Acked-by: Linus Walleij Signed-off-by: David S. Miller --- include/linux/phonet.h | 5 + include/net/phonet/pep.h | 21 +++ net/phonet/Kconfig | 11 ++ net/phonet/pep.c | 448 ++++++++++++++++++++++++++++++++++++++++++++++- 4 files changed, 479 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index 85e14a83283..96f5625d62f 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -36,6 +36,11 @@ /* Socket options for SOL_PNPIPE level */ #define PNPIPE_ENCAP 1 #define PNPIPE_IFINDEX 2 +#define PNPIPE_CREATE 3 +#define PNPIPE_ENABLE 4 +#define PNPIPE_DISABLE 5 +#define PNPIPE_DESTROY 6 +#define PNPIPE_INQ 7 #define PNADDR_ANY 0 #define PNADDR_BROADCAST 0xFC diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index 37f23dc05de..def6cfa3f45 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -45,6 +45,10 @@ struct pep_sock { u8 tx_fc; /* TX flow control */ u8 init_enable; /* auto-enable at creation */ u8 aligned; +#ifdef CONFIG_PHONET_PIPECTRLR + u16 remote_pep; + u8 pipe_state; +#endif }; static inline struct pep_sock *pep_sk(struct sock *sk) @@ -165,4 +169,21 @@ enum { PEP_IND_READY, }; +#ifdef CONFIG_PHONET_PIPECTRLR +#define PNS_PEP_CONNECT_UTID 0x02 +#define PNS_PIPE_CREATED_IND_UTID 0x04 +#define PNS_PIPE_ENABLE_UTID 0x0A +#define PNS_PIPE_ENABLED_IND_UTID 0x0C +#define PNS_PIPE_DISABLE_UTID 0x0F +#define PNS_PIPE_DISABLED_IND_UTID 0x11 +#define PNS_PEP_DISCONNECT_UTID 0x06 + +/* Used for tracking state of a pipe */ +enum { + PIPE_IDLE, + PIPE_DISABLED, + PIPE_ENABLED, +}; +#endif /* CONFIG_PHONET_PIPECTRLR */ + #endif diff --git a/net/phonet/Kconfig b/net/phonet/Kconfig index 6ec7d55b176..901956ada9c 100644 --- a/net/phonet/Kconfig +++ b/net/phonet/Kconfig @@ -14,3 +14,14 @@ config PHONET To compile this driver as a module, choose M here: the module will be called phonet. If unsure, say N. + +config PHONET_PIPECTRLR + bool "Phonet Pipe Controller" + depends on PHONET + default N + help + The Pipe Controller implementation in Phonet stack to support Pipe + data with Nokia Slim modems like WG2.5 used on ST-Ericsson U8500 + platform. + + If unsure, say N. diff --git a/net/phonet/pep.c b/net/phonet/pep.c index d0e7eb24c8b..7bf23cf36b0 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -88,6 +88,15 @@ static int pep_reply(struct sock *sk, struct sk_buff *oskb, const struct pnpipehdr *oph = pnp_hdr(oskb); struct pnpipehdr *ph; struct sk_buff *skb; +#ifdef CONFIG_PHONET_PIPECTRLR + const struct phonethdr *hdr = pn_hdr(oskb); + struct sockaddr_pn spn = { + .spn_family = AF_PHONET, + .spn_resource = 0xD9, + .spn_dev = hdr->pn_sdev, + .spn_obj = hdr->pn_sobj, + }; +#endif skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority); if (!skb) @@ -105,10 +114,271 @@ static int pep_reply(struct sock *sk, struct sk_buff *oskb, ph->pipe_handle = oph->pipe_handle; ph->error_code = code; +#ifdef CONFIG_PHONET_PIPECTRLR + return pn_skb_send(sk, skb, &spn); +#else return pn_skb_send(sk, skb, &pipe_srv); +#endif } #define PAD 0x00 + +#ifdef CONFIG_PHONET_PIPECTRLR +static u8 pipe_negotiate_fc(u8 *host_fc, u8 *remote_fc, int len) +{ + int i, j; + u8 base_fc, final_fc; + + for (i = 0; i < len; i++) { + base_fc = host_fc[i]; + for (j = 0; j < len; j++) { + if (remote_fc[j] == base_fc) { + final_fc = base_fc; + goto done; + } + } + } + return -EINVAL; + +done: + return final_fc; + +} + +static int pipe_get_flow_info(struct sock *sk, struct sk_buff *skb, + u8 *pref_rx_fc, u8 *req_tx_fc) +{ + struct pnpipehdr *hdr; + u8 n_sb; + + if (!pskb_may_pull(skb, sizeof(*hdr) + 4)) + return -EINVAL; + + hdr = pnp_hdr(skb); + n_sb = hdr->data[4]; + + __skb_pull(skb, sizeof(*hdr) + 4); + while (n_sb > 0) { + u8 type, buf[3], len = sizeof(buf); + u8 *data = pep_get_sb(skb, &type, &len, buf); + + if (data == NULL) + return -EINVAL; + + switch (type) { + case PN_PIPE_SB_REQUIRED_FC_TX: + if (len < 3 || (data[2] | data[3] | data[4]) > 3) + break; + req_tx_fc[0] = data[2]; + req_tx_fc[1] = data[3]; + req_tx_fc[2] = data[4]; + break; + + case PN_PIPE_SB_PREFERRED_FC_RX: + if (len < 3 || (data[2] | data[3] | data[4]) > 3) + break; + pref_rx_fc[0] = data[2]; + pref_rx_fc[1] = data[3]; + pref_rx_fc[2] = data[4]; + break; + + } + n_sb--; + } + return 0; +} + +static int pipe_handler_send_req(struct sock *sk, u16 dobj, u8 utid, + u8 msg_id, u8 p_handle, gfp_t priority) +{ + int len; + struct pnpipehdr *ph; + struct sk_buff *skb; + struct sockaddr_pn spn = { + .spn_family = AF_PHONET, + .spn_resource = 0xD9, + .spn_dev = pn_dev(dobj), + .spn_obj = pn_obj(dobj), + }; + + static const u8 data[4] = { + PAD, PAD, PAD, PAD, + }; + + switch (msg_id) { + case PNS_PEP_CONNECT_REQ: + len = sizeof(data); + break; + + case PNS_PEP_DISCONNECT_REQ: + case PNS_PEP_ENABLE_REQ: + case PNS_PEP_DISABLE_REQ: + len = 0; + break; + + default: + return -EINVAL; + } + + skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority); + if (!skb) + return -ENOMEM; + skb_set_owner_w(skb, sk); + + skb_reserve(skb, MAX_PNPIPE_HEADER); + if (len) { + __skb_put(skb, len); + skb_copy_to_linear_data(skb, data, len); + } + __skb_push(skb, sizeof(*ph)); + skb_reset_transport_header(skb); + ph = pnp_hdr(skb); + ph->utid = utid; + ph->message_id = msg_id; + ph->pipe_handle = p_handle; + ph->error_code = PN_PIPE_NO_ERROR; + + return pn_skb_send(sk, skb, &spn); +} + +static int pipe_handler_send_created_ind(struct sock *sk, u16 dobj, + u8 utid, u8 p_handle, u8 msg_id, u8 tx_fc, u8 rx_fc) +{ + int err_code; + struct pnpipehdr *ph; + struct sk_buff *skb; + struct sockaddr_pn spn = { + .spn_family = AF_PHONET, + .spn_resource = 0xD9, + .spn_dev = pn_dev(dobj), + .spn_obj = pn_obj(dobj), + }; + + static u8 data[4] = { + 0x03, 0x04, + }; + data[2] = tx_fc; + data[3] = rx_fc; + + /* + * actually, below is number of sub-blocks and not error code. + * Pipe_created_ind message format does not have any + * error code field. However, the Phonet stack will always send + * an error code as part of pnpipehdr. So, use that err_code to + * specify the number of sub-blocks. + */ + err_code = 0x01; + + skb = alloc_skb(MAX_PNPIPE_HEADER + sizeof(data), GFP_ATOMIC); + if (!skb) + return -ENOMEM; + skb_set_owner_w(skb, sk); + + skb_reserve(skb, MAX_PNPIPE_HEADER); + __skb_put(skb, sizeof(data)); + skb_copy_to_linear_data(skb, data, sizeof(data)); + __skb_push(skb, sizeof(*ph)); + skb_reset_transport_header(skb); + ph = pnp_hdr(skb); + ph->utid = utid; + ph->message_id = msg_id; + ph->pipe_handle = p_handle; + ph->error_code = err_code; + + return pn_skb_send(sk, skb, &spn); +} + +static int pipe_handler_send_ind(struct sock *sk, u16 dobj, u8 utid, + u8 p_handle, u8 msg_id) +{ + int err_code; + struct pnpipehdr *ph; + struct sk_buff *skb; + struct sockaddr_pn spn = { + .spn_family = AF_PHONET, + .spn_resource = 0xD9, + .spn_dev = pn_dev(dobj), + .spn_obj = pn_obj(dobj), + }; + + /* + * actually, below is a filler. + * Pipe_enabled/disabled_ind message format does not have any + * error code field. However, the Phonet stack will always send + * an error code as part of pnpipehdr. So, use that err_code to + * specify the filler value. + */ + err_code = 0x0; + + skb = alloc_skb(MAX_PNPIPE_HEADER, GFP_ATOMIC); + if (!skb) + return -ENOMEM; + skb_set_owner_w(skb, sk); + + skb_reserve(skb, MAX_PNPIPE_HEADER); + __skb_push(skb, sizeof(*ph)); + skb_reset_transport_header(skb); + ph = pnp_hdr(skb); + ph->utid = utid; + ph->message_id = msg_id; + ph->pipe_handle = p_handle; + ph->error_code = err_code; + + return pn_skb_send(sk, skb, &spn); +} + +static int pipe_handler_enable_pipe(struct sock *sk, int cmd) +{ + int ret; + struct pep_sock *pn = pep_sk(sk); + + switch (cmd) { + case PNPIPE_ENABLE: + ret = pipe_handler_send_req(sk, pn->pn_sk.sobject, + PNS_PIPE_ENABLE_UTID, PNS_PEP_ENABLE_REQ, + pn->pipe_handle, GFP_ATOMIC); + break; + + case PNPIPE_DISABLE: + ret = pipe_handler_send_req(sk, pn->pn_sk.sobject, + PNS_PIPE_DISABLE_UTID, PNS_PEP_DISABLE_REQ, + pn->pipe_handle, GFP_ATOMIC); + break; + + default: + ret = -EINVAL; + } + + return ret; +} + +static int pipe_handler_create_pipe(struct sock *sk, int pipe_handle, int cmd) +{ + int ret; + struct pep_sock *pn = pep_sk(sk); + + switch (cmd) { + case PNPIPE_CREATE: + ret = pipe_handler_send_req(sk, pn->pn_sk.sobject, + PNS_PEP_CONNECT_UTID, PNS_PEP_CONNECT_REQ, + pipe_handle, GFP_ATOMIC); + break; + + case PNPIPE_DESTROY: + ret = pipe_handler_send_req(sk, pn->remote_pep, + PNS_PEP_DISCONNECT_UTID, + PNS_PEP_DISCONNECT_REQ, + pn->pipe_handle, GFP_ATOMIC); + break; + + default: + ret = -EINVAL; + } + + return ret; +} +#endif + static int pep_accept_conn(struct sock *sk, struct sk_buff *skb) { static const u8 data[20] = { @@ -173,6 +443,14 @@ static int pipe_snd_status(struct sock *sk, u8 type, u8 status, gfp_t priority) struct pep_sock *pn = pep_sk(sk); struct pnpipehdr *ph; struct sk_buff *skb; +#ifdef CONFIG_PHONET_PIPECTRLR + struct sockaddr_pn spn = { + .spn_family = AF_PHONET, + .spn_resource = 0xD9, + .spn_dev = pn_dev(pn->remote_pep), + .spn_obj = pn_obj(pn->remote_pep), + }; +#endif skb = alloc_skb(MAX_PNPIPE_HEADER + 4, priority); if (!skb) @@ -192,7 +470,11 @@ static int pipe_snd_status(struct sock *sk, u8 type, u8 status, gfp_t priority) ph->data[3] = PAD; ph->data[4] = status; +#ifdef CONFIG_PHONET_PIPECTRLR + return pn_skb_send(sk, skb, &spn); +#else return pn_skb_send(sk, skb, &pipe_srv); +#endif } /* Send our RX flow control information to the sender. @@ -308,6 +590,12 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) struct pnpipehdr *hdr = pnp_hdr(skb); struct sk_buff_head *queue; int err = 0; +#ifdef CONFIG_PHONET_PIPECTRLR + struct phonethdr *ph = pn_hdr(skb); + static u8 host_pref_rx_fc[3], host_req_tx_fc[3]; + u8 remote_pref_rx_fc[3], remote_req_tx_fc[3]; + u8 negotiated_rx_fc, negotiated_tx_fc; +#endif BUG_ON(sk->sk_state == TCP_CLOSE_WAIT); @@ -316,6 +604,40 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE); break; +#ifdef CONFIG_PHONET_PIPECTRLR + case PNS_PEP_CONNECT_RESP: + if ((ph->pn_sdev == pn_dev(pn->remote_pep)) && + (ph->pn_sobj == pn_obj(pn->remote_pep))) { + pipe_get_flow_info(sk, skb, remote_pref_rx_fc, + remote_req_tx_fc); + + negotiated_tx_fc = pipe_negotiate_fc(remote_req_tx_fc, + host_pref_rx_fc, + sizeof(host_pref_rx_fc)); + negotiated_rx_fc = pipe_negotiate_fc(host_req_tx_fc, + remote_pref_rx_fc, + sizeof(host_pref_rx_fc)); + + pn->pipe_state = PIPE_DISABLED; + pipe_handler_send_created_ind(sk, pn->remote_pep, + PNS_PIPE_CREATED_IND_UTID, + pn->pipe_handle, PNS_PIPE_CREATED_IND, + negotiated_tx_fc, negotiated_rx_fc); + pipe_handler_send_created_ind(sk, pn->pn_sk.sobject, + PNS_PIPE_CREATED_IND_UTID, + pn->pipe_handle, PNS_PIPE_CREATED_IND, + negotiated_tx_fc, negotiated_rx_fc); + } else { + pipe_handler_send_req(sk, pn->remote_pep, + PNS_PEP_CONNECT_UTID, + PNS_PEP_CONNECT_REQ, pn->pipe_handle, + GFP_ATOMIC); + pipe_get_flow_info(sk, skb, host_pref_rx_fc, + host_req_tx_fc); + } + break; +#endif + case PNS_PEP_DISCONNECT_REQ: pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); sk->sk_state = TCP_CLOSE_WAIT; @@ -323,11 +645,41 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) sk->sk_state_change(sk); break; +#ifdef CONFIG_PHONET_PIPECTRLR + case PNS_PEP_DISCONNECT_RESP: + pn->pipe_state = PIPE_IDLE; + pipe_handler_send_req(sk, pn->pn_sk.sobject, + PNS_PEP_DISCONNECT_UTID, + PNS_PEP_DISCONNECT_REQ, pn->pipe_handle, + GFP_KERNEL); + break; +#endif + case PNS_PEP_ENABLE_REQ: /* Wait for PNS_PIPE_(ENABLED|REDIRECTED)_IND */ pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); break; +#ifdef CONFIG_PHONET_PIPECTRLR + case PNS_PEP_ENABLE_RESP: + if ((ph->pn_sdev == pn_dev(pn->remote_pep)) && + (ph->pn_sobj == pn_obj(pn->remote_pep))) { + pn->pipe_state = PIPE_ENABLED; + pipe_handler_send_ind(sk, pn->remote_pep, + PNS_PIPE_ENABLED_IND_UTID, + pn->pipe_handle, PNS_PIPE_ENABLED_IND); + pipe_handler_send_ind(sk, pn->pn_sk.sobject, + PNS_PIPE_ENABLED_IND_UTID, + pn->pipe_handle, PNS_PIPE_ENABLED_IND); + } else + pipe_handler_send_req(sk, pn->remote_pep, + PNS_PIPE_ENABLE_UTID, + PNS_PEP_ENABLE_REQ, pn->pipe_handle, + GFP_KERNEL); + + break; +#endif + case PNS_PEP_RESET_REQ: switch (hdr->state_after_reset) { case PN_PIPE_DISABLE: @@ -346,6 +698,27 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); break; +#ifdef CONFIG_PHONET_PIPECTRLR + case PNS_PEP_DISABLE_RESP: + if ((ph->pn_sdev == pn_dev(pn->remote_pep)) && + (ph->pn_sobj == pn_obj(pn->remote_pep))) { + pn->pipe_state = PIPE_DISABLED; + pipe_handler_send_ind(sk, pn->remote_pep, + PNS_PIPE_DISABLED_IND_UTID, + pn->pipe_handle, + PNS_PIPE_DISABLED_IND); + pipe_handler_send_ind(sk, pn->pn_sk.sobject, + PNS_PIPE_DISABLED_IND_UTID, + pn->pipe_handle, + PNS_PIPE_DISABLED_IND); + } else + pipe_handler_send_req(sk, pn->remote_pep, + PNS_PIPE_DISABLE_UTID, + PNS_PEP_DISABLE_REQ, pn->pipe_handle, + GFP_KERNEL); + break; +#endif + case PNS_PEP_CTRL_REQ: if (skb_queue_len(&pn->ctrlreq_queue) >= PNPIPE_CTRLREQ_MAX) { atomic_inc(&sk->sk_drops); @@ -519,6 +892,9 @@ static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL; newpn->init_enable = enabled; newpn->aligned = aligned; +#ifdef CONFIG_PHONET_PIPECTRLR + newpn->remote_pep = pn->remote_pep; +#endif BUG_ON(!skb_queue_empty(&newsk->sk_receive_queue)); skb_queue_head(&newsk->sk_receive_queue, skb); @@ -781,6 +1157,10 @@ static int pep_setsockopt(struct sock *sk, int level, int optname, { struct pep_sock *pn = pep_sk(sk); int val = 0, err = 0; +#ifdef CONFIG_PHONET_PIPECTRLR + int remote_pep; + int pipe_handle; +#endif if (level != SOL_PNPIPE) return -ENOPROTOOPT; @@ -791,6 +1171,48 @@ static int pep_setsockopt(struct sock *sk, int level, int optname, lock_sock(sk); switch (optname) { +#ifdef CONFIG_PHONET_PIPECTRLR + case PNPIPE_CREATE: + if (val) { + if (pn->pipe_state > PIPE_IDLE) { + err = -EFAULT; + break; + } + remote_pep = val & 0xFFFF; + pipe_handle = (val >> 16) & 0xFF; + pn->remote_pep = remote_pep; + err = pipe_handler_create_pipe(sk, pipe_handle, + PNPIPE_CREATE); + break; + } + + case PNPIPE_ENABLE: + if (pn->pipe_state != PIPE_DISABLED) { + err = -EFAULT; + break; + } + err = pipe_handler_enable_pipe(sk, PNPIPE_ENABLE); + break; + + case PNPIPE_DISABLE: + if (pn->pipe_state != PIPE_ENABLED) { + err = -EFAULT; + break; + } + + err = pipe_handler_enable_pipe(sk, PNPIPE_DISABLE); + break; + + case PNPIPE_DESTROY: + if (pn->pipe_state < PIPE_DISABLED) { + err = -EFAULT; + break; + } + + err = pipe_handler_create_pipe(sk, 0x0, PNPIPE_DESTROY); + break; +#endif + case PNPIPE_ENCAP: if (val && val != PNPIPE_ENCAP_IP) { err = -EINVAL; @@ -840,6 +1262,13 @@ static int pep_getsockopt(struct sock *sk, int level, int optname, case PNPIPE_ENCAP: val = pn->ifindex ? PNPIPE_ENCAP_IP : PNPIPE_ENCAP_NONE; break; + +#ifdef CONFIG_PHONET_PIPECTRLR + case PNPIPE_INQ: + val = pn->pipe_state; + break; +#endif + case PNPIPE_IFINDEX: val = pn->ifindex; break; @@ -859,7 +1288,14 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb) { struct pep_sock *pn = pep_sk(sk); struct pnpipehdr *ph; - int err; +#ifdef CONFIG_PHONET_PIPECTRLR + struct sockaddr_pn spn = { + .spn_family = AF_PHONET, + .spn_resource = 0xD9, + .spn_dev = pn_dev(pn->remote_pep), + .spn_obj = pn_obj(pn->remote_pep), + }; +#endif if (pn_flow_safe(pn->tx_fc) && !atomic_add_unless(&pn->tx_credits, -1, 0)) { @@ -877,11 +1313,11 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb) } else ph->message_id = PNS_PIPE_DATA; ph->pipe_handle = pn->pipe_handle; - - err = pn_skb_send(sk, skb, &pipe_srv); - if (err && pn_flow_safe(pn->tx_fc)) - atomic_inc(&pn->tx_credits); - return err; +#ifdef CONFIG_PHONET_PIPECTRLR + return pn_skb_send(sk, skb, &spn); +#else + return pn_skb_send(sk, skb, &pipe_srv); +#endif } static int pep_sendmsg(struct kiocb *iocb, struct sock *sk, -- cgit v1.2.3 From 290b895e0ba4552dfcfc4bd35759c192345b934a Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 27 Sep 2010 00:33:35 +0000 Subject: tunnels: prepare percpu accounting Tunnels are going to use percpu for their accounting. They are going to use a new tstats field in net_device. skb_tunnel_rx() is changed to be a wrapper around __skb_tunnel_rx() IPTUNNEL_XMIT() is changed to be a wrapper around __IPTUNNEL_XMIT() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + include/net/dst.h | 24 +++++++++++++++++++----- include/net/ipip.h | 12 +++++++----- 3 files changed, 27 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 01bd4c82d98..83de0eb7a07 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1053,6 +1053,7 @@ struct net_device { union { void *ml_priv; struct pcpu_lstats __percpu *lstats; /* loopback stats */ + struct pcpu_tstats __percpu *tstats; /* tunnel stats */ }; /* GARP */ struct garp_port *garp_port; diff --git a/include/net/dst.h b/include/net/dst.h index 02386505033..aa53fbc34b2 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -227,6 +227,23 @@ static inline void skb_dst_force(struct sk_buff *skb) } +/** + * __skb_tunnel_rx - prepare skb for rx reinsert + * @skb: buffer + * @dev: tunnel device + * + * After decapsulation, packet is going to re-enter (netif_rx()) our stack, + * so make some cleanups. (no accounting done) + */ +static inline void __skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev) +{ + skb->dev = dev; + skb->rxhash = 0; + skb_set_queue_mapping(skb, 0); + skb_dst_drop(skb); + nf_reset(skb); +} + /** * skb_tunnel_rx - prepare skb for rx reinsert * @skb: buffer @@ -234,17 +251,14 @@ static inline void skb_dst_force(struct sk_buff *skb) * * After decapsulation, packet is going to re-enter (netif_rx()) our stack, * so make some cleanups, and perform accounting. + * Note: this accounting is not SMP safe. */ static inline void skb_tunnel_rx(struct sk_buff *skb, struct net_device *dev) { - skb->dev = dev; /* TODO : stats should be SMP safe */ dev->stats.rx_packets++; dev->stats.rx_bytes += skb->len; - skb->rxhash = 0; - skb_set_queue_mapping(skb, 0); - skb_dst_drop(skb); - nf_reset(skb); + __skb_tunnel_rx(skb, dev); } /* Children define the path of the packet through the diff --git a/include/net/ipip.h b/include/net/ipip.h index 65caea8b414..58abbf966b0 100644 --- a/include/net/ipip.h +++ b/include/net/ipip.h @@ -45,7 +45,7 @@ struct ip_tunnel_prl_entry { struct rcu_head rcu_head; }; -#define IPTUNNEL_XMIT() do { \ +#define __IPTUNNEL_XMIT(stats1, stats2) do { \ int err; \ int pkt_len = skb->len - skb_transport_offset(skb); \ \ @@ -54,12 +54,14 @@ struct ip_tunnel_prl_entry { \ err = ip_local_out(skb); \ if (likely(net_xmit_eval(err) == 0)) { \ - txq->tx_bytes += pkt_len; \ - txq->tx_packets++; \ + (stats1)->tx_bytes += pkt_len; \ + (stats1)->tx_packets++; \ } else { \ - stats->tx_errors++; \ - stats->tx_aborted_errors++; \ + (stats2)->tx_errors++; \ + (stats2)->tx_aborted_errors++; \ } \ } while (0) +#define IPTUNNEL_XMIT() __IPTUNNEL_XMIT(txq, stats) + #endif -- cgit v1.2.3 From bc01befdcf3e40979eb518085a075cbf0aacede0 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 28 Sep 2010 21:06:34 +0200 Subject: netfilter: ctnetlink: add support for user-space expectation helpers This patch adds the basic infrastructure to support user-space expectation helpers via ctnetlink and the netfilter queuing infrastructure NFQUEUE. Basically, this patch: * adds NF_CT_EXPECT_USERSPACE flag to identify user-space created expectations. I have also added a sanity check in __nf_ct_expect_check() to avoid that kernel-space helpers may create an expectation if the master conntrack has no helper assigned. * adds some branches to check if the master conntrack helper exists, otherwise we skip the code that refers to kernel-space helper such as the local expectation list and the expectation policy. * allows to set the timeout for user-space expectations with no helper assigned. * a list of expectations created from user-space that depends on ctnetlink (if this module is removed, they are deleted). * includes USERSPACE in the /proc output for expectations that have been created by a user-space helper. This patch also modifies ctnetlink to skip including the helper name in the Netlink messages if no kernel-space helper is set (since no user-space expectation has not kernel-space kernel assigned). You can access an example user-space FTP conntrack helper at: http://people.netfilter.org/pablo/userspace-conntrack-helpers/nf-ftp-helper-userspace-POC.tar.bz Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_common.h | 1 + include/net/netfilter/nf_conntrack_expect.h | 1 + net/netfilter/nf_conntrack_expect.c | 62 ++++++++++++++++++++------- net/netfilter/nf_conntrack_netlink.c | 46 +++++++++++++------- 4 files changed, 79 insertions(+), 31 deletions(-) (limited to 'include/net') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index fdc50cae861..23a1a08578a 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -103,6 +103,7 @@ enum ip_conntrack_expect_events { /* expectation flags */ #define NF_CT_EXPECT_PERMANENT 0x1 #define NF_CT_EXPECT_INACTIVE 0x2 +#define NF_CT_EXPECT_USERSPACE 0x4 #ifdef __KERNEL__ struct ip_conntrack_stat { diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 96bb42af5fa..416b8384448 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -85,6 +85,7 @@ nf_ct_find_expectation(struct net *net, u16 zone, void nf_ct_unlink_expect(struct nf_conntrack_expect *exp); void nf_ct_remove_expectations(struct nf_conn *ct); void nf_ct_unexpect_related(struct nf_conntrack_expect *exp); +void nf_ct_remove_userspace_expectations(void); /* Allocate space for an expectation: this is mandatory before calling nf_ct_expect_related. You will have to call put afterwards. */ diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index acb29ccaa41..b30a1f2aac0 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -38,20 +38,23 @@ static int nf_ct_expect_hash_rnd_initted __read_mostly; static struct kmem_cache *nf_ct_expect_cachep __read_mostly; +static HLIST_HEAD(nf_ct_userspace_expect_list); + /* nf_conntrack_expect helper functions */ void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) { struct nf_conn_help *master_help = nfct_help(exp->master); struct net *net = nf_ct_exp_net(exp); - NF_CT_ASSERT(master_help); NF_CT_ASSERT(!timer_pending(&exp->timeout)); hlist_del_rcu(&exp->hnode); net->ct.expect_count--; hlist_del(&exp->lnode); - master_help->expecting[exp->class]--; + if (!(exp->flags & NF_CT_EXPECT_USERSPACE)) + master_help->expecting[exp->class]--; + nf_ct_expect_put(exp); NF_CT_STAT_INC(net, expect_delete); @@ -320,16 +323,21 @@ static void nf_ct_expect_insert(struct nf_conntrack_expect *exp) atomic_inc(&exp->use); - hlist_add_head(&exp->lnode, &master_help->expectations); - master_help->expecting[exp->class]++; + if (master_help) { + hlist_add_head(&exp->lnode, &master_help->expectations); + master_help->expecting[exp->class]++; + } else if (exp->flags & NF_CT_EXPECT_USERSPACE) + hlist_add_head(&exp->lnode, &nf_ct_userspace_expect_list); hlist_add_head_rcu(&exp->hnode, &net->ct.expect_hash[h]); net->ct.expect_count++; setup_timer(&exp->timeout, nf_ct_expectation_timed_out, (unsigned long)exp); - p = &master_help->helper->expect_policy[exp->class]; - exp->timeout.expires = jiffies + p->timeout * HZ; + if (master_help) { + p = &master_help->helper->expect_policy[exp->class]; + exp->timeout.expires = jiffies + p->timeout * HZ; + } add_timer(&exp->timeout); atomic_inc(&exp->use); @@ -380,7 +388,9 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) unsigned int h; int ret = 1; - if (!master_help->helper) { + /* Don't allow expectations created from kernel-space with no helper */ + if (!(expect->flags & NF_CT_EXPECT_USERSPACE) && + (!master_help || (master_help && !master_help->helper))) { ret = -ESHUTDOWN; goto out; } @@ -398,13 +408,16 @@ static inline int __nf_ct_expect_check(struct nf_conntrack_expect *expect) } } /* Will be over limit? */ - p = &master_help->helper->expect_policy[expect->class]; - if (p->max_expected && - master_help->expecting[expect->class] >= p->max_expected) { - evict_oldest_expect(master, expect); - if (master_help->expecting[expect->class] >= p->max_expected) { - ret = -EMFILE; - goto out; + if (master_help) { + p = &master_help->helper->expect_policy[expect->class]; + if (p->max_expected && + master_help->expecting[expect->class] >= p->max_expected) { + evict_oldest_expect(master, expect); + if (master_help->expecting[expect->class] + >= p->max_expected) { + ret = -EMFILE; + goto out; + } } } @@ -439,6 +452,21 @@ out: } EXPORT_SYMBOL_GPL(nf_ct_expect_related_report); +void nf_ct_remove_userspace_expectations(void) +{ + struct nf_conntrack_expect *exp; + struct hlist_node *n, *next; + + hlist_for_each_entry_safe(exp, n, next, + &nf_ct_userspace_expect_list, lnode) { + if (del_timer(&exp->timeout)) { + nf_ct_unlink_expect(exp); + nf_ct_expect_put(exp); + } + } +} +EXPORT_SYMBOL_GPL(nf_ct_remove_userspace_expectations); + #ifdef CONFIG_PROC_FS struct ct_expect_iter_state { struct seq_net_private p; @@ -529,8 +557,12 @@ static int exp_seq_show(struct seq_file *s, void *v) seq_printf(s, "PERMANENT"); delim = ","; } - if (expect->flags & NF_CT_EXPECT_INACTIVE) + if (expect->flags & NF_CT_EXPECT_INACTIVE) { seq_printf(s, "%sINACTIVE", delim); + delim = ","; + } + if (expect->flags & NF_CT_EXPECT_USERSPACE) + seq_printf(s, "%sUSERSPACE", delim); helper = rcu_dereference(nfct_help(expect->master)->helper); if (helper) { diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index 0804e0ef650..b4077be5f66 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1560,8 +1560,8 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, const struct nf_conntrack_expect *exp) { struct nf_conn *master = exp->master; - struct nf_conntrack_helper *helper; long timeout = (exp->timeout.expires - jiffies) / HZ; + struct nf_conn_help *help; if (timeout < 0) timeout = 0; @@ -1578,9 +1578,14 @@ ctnetlink_exp_dump_expect(struct sk_buff *skb, NLA_PUT_BE32(skb, CTA_EXPECT_TIMEOUT, htonl(timeout)); NLA_PUT_BE32(skb, CTA_EXPECT_ID, htonl((unsigned long)exp)); NLA_PUT_BE32(skb, CTA_EXPECT_FLAGS, htonl(exp->flags)); - helper = rcu_dereference(nfct_help(master)->helper); - if (helper) - NLA_PUT_STRING(skb, CTA_EXPECT_HELP_NAME, helper->name); + help = nfct_help(master); + if (help) { + struct nf_conntrack_helper *helper; + + helper = rcu_dereference(help->helper); + if (helper) + NLA_PUT_STRING(skb, CTA_EXPECT_HELP_NAME, helper->name); + } return 0; @@ -1921,24 +1926,32 @@ ctnetlink_create_expect(struct net *net, u16 zone, if (!h) return -ENOENT; ct = nf_ct_tuplehash_to_ctrack(h); - help = nfct_help(ct); - - if (!help || !help->helper) { - /* such conntrack hasn't got any helper, abort */ - err = -EOPNOTSUPP; - goto out; - } - exp = nf_ct_expect_alloc(ct); if (!exp) { err = -ENOMEM; goto out; } + help = nfct_help(ct); + if (!help) { + if (!cda[CTA_EXPECT_TIMEOUT]) { + err = -EINVAL; + goto out; + } + exp->timeout.expires = + jiffies + ntohl(nla_get_be32(cda[CTA_EXPECT_TIMEOUT])) * HZ; - if (cda[CTA_EXPECT_FLAGS]) - exp->flags = ntohl(nla_get_be32(cda[CTA_EXPECT_FLAGS])); - else - exp->flags = 0; + exp->flags = NF_CT_EXPECT_USERSPACE; + if (cda[CTA_EXPECT_FLAGS]) { + exp->flags |= + ntohl(nla_get_be32(cda[CTA_EXPECT_FLAGS])); + } + } else { + if (cda[CTA_EXPECT_FLAGS]) { + exp->flags = ntohl(nla_get_be32(cda[CTA_EXPECT_FLAGS])); + exp->flags &= ~NF_CT_EXPECT_USERSPACE; + } else + exp->flags = 0; + } exp->class = 0; exp->expectfn = NULL; @@ -2109,6 +2122,7 @@ static void __exit ctnetlink_exit(void) { pr_info("ctnetlink: unregistering from nfnetlink.\n"); + nf_ct_remove_userspace_expectations(); #ifdef CONFIG_NF_CONNTRACK_EVENTS nf_ct_expect_unregister_notifier(&ctnl_notifier_exp); nf_conntrack_unregister_notifier(&ctnl_notifier); -- cgit v1.2.3 From 4465b469008bc03b98a1b8df4e9ae501b6c69d4b Mon Sep 17 00:00:00 2001 From: Tom Herbert Date: Sun, 23 May 2010 19:54:12 +0000 Subject: ipv4: Allow configuring subnets as local addresses This patch allows a host to be configured to respond to any address in a specified range as if it were local, without actually needing to configure the address on an interface. This is done through routing table configuration. For instance, to configure a host to respond to any address in 10.1/16 received on eth0 as a local address we can do: ip rule add from all iif eth0 lookup 200 ip route add local 10.1/16 dev lo proto kernel scope host src 127.0.0.1 table 200 This host is now reachable by any 10.1/16 address (route lookup on input for packets received on eth0 can find the route). On output, the rule will not be matched so that this host can still send packets to 10.1/16 (not sent on loopback). Presumably, external routing can be configured to make sense out of this. To make this work, we needed to modify the logic in finding the interface which is assigned a given source address for output (dev_ip_find). We perform a normal fib_lookup instead of just a lookup on the local table, and in the lookup we ignore the input interface for matching. This patch is useful to implement IP-anycast for subnets of virtual addresses. Signed-off-by: Tom Herbert Signed-off-by: David S. Miller --- include/net/flow.h | 1 + net/core/fib_rules.c | 3 ++- net/ipv4/fib_frontend.c | 7 +++---- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/flow.h b/include/net/flow.h index bb08692a20b..0ac3fb5e097 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -49,6 +49,7 @@ struct flowi { __u8 proto; __u8 flags; #define FLOWI_FLAG_ANYSRC 0x01 +#define FLOWI_FLAG_MATCH_ANY_IIF 0x02 union { struct { __be16 sport; diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index d0787284cb0..332c2e31d04 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -182,7 +182,8 @@ static int fib_rule_match(struct fib_rule *rule, struct fib_rules_ops *ops, { int ret = 0; - if (rule->iifindex && (rule->iifindex != fl->iif)) + if (rule->iifindex && (rule->iifindex != fl->iif) && + !(fl->flags & FLOWI_FLAG_MATCH_ANY_IIF)) goto out; if (rule->oifindex && (rule->oifindex != fl->oif)) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 7d02a9f999f..981f3c59b33 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -153,17 +153,16 @@ static void fib_flush(struct net *net) struct net_device * ip_dev_find(struct net *net, __be32 addr) { - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; + struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } }, + .flags = FLOWI_FLAG_MATCH_ANY_IIF }; struct fib_result res; struct net_device *dev = NULL; - struct fib_table *local_table; #ifdef CONFIG_IP_MULTIPLE_TABLES res.r = NULL; #endif - local_table = fib_get_table(net, RT_TABLE_LOCAL); - if (!local_table || fib_table_lookup(local_table, &fl, &res)) + if (fib_lookup(net, &fl, &res)) return NULL; if (res.type != RTN_LOCAL) goto out; -- cgit v1.2.3 From a64de47c091e4a337fa9763315cb6f2fbf0c583b Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 28 Sep 2010 17:08:02 +0000 Subject: arp: remove unnecessary export of arp_broken_ops arp_broken_ops is only used in arp.c Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/arp.h | 2 -- net/ipv4/arp.c | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/arp.h b/include/net/arp.h index 716f43c5c98..f4cf6ce6658 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -26,6 +26,4 @@ extern struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, const unsigned char *target_hw); extern void arp_xmit(struct sk_buff *skb); -extern const struct neigh_ops arp_broken_ops; - #endif /* _ARP_H */ diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 4083c186fd3..d9031ad6782 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -161,7 +161,7 @@ static const struct neigh_ops arp_direct_ops = { .queue_xmit = dev_queue_xmit, }; -const struct neigh_ops arp_broken_ops = { +static const struct neigh_ops arp_broken_ops = { .family = AF_INET, .solicit = arp_solicit, .error_report = arp_error_report, @@ -170,7 +170,6 @@ const struct neigh_ops arp_broken_ops = { .hh_output = dev_queue_xmit, .queue_xmit = dev_queue_xmit, }; -EXPORT_SYMBOL(arp_broken_ops); struct neigh_table arp_tbl = { .family = AF_INET, -- cgit v1.2.3 From 1b9f409293529da4630bfc5d6d8e7d7451a6ccb5 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 28 Sep 2010 19:30:14 +0000 Subject: tcp: tcp_enter_quickack_mode can be static Function only used in tcp_input.c Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/tcp.h | 2 -- net/ipv4/tcp_input.c | 2 +- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/tcp.h b/include/net/tcp.h index 914a60c7ad6..4fee0424af7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -346,8 +346,6 @@ static inline void tcp_dec_quickack_mode(struct sock *sk, } } -extern void tcp_enter_quickack_mode(struct sock *sk); - #define TCP_ECN_OK 1 #define TCP_ECN_QUEUE_CWR 2 #define TCP_ECN_DEMAND_CWR 4 diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index fabc09a58d7..eaf20e7e61d 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -182,7 +182,7 @@ static void tcp_incr_quickack(struct sock *sk) icsk->icsk_ack.quick = min(quickacks, TCP_MAX_QUICKACKS); } -void tcp_enter_quickack_mode(struct sock *sk) +static void tcp_enter_quickack_mode(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); tcp_incr_quickack(sk); -- cgit v1.2.3 From 367e5e376922dcf52f92e1db436010fb828d3bfa Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 30 Sep 2010 05:36:29 +0000 Subject: neigh: reorder fields in struct neighbour On 64bit arches, there are two 32bit holes that we can remove. sizeof(struct neighbour) shrinks from 0xf8 to 0xf0 bytes Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/neighbour.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 242879b6c4d..7d08fd1062f 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -94,7 +94,7 @@ struct neighbour { struct neighbour *next; struct neigh_table *tbl; struct neigh_parms *parms; - struct net_device *dev; + struct net_device *dev; unsigned long used; unsigned long confirmed; unsigned long updated; @@ -102,11 +102,11 @@ struct neighbour { __u8 nud_state; __u8 type; __u8 dead; + atomic_t refcnt; atomic_t probes; rwlock_t lock; unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))]; struct hh_cache *hh; - atomic_t refcnt; int (*output)(struct sk_buff *skb); struct sk_buff_head arp_queue; struct timer_list timer; @@ -163,7 +163,7 @@ struct neigh_table { atomic_t entries; rwlock_t lock; unsigned long last_rand; - struct kmem_cache *kmem_cachep; + struct kmem_cache *kmem_cachep; struct neigh_statistics __percpu *stats; struct neighbour **hash_buckets; unsigned int hash_mask; -- cgit v1.2.3 From c7d4426a98a5f6654cd0b4b33d9dab2e77192c18 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 3 Oct 2010 22:17:54 -0700 Subject: net: introduce DST_NOCACHE flag While doing stress tests with IP route cache disabled, and multi queue devices, I noticed a very high contention on one rwlock used in neighbour code. When many cpus are trying to send frames (possibly using a high performance multiqueue device) to the same neighbour, they fight for the neigh->lock rwlock in order to call neigh_hh_init(), and fight on hh->hh_refcnt (a pair of atomic_inc/atomic_dec_and_test()) But we dont need to call neigh_hh_init() for dst that are used only once. It costs four atomic operations at least, on two contended cache lines, plus the high contention on neigh->lock rwlock. Introduce a new dst flag, DST_NOCACHE, that is set when dst was not inserted in route cache. With the stress test bench, sending 160000000 frames on one neighbour, results are : Before patch: real 2m28.406s user 0m11.781s sys 36m17.964s After patch: real 1m26.532s user 0m12.185s sys 20m3.903s Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dst.h | 9 +++++---- net/core/neighbour.c | 4 +++- net/ipv4/route.c | 1 + 3 files changed, 9 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index aa53fbc34b2..a217c838ec0 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -43,10 +43,11 @@ struct dst_entry { short error; short obsolete; int flags; -#define DST_HOST 1 -#define DST_NOXFRM 2 -#define DST_NOPOLICY 4 -#define DST_NOHASH 8 +#define DST_HOST 0x0001 +#define DST_NOXFRM 0x0002 +#define DST_NOPOLICY 0x0004 +#define DST_NOHASH 0x0008 +#define DST_NOCACHE 0x0010 unsigned long expires; unsigned short header_len; /* more space at head required */ diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 96b1a749abb..b142a0d7607 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1210,7 +1210,9 @@ int neigh_resolve_output(struct sk_buff *skb) if (!neigh_event_send(neigh, skb)) { int err; struct net_device *dev = neigh->dev; - if (dev->header_ops->cache && !dst->hh) { + if (dev->header_ops->cache && + !dst->hh && + !(dst->flags & DST_NOCACHE)) { write_lock_bh(&neigh->lock); if (!dst->hh) neigh_hh_init(neigh, dst, dst->ops->protocol); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index a61acea975f..c3cb8bd2363 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1107,6 +1107,7 @@ restart: * on the route gc list. */ + rt->dst.flags |= DST_NOCACHE; if (rt->rt_type == RTN_UNICAST || rt->fl.iif == 0) { int err = arp_bind_neighbour(&rt->dst); if (err) { -- cgit v1.2.3 From f11017ec2d1859c661f4e2b12c4a8d250e1f47cf Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sun, 22 Aug 2010 21:37:52 +0900 Subject: IPVS: Add struct ip_vs_conn_param Signed-off-by: Simon Horman Acked-by: Julian Anastasov --- include/net/ip_vs.h | 44 +++++--- net/netfilter/ipvs/ip_vs_conn.c | 171 ++++++++++++++++---------------- net/netfilter/ipvs/ip_vs_core.c | 64 ++++++------ net/netfilter/ipvs/ip_vs_ftp.c | 43 ++++---- net/netfilter/ipvs/ip_vs_nfct.c | 12 +-- net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 47 ++++----- net/netfilter/ipvs/ip_vs_sync.c | 33 +++--- 7 files changed, 211 insertions(+), 203 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 3915a4f4cd3..d4da774eca7 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -357,6 +357,15 @@ struct ip_vs_protocol { extern struct ip_vs_protocol * ip_vs_proto_get(unsigned short proto); +struct ip_vs_conn_param { + const union nf_inet_addr *caddr; + const union nf_inet_addr *vaddr; + __be16 cport; + __be16 vport; + __u16 protocol; + u16 af; +}; + /* * IP_VS structure allocated for each dynamically scheduled connection */ @@ -626,13 +635,23 @@ enum { IP_VS_DIR_LAST, }; -extern struct ip_vs_conn *ip_vs_conn_in_get -(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, - const union nf_inet_addr *d_addr, __be16 d_port); +static inline void ip_vs_conn_fill_param(int af, int protocol, + const union nf_inet_addr *caddr, + __be16 cport, + const union nf_inet_addr *vaddr, + __be16 vport, + struct ip_vs_conn_param *p) +{ + p->af = af; + p->protocol = protocol; + p->caddr = caddr; + p->cport = cport; + p->vaddr = vaddr; + p->vport = vport; +} -extern struct ip_vs_conn *ip_vs_ct_in_get -(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, - const union nf_inet_addr *d_addr, __be16 d_port); +struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p); +struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p); struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, @@ -640,9 +659,7 @@ struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, unsigned int proto_off, int inverse); -extern struct ip_vs_conn *ip_vs_conn_out_get -(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, - const union nf_inet_addr *d_addr, __be16 d_port); +struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p); struct ip_vs_conn * ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, @@ -658,11 +675,10 @@ static inline void __ip_vs_conn_put(struct ip_vs_conn *cp) extern void ip_vs_conn_put(struct ip_vs_conn *cp); extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport); -extern struct ip_vs_conn * -ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport, - const union nf_inet_addr *vaddr, __be16 vport, - const union nf_inet_addr *daddr, __be16 dport, unsigned flags, - struct ip_vs_dest *dest); +struct ip_vs_conn *ip_vs_conn_new(const struct ip_vs_conn_param *p, + const union nf_inet_addr *daddr, + __be16 dport, unsigned flags, + struct ip_vs_dest *dest); extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp); extern const char * ip_vs_state_name(__u16 proto, int state); diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index a970d969149..deeb906a797 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -218,27 +218,26 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) /* * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. * Called for pkts coming from OUTside-to-INside. - * s_addr, s_port: pkt source address (foreign host) - * d_addr, d_port: pkt dest address (load balancer) + * p->caddr, p->cport: pkt source address (foreign host) + * p->vaddr, p->vport: pkt dest address (load balancer) */ -static inline struct ip_vs_conn *__ip_vs_conn_in_get -(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, - const union nf_inet_addr *d_addr, __be16 d_port) +static inline struct ip_vs_conn * +__ip_vs_conn_in_get(const struct ip_vs_conn_param *p) { unsigned hash; struct ip_vs_conn *cp; - hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port); + hash = ip_vs_conn_hashkey(p->af, p->protocol, p->caddr, p->cport); ct_read_lock(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (cp->af == af && - ip_vs_addr_equal(af, s_addr, &cp->caddr) && - ip_vs_addr_equal(af, d_addr, &cp->vaddr) && - s_port == cp->cport && d_port == cp->vport && - ((!s_port) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && - protocol == cp->protocol) { + if (cp->af == p->af && + ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && + ip_vs_addr_equal(p->af, p->vaddr, &cp->vaddr) && + p->cport == cp->cport && p->vport == cp->vport && + ((!p->cport) ^ (!(cp->flags & IP_VS_CONN_F_NO_CPORT))) && + p->protocol == cp->protocol) { /* HIT */ atomic_inc(&cp->refcnt); ct_read_unlock(hash); @@ -251,71 +250,82 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get return NULL; } -struct ip_vs_conn *ip_vs_conn_in_get -(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, - const union nf_inet_addr *d_addr, __be16 d_port) +struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p) { struct ip_vs_conn *cp; - cp = __ip_vs_conn_in_get(af, protocol, s_addr, s_port, d_addr, d_port); - if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) - cp = __ip_vs_conn_in_get(af, protocol, s_addr, 0, d_addr, - d_port); + cp = __ip_vs_conn_in_get(p); + if (!cp && atomic_read(&ip_vs_conn_no_cport_cnt)) { + struct ip_vs_conn_param cport_zero_p = *p; + cport_zero_p.cport = 0; + cp = __ip_vs_conn_in_get(&cport_zero_p); + } IP_VS_DBG_BUF(9, "lookup/in %s %s:%d->%s:%d %s\n", - ip_vs_proto_name(protocol), - IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port), - IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port), + ip_vs_proto_name(p->protocol), + IP_VS_DBG_ADDR(p->af, p->caddr), ntohs(p->cport), + IP_VS_DBG_ADDR(p->af, p->vaddr), ntohs(p->vport), cp ? "hit" : "not hit"); return cp; } +static int +ip_vs_conn_fill_param_proto(int af, const struct sk_buff *skb, + const struct ip_vs_iphdr *iph, + unsigned int proto_off, int inverse, + struct ip_vs_conn_param *p) +{ + __be16 _ports[2], *pptr; + + pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); + if (pptr == NULL) + return 1; + + if (likely(!inverse)) + ip_vs_conn_fill_param(af, iph->protocol, &iph->saddr, pptr[0], + &iph->daddr, pptr[1], p); + else + ip_vs_conn_fill_param(af, iph->protocol, &iph->daddr, pptr[1], + &iph->saddr, pptr[0], p); + return 0; +} + struct ip_vs_conn * ip_vs_conn_in_get_proto(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse) { - __be16 _ports[2], *pptr; + struct ip_vs_conn_param p; - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) + if (ip_vs_conn_fill_param_proto(af, skb, iph, proto_off, inverse, &p)) return NULL; - if (likely(!inverse)) - return ip_vs_conn_in_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - else - return ip_vs_conn_in_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); + return ip_vs_conn_in_get(&p); } EXPORT_SYMBOL_GPL(ip_vs_conn_in_get_proto); /* Get reference to connection template */ -struct ip_vs_conn *ip_vs_ct_in_get -(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, - const union nf_inet_addr *d_addr, __be16 d_port) +struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) { unsigned hash; struct ip_vs_conn *cp; - hash = ip_vs_conn_hashkey(af, protocol, s_addr, s_port); + hash = ip_vs_conn_hashkey(p->af, p->protocol, p->caddr, p->cport); ct_read_lock(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (cp->af == af && - ip_vs_addr_equal(af, s_addr, &cp->caddr) && + if (cp->af == p->af && + ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && /* protocol should only be IPPROTO_IP if - * d_addr is a fwmark */ - ip_vs_addr_equal(protocol == IPPROTO_IP ? AF_UNSPEC : af, - d_addr, &cp->vaddr) && - s_port == cp->cport && d_port == cp->vport && + * p->vaddr is a fwmark */ + ip_vs_addr_equal(p->protocol == IPPROTO_IP ? AF_UNSPEC : + p->af, p->vaddr, &cp->vaddr) && + p->cport == cp->cport && p->vport == cp->vport && cp->flags & IP_VS_CONN_F_TEMPLATE && - protocol == cp->protocol) { + p->protocol == cp->protocol) { /* HIT */ atomic_inc(&cp->refcnt); goto out; @@ -327,23 +337,19 @@ struct ip_vs_conn *ip_vs_ct_in_get ct_read_unlock(hash); IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n", - ip_vs_proto_name(protocol), - IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port), - IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port), + ip_vs_proto_name(p->protocol), + IP_VS_DBG_ADDR(p->af, p->caddr), ntohs(p->cport), + IP_VS_DBG_ADDR(p->af, p->vaddr), ntohs(p->vport), cp ? "hit" : "not hit"); return cp; } -/* - * Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. - * Called for pkts coming from inside-to-OUTside. - * s_addr, s_port: pkt source address (inside host) - * d_addr, d_port: pkt dest address (foreign host) - */ -struct ip_vs_conn *ip_vs_conn_out_get -(int af, int protocol, const union nf_inet_addr *s_addr, __be16 s_port, - const union nf_inet_addr *d_addr, __be16 d_port) +/* Gets ip_vs_conn associated with supplied parameters in the ip_vs_conn_tab. + * Called for pkts coming from inside-to-OUTside. + * p->caddr, p->cport: pkt source address (inside host) + * p->vaddr, p->vport: pkt dest address (foreign host) */ +struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) { unsigned hash; struct ip_vs_conn *cp, *ret=NULL; @@ -351,16 +357,16 @@ struct ip_vs_conn *ip_vs_conn_out_get /* * Check for "full" addressed entries */ - hash = ip_vs_conn_hashkey(af, protocol, d_addr, d_port); + hash = ip_vs_conn_hashkey(p->af, p->protocol, p->vaddr, p->vport); ct_read_lock(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { - if (cp->af == af && - ip_vs_addr_equal(af, d_addr, &cp->caddr) && - ip_vs_addr_equal(af, s_addr, &cp->daddr) && - d_port == cp->cport && s_port == cp->dport && - protocol == cp->protocol) { + if (cp->af == p->af && + ip_vs_addr_equal(p->af, p->vaddr, &cp->caddr) && + ip_vs_addr_equal(p->af, p->caddr, &cp->daddr) && + p->vport == cp->cport && p->cport == cp->dport && + p->protocol == cp->protocol) { /* HIT */ atomic_inc(&cp->refcnt); ret = cp; @@ -371,9 +377,9 @@ struct ip_vs_conn *ip_vs_conn_out_get ct_read_unlock(hash); IP_VS_DBG_BUF(9, "lookup/out %s %s:%d->%s:%d %s\n", - ip_vs_proto_name(protocol), - IP_VS_DBG_ADDR(af, s_addr), ntohs(s_port), - IP_VS_DBG_ADDR(af, d_addr), ntohs(d_port), + ip_vs_proto_name(p->protocol), + IP_VS_DBG_ADDR(p->af, p->caddr), ntohs(p->cport), + IP_VS_DBG_ADDR(p->af, p->vaddr), ntohs(p->vport), ret ? "hit" : "not hit"); return ret; @@ -385,20 +391,12 @@ ip_vs_conn_out_get_proto(int af, const struct sk_buff *skb, const struct ip_vs_iphdr *iph, unsigned int proto_off, int inverse) { - __be16 _ports[2], *pptr; + struct ip_vs_conn_param p; - pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); - if (pptr == NULL) + if (ip_vs_conn_fill_param_proto(af, skb, iph, proto_off, inverse, &p)) return NULL; - if (likely(!inverse)) - return ip_vs_conn_out_get(af, iph->protocol, - &iph->saddr, pptr[0], - &iph->daddr, pptr[1]); - else - return ip_vs_conn_out_get(af, iph->protocol, - &iph->daddr, pptr[1], - &iph->saddr, pptr[0]); + return ip_vs_conn_out_get(&p); } EXPORT_SYMBOL_GPL(ip_vs_conn_out_get_proto); @@ -758,13 +756,12 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp) * Create a new connection entry and hash it into the ip_vs_conn_tab */ struct ip_vs_conn * -ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport, - const union nf_inet_addr *vaddr, __be16 vport, +ip_vs_conn_new(const struct ip_vs_conn_param *p, const union nf_inet_addr *daddr, __be16 dport, unsigned flags, struct ip_vs_dest *dest) { struct ip_vs_conn *cp; - struct ip_vs_protocol *pp = ip_vs_proto_get(proto); + struct ip_vs_protocol *pp = ip_vs_proto_get(p->protocol); cp = kmem_cache_zalloc(ip_vs_conn_cachep, GFP_ATOMIC); if (cp == NULL) { @@ -774,14 +771,14 @@ ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport, INIT_LIST_HEAD(&cp->c_list); setup_timer(&cp->timer, ip_vs_conn_expire, (unsigned long)cp); - cp->af = af; - cp->protocol = proto; - ip_vs_addr_copy(af, &cp->caddr, caddr); - cp->cport = cport; - ip_vs_addr_copy(af, &cp->vaddr, vaddr); - cp->vport = vport; + cp->af = p->af; + cp->protocol = p->protocol; + ip_vs_addr_copy(p->af, &cp->caddr, p->caddr); + cp->cport = p->cport; + ip_vs_addr_copy(p->af, &cp->vaddr, p->vaddr); + cp->vport = p->vport; /* proto should only be IPPROTO_IP if d_addr is a fwmark */ - ip_vs_addr_copy(proto == IPPROTO_IP ? AF_UNSPEC : af, + ip_vs_addr_copy(p->protocol == IPPROTO_IP ? AF_UNSPEC : p->af, &cp->daddr, daddr); cp->dport = dport; cp->flags = flags; @@ -810,7 +807,7 @@ ip_vs_conn_new(int af, int proto, const union nf_inet_addr *caddr, __be16 cport, /* Bind its packet transmitter */ #ifdef CONFIG_IP_VS_IPV6 - if (af == AF_INET6) + if (p->af == AF_INET6) ip_vs_bind_xmit_v6(cp); else #endif diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 70a5cacf86d..87602a62458 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -193,14 +193,11 @@ ip_vs_sched_persist(struct ip_vs_service *svc, struct ip_vs_iphdr iph; struct ip_vs_dest *dest; struct ip_vs_conn *ct; - int protocol = iph.protocol; __be16 dport = 0; /* destination port to forward */ - __be16 vport = 0; /* virtual service port */ unsigned int flags; + struct ip_vs_conn_param param; union nf_inet_addr snet; /* source network of the client, after masking */ - const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) }; - const union nf_inet_addr *vaddr = &iph.daddr; ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); @@ -232,6 +229,11 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * is created for other persistent services. */ { + int protocol = iph.protocol; + const union nf_inet_addr *vaddr = &iph.daddr; + const union nf_inet_addr fwmark = { .ip = htonl(svc->fwmark) }; + __be16 vport = 0; + if (ports[1] == svc->port) { /* non-FTP template: * @@ -253,11 +255,12 @@ ip_vs_sched_persist(struct ip_vs_service *svc, vaddr = &fwmark; } } + ip_vs_conn_fill_param(svc->af, protocol, &snet, 0, + vaddr, vport, ¶m); } /* Check if a template already exists */ - ct = ip_vs_ct_in_get(svc->af, protocol, &snet, 0, vaddr, vport); - + ct = ip_vs_ct_in_get(¶m); if (!ct || !ip_vs_check_template(ct)) { /* No template found or the dest of the connection * template is not available. @@ -272,8 +275,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, dport = dest->port; /* Create a template */ - ct = ip_vs_conn_new(svc->af, protocol, &snet, 0,vaddr, vport, - &dest->addr, dport, + ct = ip_vs_conn_new(¶m, &dest->addr, dport, IP_VS_CONN_F_TEMPLATE, dest); if (ct == NULL) return NULL; @@ -294,12 +296,9 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* * Create a new connection according to the template */ - cp = ip_vs_conn_new(svc->af, iph.protocol, - &iph.saddr, ports[0], - &iph.daddr, ports[1], - &dest->addr, dport, - flags, - dest); + ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, ports[0], + &iph.daddr, ports[1], ¶m); + cp = ip_vs_conn_new(¶m, &dest->addr, dport, flags, dest); if (cp == NULL) { ip_vs_conn_put(ct); return NULL; @@ -366,14 +365,16 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) /* * Create a connection entry. */ - cp = ip_vs_conn_new(svc->af, iph.protocol, - &iph.saddr, pptr[0], - &iph.daddr, pptr[1], - &dest->addr, dest->port ? dest->port : pptr[1], - flags, - dest); - if (cp == NULL) - return NULL; + { + struct ip_vs_conn_param p; + ip_vs_conn_fill_param(svc->af, iph.protocol, &iph.saddr, + pptr[0], &iph.daddr, pptr[1], &p); + cp = ip_vs_conn_new(&p, &dest->addr, + dest->port ? dest->port : pptr[1], + flags, dest); + if (!cp) + return NULL; + } IP_VS_DBG_BUF(6, "Schedule fwd:%c c:%s:%u v:%s:%u " "d:%s:%u conn->flags:%X conn->refcnt:%d\n", @@ -429,14 +430,17 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, /* create a new connection entry */ IP_VS_DBG(6, "%s(): create a cache_bypass entry\n", __func__); - cp = ip_vs_conn_new(svc->af, iph.protocol, - &iph.saddr, pptr[0], - &iph.daddr, pptr[1], - &daddr, 0, - IP_VS_CONN_F_BYPASS | flags, - NULL); - if (cp == NULL) - return NF_DROP; + { + struct ip_vs_conn_param p; + ip_vs_conn_fill_param(svc->af, iph.protocol, + &iph.saddr, pptr[0], + &iph.daddr, pptr[1], &p); + cp = ip_vs_conn_new(&p, &daddr, 0, + IP_VS_CONN_F_BYPASS | flags, + NULL); + if (!cp) + return NF_DROP; + } /* statistics */ ip_vs_in_stats(cp, skb); diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 9cd375f94d6..090889a3b3a 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -195,13 +195,17 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, /* * Now update or create an connection entry for it */ - n_cp = ip_vs_conn_out_get(AF_INET, iph->protocol, &from, port, - &cp->caddr, 0); + { + struct ip_vs_conn_param p; + ip_vs_conn_fill_param(AF_INET, iph->protocol, + &from, port, &cp->caddr, 0, &p); + n_cp = ip_vs_conn_out_get(&p); + } if (!n_cp) { - n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP, - &cp->caddr, 0, - &cp->vaddr, port, - &from, port, + struct ip_vs_conn_param p; + ip_vs_conn_fill_param(AF_INET, IPPROTO_TCP, &cp->caddr, + 0, &cp->vaddr, port, &p); + n_cp = ip_vs_conn_new(&p, &from, port, IP_VS_CONN_F_NO_CPORT | IP_VS_CONN_F_NFCT, cp->dest); @@ -347,21 +351,22 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, ip_vs_proto_name(iph->protocol), &to.ip, ntohs(port), &cp->vaddr.ip, 0); - n_cp = ip_vs_conn_in_get(AF_INET, iph->protocol, - &to, port, - &cp->vaddr, htons(ntohs(cp->vport)-1)); - if (!n_cp) { - n_cp = ip_vs_conn_new(AF_INET, IPPROTO_TCP, - &to, port, + { + struct ip_vs_conn_param p; + ip_vs_conn_fill_param(AF_INET, iph->protocol, &to, port, &cp->vaddr, htons(ntohs(cp->vport)-1), - &cp->daddr, htons(ntohs(cp->dport)-1), - IP_VS_CONN_F_NFCT, - cp->dest); - if (!n_cp) - return 0; + &p); + n_cp = ip_vs_conn_in_get(&p); + if (!n_cp) { + n_cp = ip_vs_conn_new(&p, &cp->daddr, + htons(ntohs(cp->dport)-1), + IP_VS_CONN_F_NFCT, cp->dest); + if (!n_cp) + return 0; - /* add its controller */ - ip_vs_control_add(n_cp, cp); + /* add its controller */ + ip_vs_control_add(n_cp, cp); + } } /* diff --git a/net/netfilter/ipvs/ip_vs_nfct.c b/net/netfilter/ipvs/ip_vs_nfct.c index c038458d029..4680647cd45 100644 --- a/net/netfilter/ipvs/ip_vs_nfct.c +++ b/net/netfilter/ipvs/ip_vs_nfct.c @@ -140,6 +140,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct, { struct nf_conntrack_tuple *orig, new_reply; struct ip_vs_conn *cp; + struct ip_vs_conn_param p; if (exp->tuple.src.l3num != PF_INET) return; @@ -154,9 +155,10 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct, /* RS->CLIENT */ orig = &ct->tuplehash[IP_CT_DIR_ORIGINAL].tuple; - cp = ip_vs_conn_out_get(exp->tuple.src.l3num, orig->dst.protonum, - &orig->src.u3, orig->src.u.tcp.port, - &orig->dst.u3, orig->dst.u.tcp.port); + ip_vs_conn_fill_param(exp->tuple.src.l3num, orig->dst.protonum, + &orig->src.u3, orig->src.u.tcp.port, + &orig->dst.u3, orig->dst.u.tcp.port, &p); + cp = ip_vs_conn_out_get(&p); if (cp) { /* Change reply CLIENT->RS to CLIENT->VS */ new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; @@ -176,9 +178,7 @@ static void ip_vs_nfct_expect_callback(struct nf_conn *ct, } /* CLIENT->VS */ - cp = ip_vs_conn_in_get(exp->tuple.src.l3num, orig->dst.protonum, - &orig->src.u3, orig->src.u.tcp.port, - &orig->dst.u3, orig->dst.u.tcp.port); + cp = ip_vs_conn_in_get(&p); if (cp) { /* Change reply VS->CLIENT to RS->CLIENT */ new_reply = ct->tuplehash[IP_CT_DIR_REPLY].tuple; diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c index 1892dfc12fd..8956ef33ea6 100644 --- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c @@ -40,6 +40,19 @@ struct isakmp_hdr { #define PORT_ISAKMP 500 +static void +ah_esp_conn_fill_param_proto(int af, const struct ip_vs_iphdr *iph, + int inverse, struct ip_vs_conn_param *p) +{ + if (likely(!inverse)) + ip_vs_conn_fill_param(af, IPPROTO_UDP, + &iph->saddr, htons(PORT_ISAKMP), + &iph->daddr, htons(PORT_ISAKMP), p); + else + ip_vs_conn_fill_param(af, IPPROTO_UDP, + &iph->daddr, htons(PORT_ISAKMP), + &iph->saddr, htons(PORT_ISAKMP), p); +} static struct ip_vs_conn * ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, @@ -47,21 +60,10 @@ ah_esp_conn_in_get(int af, const struct sk_buff *skb, struct ip_vs_protocol *pp, int inverse) { struct ip_vs_conn *cp; + struct ip_vs_conn_param p; - if (likely(!inverse)) { - cp = ip_vs_conn_in_get(af, IPPROTO_UDP, - &iph->saddr, - htons(PORT_ISAKMP), - &iph->daddr, - htons(PORT_ISAKMP)); - } else { - cp = ip_vs_conn_in_get(af, IPPROTO_UDP, - &iph->daddr, - htons(PORT_ISAKMP), - &iph->saddr, - htons(PORT_ISAKMP)); - } - + ah_esp_conn_fill_param_proto(af, iph, inverse, &p); + cp = ip_vs_conn_in_get(&p); if (!cp) { /* * We are not sure if the packet is from our @@ -87,21 +89,10 @@ ah_esp_conn_out_get(int af, const struct sk_buff *skb, int inverse) { struct ip_vs_conn *cp; + struct ip_vs_conn_param p; - if (likely(!inverse)) { - cp = ip_vs_conn_out_get(af, IPPROTO_UDP, - &iph->saddr, - htons(PORT_ISAKMP), - &iph->daddr, - htons(PORT_ISAKMP)); - } else { - cp = ip_vs_conn_out_get(af, IPPROTO_UDP, - &iph->daddr, - htons(PORT_ISAKMP), - &iph->saddr, - htons(PORT_ISAKMP)); - } - + ah_esp_conn_fill_param_proto(af, iph, inverse, &p); + cp = ip_vs_conn_out_get(&p); if (!cp) { IP_VS_DBG_BUF(12, "Unknown ISAKMP entry for inout packet " "%s%s %s->%s\n", diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index 7ba06939829..f68631f75f0 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -301,6 +301,7 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) struct ip_vs_conn *cp; struct ip_vs_protocol *pp; struct ip_vs_dest *dest; + struct ip_vs_conn_param param; char *p; int i; @@ -370,18 +371,17 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) } } - if (!(flags & IP_VS_CONN_F_TEMPLATE)) - cp = ip_vs_conn_in_get(AF_INET, s->protocol, - (union nf_inet_addr *)&s->caddr, - s->cport, - (union nf_inet_addr *)&s->vaddr, - s->vport); - else - cp = ip_vs_ct_in_get(AF_INET, s->protocol, - (union nf_inet_addr *)&s->caddr, - s->cport, - (union nf_inet_addr *)&s->vaddr, - s->vport); + { + ip_vs_conn_fill_param(AF_INET, s->protocol, + (union nf_inet_addr *)&s->caddr, + s->cport, + (union nf_inet_addr *)&s->vaddr, + s->vport, ¶m); + if (!(flags & IP_VS_CONN_F_TEMPLATE)) + cp = ip_vs_conn_in_get(¶m); + else + cp = ip_vs_ct_in_get(¶m); + } if (!cp) { /* * Find the appropriate destination for the connection. @@ -406,14 +406,9 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) else flags &= ~IP_VS_CONN_F_INACTIVE; } - cp = ip_vs_conn_new(AF_INET, s->protocol, - (union nf_inet_addr *)&s->caddr, - s->cport, - (union nf_inet_addr *)&s->vaddr, - s->vport, + cp = ip_vs_conn_new(¶m, (union nf_inet_addr *)&s->daddr, - s->dport, - flags, dest); + s->dport, flags, dest); if (dest) atomic_dec(&dest->refcnt); if (!cp) { -- cgit v1.2.3 From 85999283a21ab2dd37427fdd8c8e8af57223977c Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sun, 22 Aug 2010 21:37:53 +0900 Subject: IPVS: Add struct ip_vs_pe Signed-off-by: Simon Horman Acked-by: Julian Anastasov --- include/linux/ip_vs.h | 2 ++ include/net/ip_vs.h | 28 ++++++++++++++++- net/netfilter/ipvs/ip_vs_conn.c | 67 ++++++++++++++++++++++++++++++++++------- net/netfilter/ipvs/ip_vs_core.c | 36 +++++++++++++++++----- net/netfilter/ipvs/ip_vs_sync.c | 17 +++++++++-- 5 files changed, 129 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h index df772861372..0a9c44d6429 100644 --- a/include/linux/ip_vs.h +++ b/include/linux/ip_vs.h @@ -99,8 +99,10 @@ 0) #define IP_VS_SCHEDNAME_MAXLEN 16 +#define IP_VS_PENAME_MAXLEN 16 #define IP_VS_IFNAME_MAXLEN 16 +#define IP_VS_PEDATA_MAXLEN 255 /* * The struct ip_vs_service_user and struct ip_vs_dest_user are diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index d4da774eca7..b6b309d05e4 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -364,6 +364,10 @@ struct ip_vs_conn_param { __be16 vport; __u16 protocol; u16 af; + + const struct ip_vs_pe *pe; + char *pe_data; + __u8 pe_data_len; }; /* @@ -416,6 +420,9 @@ struct ip_vs_conn { void *app_data; /* Application private data */ struct ip_vs_seq in_seq; /* incoming seq. struct */ struct ip_vs_seq out_seq; /* outgoing seq. struct */ + + char *pe_data; + __u8 pe_data_len; }; @@ -486,6 +493,9 @@ struct ip_vs_service { struct ip_vs_scheduler *scheduler; /* bound scheduler object */ rwlock_t sched_lock; /* lock sched_data */ void *sched_data; /* scheduler application data */ + + /* alternate persistence engine */ + struct ip_vs_pe *pe; }; @@ -549,6 +559,20 @@ struct ip_vs_scheduler { const struct sk_buff *skb); }; +/* The persistence engine object */ +struct ip_vs_pe { + struct list_head n_list; /* d-linked list head */ + char *name; /* scheduler name */ + atomic_t refcnt; /* reference counter */ + struct module *module; /* THIS_MODULE/NULL */ + + /* get the connection template, if any */ + int (*fill_param)(struct ip_vs_conn_param *p, struct sk_buff *skb); + bool (*ct_match)(const struct ip_vs_conn_param *p, + struct ip_vs_conn *ct); + u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, u32 initval, + bool inverse); +}; /* * The application module object (a.k.a. app incarnation) @@ -648,6 +672,8 @@ static inline void ip_vs_conn_fill_param(int af, int protocol, p->cport = cport; p->vaddr = vaddr; p->vport = vport; + p->pe = NULL; + p->pe_data = NULL; } struct ip_vs_conn *ip_vs_conn_in_get(const struct ip_vs_conn_param *p); @@ -803,7 +829,7 @@ extern int ip_vs_unbind_scheduler(struct ip_vs_service *svc); extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name); extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); extern struct ip_vs_conn * -ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb); +ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb); extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_protocol *pp); diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index deeb906a797..06da21e9740 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -148,6 +148,42 @@ static unsigned int ip_vs_conn_hashkey(int af, unsigned proto, & ip_vs_conn_tab_mask; } +static unsigned int ip_vs_conn_hashkey_param(const struct ip_vs_conn_param *p, + bool inverse) +{ + const union nf_inet_addr *addr; + __be16 port; + + if (p->pe && p->pe->hashkey_raw) + return p->pe->hashkey_raw(p, ip_vs_conn_rnd, inverse) & + ip_vs_conn_tab_mask; + + if (likely(!inverse)) { + addr = p->caddr; + port = p->cport; + } else { + addr = p->vaddr; + port = p->vport; + } + + return ip_vs_conn_hashkey(p->af, p->protocol, addr, port); +} + +static unsigned int ip_vs_conn_hashkey_conn(const struct ip_vs_conn *cp) +{ + struct ip_vs_conn_param p; + + ip_vs_conn_fill_param(cp->af, cp->protocol, &cp->caddr, cp->cport, + NULL, 0, &p); + + if (cp->dest && cp->dest->svc->pe) { + p.pe = cp->dest->svc->pe; + p.pe_data = cp->pe_data; + p.pe_data_len = cp->pe_data_len; + } + + return ip_vs_conn_hashkey_param(&p, false); +} /* * Hashes ip_vs_conn in ip_vs_conn_tab by proto,addr,port. @@ -162,7 +198,7 @@ static inline int ip_vs_conn_hash(struct ip_vs_conn *cp) return 0; /* Hash by protocol, client address and port */ - hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); + hash = ip_vs_conn_hashkey_conn(cp); ct_write_lock(hash); spin_lock(&cp->lock); @@ -195,7 +231,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) int ret; /* unhash it and decrease its reference counter */ - hash = ip_vs_conn_hashkey(cp->af, cp->protocol, &cp->caddr, cp->cport); + hash = ip_vs_conn_hashkey_conn(cp); ct_write_lock(hash); spin_lock(&cp->lock); @@ -227,7 +263,7 @@ __ip_vs_conn_in_get(const struct ip_vs_conn_param *p) unsigned hash; struct ip_vs_conn *cp; - hash = ip_vs_conn_hashkey(p->af, p->protocol, p->caddr, p->cport); + hash = ip_vs_conn_hashkey_param(p, false); ct_read_lock(hash); @@ -312,11 +348,17 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) unsigned hash; struct ip_vs_conn *cp; - hash = ip_vs_conn_hashkey(p->af, p->protocol, p->caddr, p->cport); + hash = ip_vs_conn_hashkey_param(p, false); ct_read_lock(hash); list_for_each_entry(cp, &ip_vs_conn_tab[hash], c_list) { + if (p->pe && p->pe->ct_match) { + if (p->pe->ct_match(p, cp)) + goto out; + continue; + } + if (cp->af == p->af && ip_vs_addr_equal(p->af, p->caddr, &cp->caddr) && /* protocol should only be IPPROTO_IP if @@ -325,15 +367,14 @@ struct ip_vs_conn *ip_vs_ct_in_get(const struct ip_vs_conn_param *p) p->af, p->vaddr, &cp->vaddr) && p->cport == cp->cport && p->vport == cp->vport && cp->flags & IP_VS_CONN_F_TEMPLATE && - p->protocol == cp->protocol) { - /* HIT */ - atomic_inc(&cp->refcnt); + p->protocol == cp->protocol) goto out; - } } cp = NULL; out: + if (cp) + atomic_inc(&cp->refcnt); ct_read_unlock(hash); IP_VS_DBG_BUF(9, "template lookup/in %s %s:%d->%s:%d %s\n", @@ -357,7 +398,7 @@ struct ip_vs_conn *ip_vs_conn_out_get(const struct ip_vs_conn_param *p) /* * Check for "full" addressed entries */ - hash = ip_vs_conn_hashkey(p->af, p->protocol, p->vaddr, p->vport); + hash = ip_vs_conn_hashkey_param(p, true); ct_read_lock(hash); @@ -722,6 +763,7 @@ static void ip_vs_conn_expire(unsigned long data) if (cp->flags & IP_VS_CONN_F_NFCT) ip_vs_conn_drop_conntrack(cp); + kfree(cp->pe_data); if (unlikely(cp->app != NULL)) ip_vs_unbind_app(cp); ip_vs_unbind_dest(cp); @@ -782,6 +824,10 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, &cp->daddr, daddr); cp->dport = dport; cp->flags = flags; + if (flags & IP_VS_CONN_F_TEMPLATE && p->pe_data) { + cp->pe_data = p->pe_data; + cp->pe_data_len = p->pe_data_len; + } spin_lock_init(&cp->lock); /* @@ -832,7 +878,6 @@ ip_vs_conn_new(const struct ip_vs_conn_param *p, return cp; } - /* * /proc/net/ip_vs_conn entries */ @@ -848,7 +893,7 @@ static void *ip_vs_conn_array(struct seq_file *seq, loff_t pos) list_for_each_entry(cp, &ip_vs_conn_tab[idx], c_list) { if (pos-- == 0) { seq->private = &ip_vs_conn_tab[idx]; - return cp; + return cp; } } ct_read_unlock_bh(idx); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 87602a62458..ab988938049 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -176,6 +176,19 @@ ip_vs_set_state(struct ip_vs_conn *cp, int direction, return pp->state_transition(cp, direction, skb, pp); } +static inline int +ip_vs_conn_fill_param_persist(const struct ip_vs_service *svc, + struct sk_buff *skb, int protocol, + const union nf_inet_addr *caddr, __be16 cport, + const union nf_inet_addr *vaddr, __be16 vport, + struct ip_vs_conn_param *p) +{ + ip_vs_conn_fill_param(svc->af, protocol, caddr, cport, vaddr, vport, p); + p->pe = svc->pe; + if (p->pe && p->pe->fill_param) + return p->pe->fill_param(p, skb); + return 0; +} /* * IPVS persistent scheduling function @@ -186,7 +199,7 @@ ip_vs_set_state(struct ip_vs_conn *cp, int direction, */ static struct ip_vs_conn * ip_vs_sched_persist(struct ip_vs_service *svc, - const struct sk_buff *skb, + struct sk_buff *skb, __be16 ports[2]) { struct ip_vs_conn *cp = NULL; @@ -255,8 +268,9 @@ ip_vs_sched_persist(struct ip_vs_service *svc, vaddr = &fwmark; } } - ip_vs_conn_fill_param(svc->af, protocol, &snet, 0, - vaddr, vport, ¶m); + if (ip_vs_conn_fill_param_persist(svc, skb, protocol, &snet, 0, + vaddr, vport, ¶m)) + return NULL; } /* Check if a template already exists */ @@ -268,22 +282,30 @@ ip_vs_sched_persist(struct ip_vs_service *svc, dest = svc->scheduler->schedule(svc, skb); if (!dest) { IP_VS_DBG(1, "p-schedule: no dest found.\n"); + kfree(param.pe_data); return NULL; } if (ports[1] == svc->port && svc->port != FTPPORT) dport = dest->port; - /* Create a template */ + /* Create a template + * This adds param.pe_data to the template, + * and thus param.pe_data will be destroyed + * when the template expires */ ct = ip_vs_conn_new(¶m, &dest->addr, dport, IP_VS_CONN_F_TEMPLATE, dest); - if (ct == NULL) + if (ct == NULL) { + kfree(param.pe_data); return NULL; + } ct->timeout = svc->timeout; - } else + } else { /* set destination with the found template */ dest = ct->dest; + kfree(param.pe_data); + } dport = ports[1]; if (dport == svc->port && dest->port) @@ -322,7 +344,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * Protocols supported: TCP, UDP */ struct ip_vs_conn * -ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) +ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb) { struct ip_vs_conn *cp = NULL; struct ip_vs_iphdr iph; diff --git a/net/netfilter/ipvs/ip_vs_sync.c b/net/netfilter/ipvs/ip_vs_sync.c index f68631f75f0..ab85aedea17 100644 --- a/net/netfilter/ipvs/ip_vs_sync.c +++ b/net/netfilter/ipvs/ip_vs_sync.c @@ -288,6 +288,16 @@ void ip_vs_sync_conn(struct ip_vs_conn *cp) ip_vs_sync_conn(cp->control); } +static inline int +ip_vs_conn_fill_param_sync(int af, int protocol, + const union nf_inet_addr *caddr, __be16 cport, + const union nf_inet_addr *vaddr, __be16 vport, + struct ip_vs_conn_param *p) +{ + /* XXX: Need to take into account persistence engine */ + ip_vs_conn_fill_param(af, protocol, caddr, cport, vaddr, vport, p); + return 0; +} /* * Process received multicast message and create the corresponding @@ -372,11 +382,14 @@ static void ip_vs_process_message(const char *buffer, const size_t buflen) } { - ip_vs_conn_fill_param(AF_INET, s->protocol, + if (ip_vs_conn_fill_param_sync(AF_INET, s->protocol, (union nf_inet_addr *)&s->caddr, s->cport, (union nf_inet_addr *)&s->vaddr, - s->vport, ¶m); + s->vport, ¶m)) { + pr_err("ip_vs_conn_fill_param_sync failed"); + return; + } if (!(flags & IP_VS_CONN_F_TEMPLATE)) cp = ip_vs_conn_in_get(¶m); else -- cgit v1.2.3 From a3c918acd29a96aba3b46bf50136e7953a480d17 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sun, 22 Aug 2010 21:37:53 +0900 Subject: IPVS: Add persistence engine data to /proc/net/ip_vs_conn This shouldn't break compatibility with userspace as the new data is at the end of the line. I have confirmed that this doesn't break ipvsadm, the main (only?) user-space user of this data. Signed-off-by: Simon Horman Acked-by: Julian Anastasov --- include/net/ip_vs.h | 1 + net/netfilter/ipvs/ip_vs_conn.c | 25 ++++++++++++++++++++----- 2 files changed, 21 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index b6b309d05e4..974daf52ba7 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -572,6 +572,7 @@ struct ip_vs_pe { struct ip_vs_conn *ct); u32 (*hashkey_raw)(const struct ip_vs_conn_param *p, u32 initval, bool inverse); + int (*show_pe_data)(const struct ip_vs_conn *cp, char *buf); }; /* diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 06da21e9740..4adedefdf56 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -950,30 +950,45 @@ static int ip_vs_conn_seq_show(struct seq_file *seq, void *v) if (v == SEQ_START_TOKEN) seq_puts(seq, - "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires\n"); + "Pro FromIP FPrt ToIP TPrt DestIP DPrt State Expires PEName PEData\n"); else { const struct ip_vs_conn *cp = v; + char pe_data[IP_VS_PENAME_MAXLEN + IP_VS_PEDATA_MAXLEN + 3]; + size_t len = 0; + + if (cp->dest && cp->dest->svc->pe && + cp->dest->svc->pe->show_pe_data) { + pe_data[0] = ' '; + len = strlen(cp->dest->svc->pe->name); + memcpy(pe_data + 1, cp->dest->svc->pe->name, len); + pe_data[len + 1] = ' '; + len += 2; + len += cp->dest->svc->pe->show_pe_data(cp, + pe_data + len); + } + pe_data[len] = '\0'; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) - seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X %pI6 %04X %-11s %7lu\n", + seq_printf(seq, "%-3s %pI6 %04X %pI6 %04X " + "%pI6 %04X %-11s %7lu%s\n", ip_vs_proto_name(cp->protocol), &cp->caddr.in6, ntohs(cp->cport), &cp->vaddr.in6, ntohs(cp->vport), &cp->daddr.in6, ntohs(cp->dport), ip_vs_state_name(cp->protocol, cp->state), - (cp->timer.expires-jiffies)/HZ); + (cp->timer.expires-jiffies)/HZ, pe_data); else #endif seq_printf(seq, "%-3s %08X %04X %08X %04X" - " %08X %04X %-11s %7lu\n", + " %08X %04X %-11s %7lu%s\n", ip_vs_proto_name(cp->protocol), ntohl(cp->caddr.ip), ntohs(cp->cport), ntohl(cp->vaddr.ip), ntohs(cp->vport), ntohl(cp->daddr.ip), ntohs(cp->dport), ip_vs_state_name(cp->protocol, cp->state), - (cp->timer.expires-jiffies)/HZ); + (cp->timer.expires-jiffies)/HZ, pe_data); } return 0; } -- cgit v1.2.3 From 8be67a6617b3403551fccb67b1c624c659419515 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sun, 22 Aug 2010 21:37:54 +0900 Subject: IPVS: management of persistence engine modules This is based heavily on the scheduler management code Signed-off-by: Simon Horman Acked-by: Julian Anastasov --- include/net/ip_vs.h | 6 ++ net/netfilter/ipvs/Makefile | 2 +- net/netfilter/ipvs/ip_vs_pe.c | 147 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 154 insertions(+), 1 deletion(-) create mode 100644 net/netfilter/ipvs/ip_vs_pe.c (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 974daf52ba7..a6b93a26d4e 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -796,6 +796,12 @@ extern int ip_vs_app_pkt_in(struct ip_vs_conn *, struct sk_buff *skb); extern int ip_vs_app_init(void); extern void ip_vs_app_cleanup(void); +void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe); +void ip_vs_unbind_pe(struct ip_vs_service *svc); +int register_ip_vs_pe(struct ip_vs_pe *pe); +int unregister_ip_vs_pe(struct ip_vs_pe *pe); +extern struct ip_vs_pe *ip_vs_pe_get(const char *name); +extern void ip_vs_pe_put(struct ip_vs_pe *pe); /* * IPVS protocol functions (from ip_vs_proto.c) diff --git a/net/netfilter/ipvs/Makefile b/net/netfilter/ipvs/Makefile index 349fe8819b8..4a87bf3c629 100644 --- a/net/netfilter/ipvs/Makefile +++ b/net/netfilter/ipvs/Makefile @@ -14,7 +14,7 @@ ip_vs-extra_objs-$(CONFIG_IP_VS_NFCT) += ip_vs_nfct.o ip_vs-objs := ip_vs_conn.o ip_vs_core.o ip_vs_ctl.o ip_vs_sched.o \ ip_vs_xmit.o ip_vs_app.o ip_vs_sync.o \ - ip_vs_est.o ip_vs_proto.o \ + ip_vs_est.o ip_vs_proto.o ip_vs_pe.o \ $(ip_vs_proto-objs-y) $(ip_vs-extra_objs-y) diff --git a/net/netfilter/ipvs/ip_vs_pe.c b/net/netfilter/ipvs/ip_vs_pe.c new file mode 100644 index 00000000000..3414af70ee1 --- /dev/null +++ b/net/netfilter/ipvs/ip_vs_pe.c @@ -0,0 +1,147 @@ +#define KMSG_COMPONENT "IPVS" +#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt + +#include +#include +#include +#include +#include +#include + +#include + +/* IPVS pe list */ +static LIST_HEAD(ip_vs_pe); + +/* lock for service table */ +static DEFINE_SPINLOCK(ip_vs_pe_lock); + +/* Bind a service with a pe */ +void ip_vs_bind_pe(struct ip_vs_service *svc, struct ip_vs_pe *pe) +{ + svc->pe = pe; +} + +/* Unbind a service from its pe */ +void ip_vs_unbind_pe(struct ip_vs_service *svc) +{ + svc->pe = NULL; +} + +/* Get pe in the pe list by name */ +static struct ip_vs_pe * +ip_vs_pe_getbyname(const char *pe_name) +{ + struct ip_vs_pe *pe; + + IP_VS_DBG(2, "%s(): pe_name \"%s\"\n", __func__, + pe_name); + + spin_lock_bh(&ip_vs_pe_lock); + + list_for_each_entry(pe, &ip_vs_pe, n_list) { + /* Test and get the modules atomically */ + if (pe->module && + !try_module_get(pe->module)) { + /* This pe is just deleted */ + continue; + } + if (strcmp(pe_name, pe->name)==0) { + /* HIT */ + spin_unlock_bh(&ip_vs_pe_lock); + return pe; + } + if (pe->module) + module_put(pe->module); + } + + spin_unlock_bh(&ip_vs_pe_lock); + return NULL; +} + +/* Lookup pe and try to load it if it doesn't exist */ +struct ip_vs_pe *ip_vs_pe_get(const char *name) +{ + struct ip_vs_pe *pe; + + /* Search for the pe by name */ + pe = ip_vs_pe_getbyname(name); + + /* If pe not found, load the module and search again */ + if (!pe) { + request_module("ip_vs_pe_%s", name); + pe = ip_vs_pe_getbyname(name); + } + + return pe; +} + +void ip_vs_pe_put(struct ip_vs_pe *pe) +{ + if (pe && pe->module) + module_put(pe->module); +} + +/* Register a pe in the pe list */ +int register_ip_vs_pe(struct ip_vs_pe *pe) +{ + struct ip_vs_pe *tmp; + + /* increase the module use count */ + ip_vs_use_count_inc(); + + spin_lock_bh(&ip_vs_pe_lock); + + if (!list_empty(&pe->n_list)) { + spin_unlock_bh(&ip_vs_pe_lock); + ip_vs_use_count_dec(); + pr_err("%s(): [%s] pe already linked\n", + __func__, pe->name); + return -EINVAL; + } + + /* Make sure that the pe with this name doesn't exist + * in the pe list. + */ + list_for_each_entry(tmp, &ip_vs_pe, n_list) { + if (strcmp(tmp->name, pe->name) == 0) { + spin_unlock_bh(&ip_vs_pe_lock); + ip_vs_use_count_dec(); + pr_err("%s(): [%s] pe already existed " + "in the system\n", __func__, pe->name); + return -EINVAL; + } + } + /* Add it into the d-linked pe list */ + list_add(&pe->n_list, &ip_vs_pe); + spin_unlock_bh(&ip_vs_pe_lock); + + pr_info("[%s] pe registered.\n", pe->name); + + return 0; +} +EXPORT_SYMBOL_GPL(register_ip_vs_pe); + +/* Unregister a pe from the pe list */ +int unregister_ip_vs_pe(struct ip_vs_pe *pe) +{ + spin_lock_bh(&ip_vs_pe_lock); + if (list_empty(&pe->n_list)) { + spin_unlock_bh(&ip_vs_pe_lock); + pr_err("%s(): [%s] pe is not in the list. failed\n", + __func__, pe->name); + return -EINVAL; + } + + /* Remove it from the d-linked pe list */ + list_del(&pe->n_list); + spin_unlock_bh(&ip_vs_pe_lock); + + /* decrease the module use count */ + ip_vs_use_count_dec(); + + pr_info("[%s] pe unregistered.\n", pe->name); + + return 0; +} +EXPORT_SYMBOL_GPL(unregister_ip_vs_pe); -- cgit v1.2.3 From 0d1e71b04a04b6912e50926b9987c1e72facb1f3 Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Sun, 22 Aug 2010 21:37:54 +0900 Subject: IPVS: Allow configuration of persistence engines Allow the persistence engine of a virtual service to be set, edited and unset. This feature only works with the netlink user-space interface. Signed-off-by: Simon Horman Acked-by: Julian Anastasov --- include/linux/ip_vs.h | 3 +++ include/net/ip_vs.h | 1 + net/netfilter/ipvs/ip_vs_ctl.c | 57 +++++++++++++++++++++++++++++++++++++++--- 3 files changed, 58 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/linux/ip_vs.h b/include/linux/ip_vs.h index 0a9c44d6429..5f43a3b2e3a 100644 --- a/include/linux/ip_vs.h +++ b/include/linux/ip_vs.h @@ -336,6 +336,9 @@ enum { IPVS_SVC_ATTR_NETMASK, /* persistent netmask */ IPVS_SVC_ATTR_STATS, /* nested attribute for service stats */ + + IPVS_SVC_ATTR_PE_NAME, /* name of ct retriever */ + __IPVS_SVC_ATTR_MAX, }; diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index a6b93a26d4e..52fbe2308c3 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -444,6 +444,7 @@ struct ip_vs_service_user_kern { /* virtual service options */ char *sched_name; + char *pe_name; unsigned flags; /* virtual service flags */ unsigned timeout; /* persistent timeout in sec */ u32 netmask; /* persistent netmask */ diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 5f20caf47a1..a697591d0e2 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1134,6 +1134,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, { int ret = 0; struct ip_vs_scheduler *sched = NULL; + struct ip_vs_pe *pe = NULL; struct ip_vs_service *svc = NULL; /* increase the module use count */ @@ -1147,6 +1148,16 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, goto out_err; } + if (u->pe_name && *u->pe_name) { + pe = ip_vs_pe_get(u->pe_name); + if (pe == NULL) { + pr_info("persistence engine module ip_vs_pe_%s " + "not found\n", u->pe_name); + ret = -ENOENT; + goto out_err; + } + } + #ifdef CONFIG_IP_VS_IPV6 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) { ret = -EINVAL; @@ -1184,6 +1195,10 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, goto out_err; sched = NULL; + /* Bind the ct retriever */ + ip_vs_bind_pe(svc, pe); + pe = NULL; + /* Update the virtual service counters */ if (svc->port == FTPPORT) atomic_inc(&ip_vs_ftpsvc_counter); @@ -1215,6 +1230,7 @@ ip_vs_add_service(struct ip_vs_service_user_kern *u, kfree(svc); } ip_vs_scheduler_put(sched); + ip_vs_pe_put(pe); /* decrease the module use count */ ip_vs_use_count_dec(); @@ -1230,6 +1246,7 @@ static int ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) { struct ip_vs_scheduler *sched, *old_sched; + struct ip_vs_pe *pe = NULL, *old_pe = NULL; int ret = 0; /* @@ -1242,6 +1259,17 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) } old_sched = sched; + if (u->pe_name && *u->pe_name) { + pe = ip_vs_pe_get(u->pe_name); + if (pe == NULL) { + pr_info("persistence engine module ip_vs_pe_%s " + "not found\n", u->pe_name); + ret = -ENOENT; + goto out; + } + old_pe = pe; + } + #ifdef CONFIG_IP_VS_IPV6 if (u->af == AF_INET6 && (u->netmask < 1 || u->netmask > 128)) { ret = -EINVAL; @@ -1293,12 +1321,17 @@ ip_vs_edit_service(struct ip_vs_service *svc, struct ip_vs_service_user_kern *u) } } + old_pe = svc->pe; + if (pe != old_pe) { + ip_vs_unbind_pe(svc); + ip_vs_bind_pe(svc, pe); + } + out_unlock: write_unlock_bh(&__ip_vs_svc_lock); -#ifdef CONFIG_IP_VS_IPV6 out: -#endif ip_vs_scheduler_put(old_sched); + ip_vs_pe_put(old_pe); return ret; } @@ -1312,6 +1345,9 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) { struct ip_vs_dest *dest, *nxt; struct ip_vs_scheduler *old_sched; + struct ip_vs_pe *old_pe; + + pr_info("%s: enter\n", __func__); /* Count only IPv4 services for old get/setsockopt interface */ if (svc->af == AF_INET) @@ -1324,6 +1360,11 @@ static void __ip_vs_del_service(struct ip_vs_service *svc) ip_vs_unbind_scheduler(svc); ip_vs_scheduler_put(old_sched); + /* Unbind persistence engine */ + old_pe = svc->pe; + ip_vs_unbind_pe(svc); + ip_vs_pe_put(old_pe); + /* Unbind app inc */ if (svc->inc) { ip_vs_app_inc_put(svc->inc); @@ -2026,6 +2067,8 @@ static const unsigned char set_arglen[SET_CMDID(IP_VS_SO_SET_MAX)+1] = { static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc, struct ip_vs_service_user *usvc_compat) { + memset(usvc, 0, sizeof(*usvc)); + usvc->af = AF_INET; usvc->protocol = usvc_compat->protocol; usvc->addr.ip = usvc_compat->addr; @@ -2043,6 +2086,8 @@ static void ip_vs_copy_usvc_compat(struct ip_vs_service_user_kern *usvc, static void ip_vs_copy_udest_compat(struct ip_vs_dest_user_kern *udest, struct ip_vs_dest_user *udest_compat) { + memset(udest, 0, sizeof(*udest)); + udest->addr.ip = udest_compat->addr; udest->port = udest_compat->port; udest->conn_flags = udest_compat->conn_flags; @@ -2539,6 +2584,8 @@ static const struct nla_policy ip_vs_svc_policy[IPVS_SVC_ATTR_MAX + 1] = { [IPVS_SVC_ATTR_FWMARK] = { .type = NLA_U32 }, [IPVS_SVC_ATTR_SCHED_NAME] = { .type = NLA_NUL_STRING, .len = IP_VS_SCHEDNAME_MAXLEN }, + [IPVS_SVC_ATTR_PE_NAME] = { .type = NLA_NUL_STRING, + .len = IP_VS_PENAME_MAXLEN }, [IPVS_SVC_ATTR_FLAGS] = { .type = NLA_BINARY, .len = sizeof(struct ip_vs_flags) }, [IPVS_SVC_ATTR_TIMEOUT] = { .type = NLA_U32 }, @@ -2615,6 +2662,8 @@ static int ip_vs_genl_fill_service(struct sk_buff *skb, } NLA_PUT_STRING(skb, IPVS_SVC_ATTR_SCHED_NAME, svc->scheduler->name); + if (svc->pe) + NLA_PUT_STRING(skb, IPVS_SVC_ATTR_PE_NAME, svc->pe->name); NLA_PUT(skb, IPVS_SVC_ATTR_FLAGS, sizeof(flags), &flags); NLA_PUT_U32(skb, IPVS_SVC_ATTR_TIMEOUT, svc->timeout / HZ); NLA_PUT_U32(skb, IPVS_SVC_ATTR_NETMASK, svc->netmask); @@ -2741,11 +2790,12 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, /* If a full entry was requested, check for the additional fields */ if (full_entry) { - struct nlattr *nla_sched, *nla_flags, *nla_timeout, + struct nlattr *nla_sched, *nla_flags, *nla_pe, *nla_timeout, *nla_netmask; struct ip_vs_flags flags; nla_sched = attrs[IPVS_SVC_ATTR_SCHED_NAME]; + nla_pe = attrs[IPVS_SVC_ATTR_PE_NAME]; nla_flags = attrs[IPVS_SVC_ATTR_FLAGS]; nla_timeout = attrs[IPVS_SVC_ATTR_TIMEOUT]; nla_netmask = attrs[IPVS_SVC_ATTR_NETMASK]; @@ -2763,6 +2813,7 @@ static int ip_vs_genl_parse_service(struct ip_vs_service_user_kern *usvc, usvc->flags = (usvc->flags & ~flags.mask) | (flags.flags & flags.mask); usvc->sched_name = nla_data(nla_sched); + usvc->pe_name = nla_pe ? nla_data(nla_pe) : NULL; usvc->timeout = nla_get_u32(nla_timeout); usvc->netmask = nla_get_u32(nla_netmask); } -- cgit v1.2.3 From 0c200d935346fe0ebde9b6dffbb683dddd166fb9 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 4 Oct 2010 20:53:18 +0200 Subject: netfilter: nf_nat: make find/put static The functions nf_nat_proto_find_get and nf_nat_proto_put are only used internally in nf_nat_core. This might break some out of tree NAT module. Signed-off-by: Stephen Hemminger Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_nat_protocol.h | 3 --- net/ipv4/netfilter/nf_nat_core.c | 6 ++---- 2 files changed, 2 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_nat_protocol.h b/include/net/netfilter/nf_nat_protocol.h index df17bac46bf..93cc90d28e6 100644 --- a/include/net/netfilter/nf_nat_protocol.h +++ b/include/net/netfilter/nf_nat_protocol.h @@ -45,9 +45,6 @@ struct nf_nat_protocol { extern int nf_nat_protocol_register(const struct nf_nat_protocol *proto); extern void nf_nat_protocol_unregister(const struct nf_nat_protocol *proto); -extern const struct nf_nat_protocol *nf_nat_proto_find_get(u_int8_t protocol); -extern void nf_nat_proto_put(const struct nf_nat_protocol *proto); - /* Built-in protocols. */ extern const struct nf_nat_protocol nf_nat_protocol_tcp; extern const struct nf_nat_protocol nf_nat_protocol_udp; diff --git a/net/ipv4/netfilter/nf_nat_core.c b/net/ipv4/netfilter/nf_nat_core.c index 2c084b3a8f0..e2e00c4da88 100644 --- a/net/ipv4/netfilter/nf_nat_core.c +++ b/net/ipv4/netfilter/nf_nat_core.c @@ -47,7 +47,7 @@ __nf_nat_proto_find(u_int8_t protonum) return rcu_dereference(nf_nat_protos[protonum]); } -const struct nf_nat_protocol * +static const struct nf_nat_protocol * nf_nat_proto_find_get(u_int8_t protonum) { const struct nf_nat_protocol *p; @@ -60,14 +60,12 @@ nf_nat_proto_find_get(u_int8_t protonum) return p; } -EXPORT_SYMBOL_GPL(nf_nat_proto_find_get); -void +static void nf_nat_proto_put(const struct nf_nat_protocol *p) { module_put(p->me); } -EXPORT_SYMBOL_GPL(nf_nat_proto_put); /* We keep an extra hash for each conntrack, for fast searching. */ static inline unsigned int -- cgit v1.2.3 From eecc545856c8a0f27783a440d25f4ceaa1f95ce8 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 4 Oct 2010 23:24:21 +0200 Subject: netfilter: add missing xt_log.h file Forgot to add xt_log.h in commit a8defca0 (netfilter: ipt_LOG: add bufferisation to call printk() once) Signed-off-by: Patrick McHardy --- include/net/netfilter/xt_log.h | 54 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) create mode 100644 include/net/netfilter/xt_log.h (limited to 'include/net') diff --git a/include/net/netfilter/xt_log.h b/include/net/netfilter/xt_log.h new file mode 100644 index 00000000000..0dfb34a5b53 --- /dev/null +++ b/include/net/netfilter/xt_log.h @@ -0,0 +1,54 @@ +#define S_SIZE (1024 - (sizeof(unsigned int) + 1)) + +struct sbuff { + unsigned int count; + char buf[S_SIZE + 1]; +}; +static struct sbuff emergency, *emergency_ptr = &emergency; + +static int sb_add(struct sbuff *m, const char *f, ...) +{ + va_list args; + int len; + + if (likely(m->count < S_SIZE)) { + va_start(args, f); + len = vsnprintf(m->buf + m->count, S_SIZE - m->count, f, args); + va_end(args); + if (likely(m->count + len < S_SIZE)) { + m->count += len; + return 0; + } + } + m->count = S_SIZE; + printk_once(KERN_ERR KBUILD_MODNAME " please increase S_SIZE\n"); + return -1; +} + +static struct sbuff *sb_open(void) +{ + struct sbuff *m = kmalloc(sizeof(*m), GFP_ATOMIC); + + if (unlikely(!m)) { + local_bh_disable(); + do { + m = xchg(&emergency_ptr, NULL); + } while (!m); + } + m->count = 0; + return m; +} + +static void sb_close(struct sbuff *m) +{ + m->buf[m->count] = 0; + printk("%s\n", m->buf); + + if (likely(m != &emergency)) + kfree(m); + else { + xchg(&emergency_ptr, m); + local_bh_enable(); + } +} + -- cgit v1.2.3 From 1df9916e46451533463f227e6be57cc2cfca4c5f Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 4 Oct 2010 20:14:17 +0000 Subject: fib: fib_rules_cleanup can be static fib_rules_cleanup_ups is only defined and used in one place. Signed-off-by: Stephen Hemminger Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/fib_rules.h | 1 - net/core/fib_rules.c | 3 +-- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index e8923bc20f9..ac2fd002812 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -106,7 +106,6 @@ static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla) extern struct fib_rules_ops *fib_rules_register(const struct fib_rules_ops *, struct net *); extern void fib_rules_unregister(struct fib_rules_ops *); -extern void fib_rules_cleanup_ops(struct fib_rules_ops *); extern int fib_rules_lookup(struct fib_rules_ops *, struct flowi *, int flags, diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 332c2e31d04..cfb7d25c172 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -144,7 +144,7 @@ fib_rules_register(const struct fib_rules_ops *tmpl, struct net *net) } EXPORT_SYMBOL_GPL(fib_rules_register); -void fib_rules_cleanup_ops(struct fib_rules_ops *ops) +static void fib_rules_cleanup_ops(struct fib_rules_ops *ops) { struct fib_rule *rule, *tmp; @@ -153,7 +153,6 @@ void fib_rules_cleanup_ops(struct fib_rules_ops *ops) fib_rule_put(rule); } } -EXPORT_SYMBOL_GPL(fib_rules_cleanup_ops); static void fib_rules_put_rcu(struct rcu_head *head) { -- cgit v1.2.3 From c61393ea83573ff422af505b6fd49ef9ec9b91ca Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 4 Oct 2010 20:17:53 +0000 Subject: ipv6: make __ipv6_isatap_ifid static Another exported symbol only used in one file Signed-off-by: Stephen Hemminger Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/addrconf.h | 2 -- net/ipv6/addrconf.c | 3 +-- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 958d2749b7a..a9441249306 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -276,8 +276,6 @@ static inline int ipv6_addr_is_ll_all_routers(const struct in6_addr *addr) (addr->s6_addr32[3] ^ htonl(0x00000002))) == 0; } -extern int __ipv6_isatap_ifid(u8 *eui, __be32 addr); - static inline int ipv6_addr_is_isatap(const struct in6_addr *addr) { return (addr->s6_addr32[2] | htonl(0x02000000)) == htonl(0x02005EFE); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 8c88340278f..ec7a91d9e86 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1544,7 +1544,7 @@ static int addrconf_ifid_infiniband(u8 *eui, struct net_device *dev) return 0; } -int __ipv6_isatap_ifid(u8 *eui, __be32 addr) +static int __ipv6_isatap_ifid(u8 *eui, __be32 addr) { if (addr == 0) return -1; @@ -1560,7 +1560,6 @@ int __ipv6_isatap_ifid(u8 *eui, __be32 addr) memcpy(eui + 4, &addr, 4); return 0; } -EXPORT_SYMBOL(__ipv6_isatap_ifid); static int addrconf_ifid_sit(u8 *eui, struct net_device *dev) { -- cgit v1.2.3 From 17e5a8082894a4b66cb69e7ec16074f0f01281e1 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Wed, 29 Sep 2010 17:15:30 +0200 Subject: nl80211: allow drivers to indicate whether the survey data channel is in use Some user space applications only want to display survey data for the operating channel, however there is no API to get that yet. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- include/linux/nl80211.h | 2 ++ include/net/cfg80211.h | 2 ++ net/wireless/nl80211.c | 2 ++ 3 files changed, 6 insertions(+) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index f0518b0278a..edd21ae6acf 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1400,6 +1400,7 @@ enum nl80211_reg_rule_flags { * @__NL80211_SURVEY_INFO_INVALID: attribute number 0 is reserved * @NL80211_SURVEY_INFO_FREQUENCY: center frequency of channel * @NL80211_SURVEY_INFO_NOISE: noise level of channel (u8, dBm) + * @NL80211_SURVEY_INFO_IN_USE: channel is currently being used * @NL80211_SURVEY_INFO_MAX: highest survey info attribute number * currently defined * @__NL80211_SURVEY_INFO_AFTER_LAST: internal use @@ -1408,6 +1409,7 @@ enum nl80211_survey_info { __NL80211_SURVEY_INFO_INVALID, NL80211_SURVEY_INFO_FREQUENCY, NL80211_SURVEY_INFO_NOISE, + NL80211_SURVEY_INFO_IN_USE, /* keep last */ __NL80211_SURVEY_INFO_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index a0613ff62c9..ecc0403b918 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -293,12 +293,14 @@ struct key_params { * enum survey_info_flags - survey information flags * * @SURVEY_INFO_NOISE_DBM: noise (in dBm) was filled in + * @SURVEY_INFO_IN_USE: channel is currently being used * * Used by the driver to indicate which info in &struct survey_info * it has filled in during the get_survey(). */ enum survey_info_flags { SURVEY_INFO_NOISE_DBM = 1<<0, + SURVEY_INFO_IN_USE = 1<<1, }; /** diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9c84825803c..0087c4323c5 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3489,6 +3489,8 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 pid, u32 seq, if (survey->filled & SURVEY_INFO_NOISE_DBM) NLA_PUT_U8(msg, NL80211_SURVEY_INFO_NOISE, survey->noise); + if (survey->filled & SURVEY_INFO_IN_USE) + NLA_PUT_FLAG(msg, NL80211_SURVEY_INFO_IN_USE); nla_nest_end(msg, infoattr); -- cgit v1.2.3 From ea229e682633a18c1fa2c408400a6923cfc47910 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 30 Sep 2010 21:21:25 +0200 Subject: cfg80211: remove spurious __KERNEL__ ifdef The net/cfg80211.h header file isn't exported to userspace, so there's no need for any kind of __KERNEL__ protection in it. If it was exported, everything else in it would need protection as well, not just the logging stuff ... Cc:Joe Perches Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ecc0403b918..5f4d8acf7ab 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2553,8 +2553,6 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, enum nl80211_cqm_rssi_threshold_event rssi_event, gfp_t gfp); -#ifdef __KERNEL__ - /* Logging, debugging and troubleshooting/diagnostic helpers. */ /* wiphy_printk helpers, similar to dev_printk */ @@ -2601,6 +2599,4 @@ void cfg80211_cqm_rssi_notify(struct net_device *dev, #define wiphy_WARN(wiphy, format, args...) \ WARN(1, "wiphy: %s\n" format, wiphy_name(wiphy), ##args); -#endif - #endif /* __NET_CFG80211_H */ -- cgit v1.2.3 From 78be49ec2a0df34de9441930fdced20311fd709f Mon Sep 17 00:00:00 2001 From: Helmut Schaa Date: Sat, 2 Oct 2010 11:31:55 +0200 Subject: mac80211: distinct between max rates and the number of rates the hw can report Some drivers cannot handle multiple retry rates specified by the rc algorithm but instead use their own retry table (for example rt2800). However, if such a device registers itself with a max_rates value of 1 the rc algorithm cannot make use of the extended information the device can provide about retried rates. On the other hand, if a device registers itself with a max_rates value > 1 the rc algorithm assumes that the device can handle multi rate retries. Fix this issue by introducing another hw parameter max_report_rates that can be set to a different value then max_rates to indicate if a device is capable of reporting more rates then specified in max_rates. Signed-off-by: Helmut Schaa Signed-off-by: Ivo van Doorn Signed-off-by: John W. Linville --- include/net/mac80211.h | 6 +++++- net/mac80211/main.c | 4 ++++ net/mac80211/status.c | 2 +- 3 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index fe8b9dae4de..47316a653ae 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1109,7 +1109,10 @@ enum ieee80211_hw_flags { * @sta_data_size: size (in bytes) of the drv_priv data area * within &struct ieee80211_sta. * - * @max_rates: maximum number of alternate rate retry stages + * @max_rates: maximum number of alternate rate retry stages the hw + * can handle. + * @max_report_rates: maximum number of alternate rate retry stages + * the hw can report back. * @max_rate_tries: maximum number of tries for each stage * * @napi_weight: weight used for NAPI polling. You must specify an @@ -1131,6 +1134,7 @@ struct ieee80211_hw { u16 max_listen_interval; s8 max_signal; u8 max_rates; + u8 max_report_rates; u8 max_rate_tries; }; diff --git a/net/mac80211/main.c b/net/mac80211/main.c index e127fbb8424..9c2f3f934c7 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -537,6 +537,7 @@ struct ieee80211_hw *ieee80211_alloc_hw(size_t priv_data_len, /* set up some defaults */ local->hw.queues = 1; local->hw.max_rates = 1; + local->hw.max_report_rates = 0; local->hw.conf.long_frame_max_tx_count = wiphy->retry_long; local->hw.conf.short_frame_max_tx_count = wiphy->retry_short; local->user_power_level = -1; @@ -612,6 +613,9 @@ int ieee80211_register_hw(struct ieee80211_hw *hw) WLAN_CIPHER_SUITE_AES_CMAC }; + if (hw->max_report_rates == 0) + hw->max_report_rates = hw->max_rates; + /* * generic code guarantees at least one band, * set this very early because much code assumes diff --git a/net/mac80211/status.c b/net/mac80211/status.c index dd85006c4fe..95763e03697 100644 --- a/net/mac80211/status.c +++ b/net/mac80211/status.c @@ -176,7 +176,7 @@ void ieee80211_tx_status(struct ieee80211_hw *hw, struct sk_buff *skb) for (i = 0; i < IEEE80211_TX_MAX_RATES; i++) { /* the HW cannot have attempted that rate */ - if (i >= hw->max_rates) { + if (i >= hw->max_report_rates) { info->status.rates[i].idx = -1; info->status.rates[i].count = 0; } else if (info->status.rates[i].idx >= 0) { -- cgit v1.2.3 From ff4c92d85c6f2777d2067f8552e7fefb4d1754ae Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 4 Oct 2010 21:14:03 +0200 Subject: genetlink: introduce pre_doit/post_doit hooks Each family may have some amount of boilerplate locking code that applies to most, or even all, commands. This allows a family to handle such things in a more generic way, by allowing it to a) include private flags in each operation b) specify a pre_doit hook that is called, before an operation's doit() callback and may return an error directly, c) specify a post_doit hook that can undo locking or similar things done by pre_doit, and finally d) include two private pointers in each info struct passed between all these operations including doit(). (It's two because I'll need two in nl80211 -- can be extended.) Signed-off-by: Johannes Berg Acked-by: David S. Miller Signed-off-by: John W. Linville --- include/net/genetlink.h | 18 ++++++++++++++++++ net/netlink/genetlink.c | 14 +++++++++++++- 2 files changed, 31 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index f7dcd2c7041..8a64b811a39 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -20,6 +20,9 @@ struct genl_multicast_group { u32 id; }; +struct genl_ops; +struct genl_info; + /** * struct genl_family - generic netlink family * @id: protocol family idenfitier @@ -29,6 +32,10 @@ struct genl_multicast_group { * @maxattr: maximum number of attributes supported * @netnsok: set to true if the family can handle network * namespaces and should be presented in all of them + * @pre_doit: called before an operation's doit callback, it may + * do additional, common, filtering and return an error + * @post_doit: called after an operation's doit callback, it may + * undo operations done by pre_doit, for example release locks * @attrbuf: buffer to store parsed attributes * @ops_list: list of all assigned operations * @family_list: family list @@ -41,6 +48,12 @@ struct genl_family { unsigned int version; unsigned int maxattr; bool netnsok; + int (*pre_doit)(struct genl_ops *ops, + struct sk_buff *skb, + struct genl_info *info); + void (*post_doit)(struct genl_ops *ops, + struct sk_buff *skb, + struct genl_info *info); struct nlattr ** attrbuf; /* private */ struct list_head ops_list; /* private */ struct list_head family_list; /* private */ @@ -55,6 +68,8 @@ struct genl_family { * @genlhdr: generic netlink message header * @userhdr: user specific header * @attrs: netlink attributes + * @_net: network namespace + * @user_ptr: user pointers */ struct genl_info { u32 snd_seq; @@ -66,6 +81,7 @@ struct genl_info { #ifdef CONFIG_NET_NS struct net * _net; #endif + void * user_ptr[2]; }; static inline struct net *genl_info_net(struct genl_info *info) @@ -81,6 +97,7 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) /** * struct genl_ops - generic netlink operations * @cmd: command identifier + * @internal_flags: flags used by the family * @flags: flags * @policy: attribute validation policy * @doit: standard command callback @@ -90,6 +107,7 @@ static inline void genl_info_net_set(struct genl_info *info, struct net *net) */ struct genl_ops { u8 cmd; + u8 internal_flags; unsigned int flags; const struct nla_policy *policy; int (*doit)(struct sk_buff *skb, diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 26ed3e8587c..1781d99145e 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -547,8 +547,20 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; info.attrs = family->attrbuf; genl_info_net_set(&info, net); + memset(&info.user_ptr, 0, sizeof(info.user_ptr)); - return ops->doit(skb, &info); + if (family->pre_doit) { + err = family->pre_doit(ops, skb, &info); + if (err) + return err; + } + + err = ops->doit(skb, &info); + + if (family->post_doit) + family->post_doit(ops, skb, &info); + + return err; } static void genl_rcv(struct sk_buff *skb) -- cgit v1.2.3 From d6bf781712a1d25cc8987036b3a48535b331eb91 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 4 Oct 2010 06:15:44 +0000 Subject: net neigh: RCU conversion of neigh hash table David This is the first step for RCU conversion of neigh code. Next patches will convert hash_buckets[] and "struct neighbour" to RCU protected objects. Thanks [PATCH net-next] net neigh: RCU conversion of neigh hash table Instead of storing hash_buckets, hash_mask and hash_rnd in "struct neigh_table", a new structure is defined : struct neigh_hash_table { struct neighbour **hash_buckets; unsigned int hash_mask; __u32 hash_rnd; struct rcu_head rcu; }; And "struct neigh_table" has an RCU protected pointer to such a neigh_hash_table. This means the signature of (*hash)() function changed: We need to add a third parameter with the actual hash_rnd value, since this is not anymore a neigh_table field. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/neighbour.h | 16 +++- net/atm/clip.c | 4 +- net/core/neighbour.c | 219 ++++++++++++++++++++++++++++++------------------ net/decnet/dn_neigh.c | 13 +-- net/ipv4/arp.c | 8 +- net/ipv6/ndisc.c | 10 ++- 6 files changed, 170 insertions(+), 100 deletions(-) (limited to 'include/net') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 7d08fd1062f..37845dae648 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -138,13 +138,22 @@ struct pneigh_entry { * neighbour table manipulation */ +struct neigh_hash_table { + struct neighbour **hash_buckets; + unsigned int hash_mask; + __u32 hash_rnd; + struct rcu_head rcu; +}; + struct neigh_table { struct neigh_table *next; int family; int entry_size; int key_len; - __u32 (*hash)(const void *pkey, const struct net_device *); + __u32 (*hash)(const void *pkey, + const struct net_device *dev, + __u32 hash_rnd); int (*constructor)(struct neighbour *); int (*pconstructor)(struct pneigh_entry *); void (*pdestructor)(struct pneigh_entry *); @@ -165,9 +174,7 @@ struct neigh_table { unsigned long last_rand; struct kmem_cache *kmem_cachep; struct neigh_statistics __percpu *stats; - struct neighbour **hash_buckets; - unsigned int hash_mask; - __u32 hash_rnd; + struct neigh_hash_table __rcu *nht; struct pneigh_entry **phash_buckets; }; @@ -237,6 +244,7 @@ extern void pneigh_for_each(struct neigh_table *tbl, void (*cb)(struct pneigh_en struct neigh_seq_state { struct seq_net_private p; struct neigh_table *tbl; + struct neigh_hash_table *nht; void *(*neigh_sub_iter)(struct neigh_seq_state *state, struct neighbour *n, loff_t *pos); unsigned int bucket; diff --git a/net/atm/clip.c b/net/atm/clip.c index 95fdd118506..ff956d1115b 100644 --- a/net/atm/clip.c +++ b/net/atm/clip.c @@ -310,9 +310,9 @@ static int clip_constructor(struct neighbour *neigh) return 0; } -static u32 clip_hash(const void *pkey, const struct net_device *dev) +static u32 clip_hash(const void *pkey, const struct net_device *dev, __u32 rnd) { - return jhash_2words(*(u32 *) pkey, dev->ifindex, clip_tbl.hash_rnd); + return jhash_2words(*(u32 *) pkey, dev->ifindex, rnd); } static struct neigh_table clip_tbl = { diff --git a/net/core/neighbour.c b/net/core/neighbour.c index d6996e072a4..dd8920e4f50 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -131,14 +131,17 @@ static int neigh_forced_gc(struct neigh_table *tbl) { int shrunk = 0; int i; + struct neigh_hash_table *nht; NEIGH_CACHE_STAT_INC(tbl, forced_gc_runs); write_lock_bh(&tbl->lock); - for (i = 0; i <= tbl->hash_mask; i++) { + nht = rcu_dereference_protected(tbl->nht, + lockdep_is_held(&tbl->lock)); + for (i = 0; i <= nht->hash_mask; i++) { struct neighbour *n, **np; - np = &tbl->hash_buckets[i]; + np = &nht->hash_buckets[i]; while ((n = *np) != NULL) { /* Neighbour record may be discarded if: * - nobody refers to it. @@ -199,9 +202,13 @@ static void pneigh_queue_purge(struct sk_buff_head *list) static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) { int i; + struct neigh_hash_table *nht; - for (i = 0; i <= tbl->hash_mask; i++) { - struct neighbour *n, **np = &tbl->hash_buckets[i]; + nht = rcu_dereference_protected(tbl->nht, + lockdep_is_held(&tbl->lock)); + + for (i = 0; i <= nht->hash_mask; i++) { + struct neighbour *n, **np = &nht->hash_buckets[i]; while ((n = *np) != NULL) { if (dev && n->dev != dev) { @@ -297,64 +304,81 @@ out_entries: goto out; } -static struct neighbour **neigh_hash_alloc(unsigned int entries) +static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries) { - unsigned long size = entries * sizeof(struct neighbour *); - struct neighbour **ret; + size_t size = entries * sizeof(struct neighbour *); + struct neigh_hash_table *ret; + struct neighbour **buckets; - if (size <= PAGE_SIZE) { - ret = kzalloc(size, GFP_ATOMIC); - } else { - ret = (struct neighbour **) - __get_free_pages(GFP_ATOMIC|__GFP_ZERO, get_order(size)); + ret = kmalloc(sizeof(*ret), GFP_ATOMIC); + if (!ret) + return NULL; + if (size <= PAGE_SIZE) + buckets = kzalloc(size, GFP_ATOMIC); + else + buckets = (struct neighbour **) + __get_free_pages(GFP_ATOMIC | __GFP_ZERO, + get_order(size)); + if (!buckets) { + kfree(ret); + return NULL; } + ret->hash_buckets = buckets; + ret->hash_mask = entries - 1; + get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd)); return ret; } -static void neigh_hash_free(struct neighbour **hash, unsigned int entries) +static void neigh_hash_free_rcu(struct rcu_head *head) { - unsigned long size = entries * sizeof(struct neighbour *); + struct neigh_hash_table *nht = container_of(head, + struct neigh_hash_table, + rcu); + size_t size = (nht->hash_mask + 1) * sizeof(struct neighbour *); + struct neighbour **buckets = nht->hash_buckets; if (size <= PAGE_SIZE) - kfree(hash); + kfree(buckets); else - free_pages((unsigned long)hash, get_order(size)); + free_pages((unsigned long)buckets, get_order(size)); + kfree(nht); } -static void neigh_hash_grow(struct neigh_table *tbl, unsigned long new_entries) +static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl, + unsigned long new_entries) { - struct neighbour **new_hash, **old_hash; - unsigned int i, new_hash_mask, old_entries; + unsigned int i, hash; + struct neigh_hash_table *new_nht, *old_nht; NEIGH_CACHE_STAT_INC(tbl, hash_grows); BUG_ON(!is_power_of_2(new_entries)); - new_hash = neigh_hash_alloc(new_entries); - if (!new_hash) - return; + old_nht = rcu_dereference_protected(tbl->nht, + lockdep_is_held(&tbl->lock)); + new_nht = neigh_hash_alloc(new_entries); + if (!new_nht) + return old_nht; - old_entries = tbl->hash_mask + 1; - new_hash_mask = new_entries - 1; - old_hash = tbl->hash_buckets; - - get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); - for (i = 0; i < old_entries; i++) { + for (i = 0; i <= old_nht->hash_mask; i++) { struct neighbour *n, *next; - for (n = old_hash[i]; n; n = next) { - unsigned int hash_val = tbl->hash(n->primary_key, n->dev); + for (n = old_nht->hash_buckets[i]; + n != NULL; + n = next) { + hash = tbl->hash(n->primary_key, n->dev, + new_nht->hash_rnd); - hash_val &= new_hash_mask; + hash &= new_nht->hash_mask; next = n->next; - n->next = new_hash[hash_val]; - new_hash[hash_val] = n; + n->next = new_nht->hash_buckets[hash]; + new_nht->hash_buckets[hash] = n; } } - tbl->hash_buckets = new_hash; - tbl->hash_mask = new_hash_mask; - neigh_hash_free(old_hash, old_entries); + rcu_assign_pointer(tbl->nht, new_nht); + call_rcu(&old_nht->rcu, neigh_hash_free_rcu); + return new_nht; } struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, @@ -363,19 +387,23 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, struct neighbour *n; int key_len = tbl->key_len; u32 hash_val; + struct neigh_hash_table *nht; NEIGH_CACHE_STAT_INC(tbl, lookups); - read_lock_bh(&tbl->lock); - hash_val = tbl->hash(pkey, dev); - for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) { + rcu_read_lock_bh(); + nht = rcu_dereference_bh(tbl->nht); + hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask; + read_lock(&tbl->lock); + for (n = nht->hash_buckets[hash_val]; n; n = n->next) { if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) { neigh_hold(n); NEIGH_CACHE_STAT_INC(tbl, hits); break; } } - read_unlock_bh(&tbl->lock); + read_unlock(&tbl->lock); + rcu_read_unlock_bh(); return n; } EXPORT_SYMBOL(neigh_lookup); @@ -386,12 +414,15 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, struct neighbour *n; int key_len = tbl->key_len; u32 hash_val; + struct neigh_hash_table *nht; NEIGH_CACHE_STAT_INC(tbl, lookups); - read_lock_bh(&tbl->lock); - hash_val = tbl->hash(pkey, NULL); - for (n = tbl->hash_buckets[hash_val & tbl->hash_mask]; n; n = n->next) { + rcu_read_lock_bh(); + nht = rcu_dereference_bh(tbl->nht); + hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) & nht->hash_mask; + read_lock(&tbl->lock); + for (n = nht->hash_buckets[hash_val]; n; n = n->next) { if (!memcmp(n->primary_key, pkey, key_len) && net_eq(dev_net(n->dev), net)) { neigh_hold(n); @@ -399,7 +430,8 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, break; } } - read_unlock_bh(&tbl->lock); + read_unlock(&tbl->lock); + rcu_read_unlock_bh(); return n; } EXPORT_SYMBOL(neigh_lookup_nodev); @@ -411,6 +443,7 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, int key_len = tbl->key_len; int error; struct neighbour *n1, *rc, *n = neigh_alloc(tbl); + struct neigh_hash_table *nht; if (!n) { rc = ERR_PTR(-ENOBUFS); @@ -437,18 +470,20 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, n->confirmed = jiffies - (n->parms->base_reachable_time << 1); write_lock_bh(&tbl->lock); + nht = rcu_dereference_protected(tbl->nht, + lockdep_is_held(&tbl->lock)); - if (atomic_read(&tbl->entries) > (tbl->hash_mask + 1)) - neigh_hash_grow(tbl, (tbl->hash_mask + 1) << 1); + if (atomic_read(&tbl->entries) > (nht->hash_mask + 1)) + nht = neigh_hash_grow(tbl, (nht->hash_mask + 1) << 1); - hash_val = tbl->hash(pkey, dev) & tbl->hash_mask; + hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask; if (n->parms->dead) { rc = ERR_PTR(-EINVAL); goto out_tbl_unlock; } - for (n1 = tbl->hash_buckets[hash_val]; n1; n1 = n1->next) { + for (n1 = nht->hash_buckets[hash_val]; n1; n1 = n1->next) { if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) { neigh_hold(n1); rc = n1; @@ -456,8 +491,8 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, } } - n->next = tbl->hash_buckets[hash_val]; - tbl->hash_buckets[hash_val] = n; + n->next = nht->hash_buckets[hash_val]; + nht->hash_buckets[hash_val] = n; n->dead = 0; neigh_hold(n); write_unlock_bh(&tbl->lock); @@ -698,10 +733,13 @@ static void neigh_periodic_work(struct work_struct *work) struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work); struct neighbour *n, **np; unsigned int i; + struct neigh_hash_table *nht; NEIGH_CACHE_STAT_INC(tbl, periodic_gc_runs); write_lock_bh(&tbl->lock); + nht = rcu_dereference_protected(tbl->nht, + lockdep_is_held(&tbl->lock)); /* * periodically recompute ReachableTime from random function @@ -715,8 +753,8 @@ static void neigh_periodic_work(struct work_struct *work) neigh_rand_reach_time(p->base_reachable_time); } - for (i = 0 ; i <= tbl->hash_mask; i++) { - np = &tbl->hash_buckets[i]; + for (i = 0 ; i <= nht->hash_mask; i++) { + np = &nht->hash_buckets[i]; while ((n = *np) != NULL) { unsigned int state; @@ -1438,17 +1476,14 @@ void neigh_table_init_no_netlink(struct neigh_table *tbl) panic("cannot create neighbour proc dir entry"); #endif - tbl->hash_mask = 1; - tbl->hash_buckets = neigh_hash_alloc(tbl->hash_mask + 1); + tbl->nht = neigh_hash_alloc(8); phsize = (PNEIGH_HASHMASK + 1) * sizeof(struct pneigh_entry *); tbl->phash_buckets = kzalloc(phsize, GFP_KERNEL); - if (!tbl->hash_buckets || !tbl->phash_buckets) + if (!tbl->nht || !tbl->phash_buckets) panic("cannot allocate neighbour cache hashes"); - get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); - rwlock_init(&tbl->lock); INIT_DELAYED_WORK_DEFERRABLE(&tbl->gc_work, neigh_periodic_work); schedule_delayed_work(&tbl->gc_work, tbl->parms.reachable_time); @@ -1504,8 +1539,8 @@ int neigh_table_clear(struct neigh_table *tbl) } write_unlock(&neigh_tbl_lock); - neigh_hash_free(tbl->hash_buckets, tbl->hash_mask + 1); - tbl->hash_buckets = NULL; + call_rcu(&tbl->nht->rcu, neigh_hash_free_rcu); + tbl->nht = NULL; kfree(tbl->phash_buckets); tbl->phash_buckets = NULL; @@ -1745,18 +1780,22 @@ static int neightbl_fill_info(struct sk_buff *skb, struct neigh_table *tbl, unsigned long now = jiffies; unsigned int flush_delta = now - tbl->last_flush; unsigned int rand_delta = now - tbl->last_rand; - + struct neigh_hash_table *nht; struct ndt_config ndc = { .ndtc_key_len = tbl->key_len, .ndtc_entry_size = tbl->entry_size, .ndtc_entries = atomic_read(&tbl->entries), .ndtc_last_flush = jiffies_to_msecs(flush_delta), .ndtc_last_rand = jiffies_to_msecs(rand_delta), - .ndtc_hash_rnd = tbl->hash_rnd, - .ndtc_hash_mask = tbl->hash_mask, .ndtc_proxy_qlen = tbl->proxy_queue.qlen, }; + rcu_read_lock_bh(); + nht = rcu_dereference_bh(tbl->nht); + ndc.ndtc_hash_rnd = nht->hash_rnd; + ndc.ndtc_hash_mask = nht->hash_mask; + rcu_read_unlock_bh(); + NLA_PUT(skb, NDTA_CONFIG, sizeof(ndc), &ndc); } @@ -2088,14 +2127,18 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct neighbour *n; int rc, h, s_h = cb->args[1]; int idx, s_idx = idx = cb->args[2]; + struct neigh_hash_table *nht; - read_lock_bh(&tbl->lock); - for (h = 0; h <= tbl->hash_mask; h++) { + rcu_read_lock_bh(); + nht = rcu_dereference_bh(tbl->nht); + + read_lock(&tbl->lock); + for (h = 0; h <= nht->hash_mask; h++) { if (h < s_h) continue; if (h > s_h) s_idx = 0; - for (n = tbl->hash_buckets[h], idx = 0; n; n = n->next) { + for (n = nht->hash_buckets[h], idx = 0; n; n = n->next) { if (!net_eq(dev_net(n->dev), net)) continue; if (idx < s_idx) @@ -2104,7 +2147,6 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI) <= 0) { - read_unlock_bh(&tbl->lock); rc = -1; goto out; } @@ -2112,9 +2154,10 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, idx++; } } - read_unlock_bh(&tbl->lock); rc = skb->len; out: + read_unlock(&tbl->lock); + rcu_read_unlock_bh(); cb->args[1] = h; cb->args[2] = idx; return rc; @@ -2147,15 +2190,20 @@ static int neigh_dump_info(struct sk_buff *skb, struct netlink_callback *cb) void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void *), void *cookie) { int chain; + struct neigh_hash_table *nht; - read_lock_bh(&tbl->lock); - for (chain = 0; chain <= tbl->hash_mask; chain++) { + rcu_read_lock_bh(); + nht = rcu_dereference_bh(tbl->nht); + + read_lock(&tbl->lock); + for (chain = 0; chain <= nht->hash_mask; chain++) { struct neighbour *n; - for (n = tbl->hash_buckets[chain]; n; n = n->next) + for (n = nht->hash_buckets[chain]; n; n = n->next) cb(n, cookie); } - read_unlock_bh(&tbl->lock); + read_unlock(&tbl->lock); + rcu_read_unlock_bh(); } EXPORT_SYMBOL(neigh_for_each); @@ -2164,11 +2212,14 @@ void __neigh_for_each_release(struct neigh_table *tbl, int (*cb)(struct neighbour *)) { int chain; + struct neigh_hash_table *nht; - for (chain = 0; chain <= tbl->hash_mask; chain++) { + nht = rcu_dereference_protected(tbl->nht, + lockdep_is_held(&tbl->lock)); + for (chain = 0; chain <= nht->hash_mask; chain++) { struct neighbour *n, **np; - np = &tbl->hash_buckets[chain]; + np = &nht->hash_buckets[chain]; while ((n = *np) != NULL) { int release; @@ -2193,13 +2244,13 @@ static struct neighbour *neigh_get_first(struct seq_file *seq) { struct neigh_seq_state *state = seq->private; struct net *net = seq_file_net(seq); - struct neigh_table *tbl = state->tbl; + struct neigh_hash_table *nht = state->nht; struct neighbour *n = NULL; int bucket = state->bucket; state->flags &= ~NEIGH_SEQ_IS_PNEIGH; - for (bucket = 0; bucket <= tbl->hash_mask; bucket++) { - n = tbl->hash_buckets[bucket]; + for (bucket = 0; bucket <= nht->hash_mask; bucket++) { + n = nht->hash_buckets[bucket]; while (n) { if (!net_eq(dev_net(n->dev), net)) @@ -2234,7 +2285,7 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, { struct neigh_seq_state *state = seq->private; struct net *net = seq_file_net(seq); - struct neigh_table *tbl = state->tbl; + struct neigh_hash_table *nht = state->nht; if (state->neigh_sub_iter) { void *v = state->neigh_sub_iter(state, n, pos); @@ -2265,10 +2316,10 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, if (n) break; - if (++state->bucket > tbl->hash_mask) + if (++state->bucket > nht->hash_mask) break; - n = tbl->hash_buckets[state->bucket]; + n = nht->hash_buckets[state->bucket]; } if (n && pos) @@ -2367,6 +2418,7 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos) void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags) __acquires(tbl->lock) + __acquires(rcu_bh) { struct neigh_seq_state *state = seq->private; @@ -2374,8 +2426,9 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl state->bucket = 0; state->flags = (neigh_seq_flags & ~NEIGH_SEQ_IS_PNEIGH); - read_lock_bh(&tbl->lock); - + rcu_read_lock_bh(); + state->nht = rcu_dereference_bh(tbl->nht); + read_lock(&tbl->lock); return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN; } EXPORT_SYMBOL(neigh_seq_start); @@ -2409,11 +2462,13 @@ EXPORT_SYMBOL(neigh_seq_next); void neigh_seq_stop(struct seq_file *seq, void *v) __releases(tbl->lock) + __releases(rcu_bh) { struct neigh_seq_state *state = seq->private; struct neigh_table *tbl = state->tbl; - read_unlock_bh(&tbl->lock); + read_unlock(&tbl->lock); + rcu_read_unlock_bh(); } EXPORT_SYMBOL(neigh_seq_stop); diff --git a/net/decnet/dn_neigh.c b/net/decnet/dn_neigh.c index 0363bb95cc7..a085dbcf5c7 100644 --- a/net/decnet/dn_neigh.c +++ b/net/decnet/dn_neigh.c @@ -48,7 +48,6 @@ #include #include -static u32 dn_neigh_hash(const void *pkey, const struct net_device *dev); static int dn_neigh_construct(struct neighbour *); static void dn_long_error_report(struct neighbour *, struct sk_buff *); static void dn_short_error_report(struct neighbour *, struct sk_buff *); @@ -93,6 +92,13 @@ static const struct neigh_ops dn_phase3_ops = { .queue_xmit = dev_queue_xmit }; +static u32 dn_neigh_hash(const void *pkey, + const struct net_device *dev, + __u32 hash_rnd) +{ + return jhash_2words(*(__u16 *)pkey, 0, hash_rnd); +} + struct neigh_table dn_neigh_table = { .family = PF_DECnet, .entry_size = sizeof(struct dn_neigh), @@ -122,11 +128,6 @@ struct neigh_table dn_neigh_table = { .gc_thresh3 = 1024, }; -static u32 dn_neigh_hash(const void *pkey, const struct net_device *dev) -{ - return jhash_2words(*(__u16 *)pkey, 0, dn_neigh_table.hash_rnd); -} - static int dn_neigh_construct(struct neighbour *neigh) { struct net_device *dev = neigh->dev; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index d9031ad6782..f3530957817 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -127,7 +127,7 @@ EXPORT_SYMBOL(clip_tbl_hook); /* * Interface to generic neighbour cache. */ -static u32 arp_hash(const void *pkey, const struct net_device *dev); +static u32 arp_hash(const void *pkey, const struct net_device *dev, __u32 rnd); static int arp_constructor(struct neighbour *neigh); static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb); static void arp_error_report(struct neighbour *neigh, struct sk_buff *skb); @@ -225,9 +225,11 @@ int arp_mc_map(__be32 addr, u8 *haddr, struct net_device *dev, int dir) } -static u32 arp_hash(const void *pkey, const struct net_device *dev) +static u32 arp_hash(const void *pkey, + const struct net_device *dev, + __u32 hash_rnd) { - return jhash_2words(*(u32 *)pkey, dev->ifindex, arp_tbl.hash_rnd); + return jhash_2words(*(u32 *)pkey, dev->ifindex, hash_rnd); } static int arp_constructor(struct neighbour *neigh) diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index b3dd844cd34..998d6d27e7c 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -91,7 +91,9 @@ #include #include -static u32 ndisc_hash(const void *pkey, const struct net_device *dev); +static u32 ndisc_hash(const void *pkey, + const struct net_device *dev, + __u32 rnd); static int ndisc_constructor(struct neighbour *neigh); static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb); static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb); @@ -350,7 +352,9 @@ int ndisc_mc_map(struct in6_addr *addr, char *buf, struct net_device *dev, int d EXPORT_SYMBOL(ndisc_mc_map); -static u32 ndisc_hash(const void *pkey, const struct net_device *dev) +static u32 ndisc_hash(const void *pkey, + const struct net_device *dev, + __u32 hash_rnd) { const u32 *p32 = pkey; u32 addr_hash, i; @@ -359,7 +363,7 @@ static u32 ndisc_hash(const void *pkey, const struct net_device *dev) for (i = 0; i < (sizeof(struct in6_addr) / sizeof(u32)); i++) addr_hash ^= *p32++; - return jhash_2words(addr_hash, dev->ifindex, nd_tbl.hash_rnd); + return jhash_2words(addr_hash, dev->ifindex, hash_rnd); } static int ndisc_constructor(struct neighbour *neigh) -- cgit v1.2.3 From ebc0ffae5dfb4447e0a431ffe7fe1d467c48bbb9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Oct 2010 10:41:36 +0000 Subject: fib: RCU conversion of fib_lookup() fib_lookup() converted to be called in RCU protected context, no reference taken and released on a contended cache line (fib_clntref) fib_table_lookup() and fib_semantic_match() get an additional parameter. struct fib_info gets an rcu_head field, and is freed after an rcu grace period. Stress test : (Sending 160.000.000 UDP frames on same neighbour, IP route cache disabled, dual E5540 @2.53GHz, 32bit kernel, FIB_HASH) (about same results for FIB_TRIE) Before patch : real 1m31.199s user 0m13.761s sys 23m24.780s After patch: real 1m5.375s user 0m14.997s sys 15m50.115s Before patch Profile : 13044.00 15.4% __ip_route_output_key vmlinux 8438.00 10.0% dst_destroy vmlinux 5983.00 7.1% fib_semantic_match vmlinux 5410.00 6.4% fib_rules_lookup vmlinux 4803.00 5.7% neigh_lookup vmlinux 4420.00 5.2% _raw_spin_lock vmlinux 3883.00 4.6% rt_set_nexthop vmlinux 3261.00 3.9% _raw_read_lock vmlinux 2794.00 3.3% fib_table_lookup vmlinux 2374.00 2.8% neigh_resolve_output vmlinux 2153.00 2.5% dst_alloc vmlinux 1502.00 1.8% _raw_read_lock_bh vmlinux 1484.00 1.8% kmem_cache_alloc vmlinux 1407.00 1.7% eth_header vmlinux 1406.00 1.7% ipv4_dst_destroy vmlinux 1298.00 1.5% __copy_from_user_ll vmlinux 1174.00 1.4% dev_queue_xmit vmlinux 1000.00 1.2% ip_output vmlinux After patch Profile : 13712.00 15.8% dst_destroy vmlinux 8548.00 9.9% __ip_route_output_key vmlinux 7017.00 8.1% neigh_lookup vmlinux 4554.00 5.3% fib_semantic_match vmlinux 4067.00 4.7% _raw_read_lock vmlinux 3491.00 4.0% dst_alloc vmlinux 3186.00 3.7% neigh_resolve_output vmlinux 3103.00 3.6% fib_table_lookup vmlinux 2098.00 2.4% _raw_read_lock_bh vmlinux 2081.00 2.4% kmem_cache_alloc vmlinux 2013.00 2.3% _raw_spin_lock vmlinux 1763.00 2.0% __copy_from_user_ll vmlinux 1763.00 2.0% ip_output vmlinux 1761.00 2.0% ipv4_dst_destroy vmlinux 1631.00 1.9% eth_header vmlinux 1440.00 1.7% _raw_read_unlock_bh vmlinux Reference results, if IP route cache is enabled : real 0m29.718s user 0m10.845s sys 7m37.341s 25213.00 29.5% __ip_route_output_key vmlinux 9011.00 10.5% dst_release vmlinux 4817.00 5.6% ip_push_pending_frames vmlinux 4232.00 5.0% ip_finish_output vmlinux 3940.00 4.6% udp_sendmsg vmlinux 3730.00 4.4% __copy_from_user_ll vmlinux 3716.00 4.4% ip_route_output_flow vmlinux 2451.00 2.9% __xfrm_lookup vmlinux 2221.00 2.6% ip_append_data vmlinux 1718.00 2.0% _raw_spin_lock_bh vmlinux 1655.00 1.9% __alloc_skb vmlinux 1572.00 1.8% sock_wfree vmlinux 1345.00 1.6% kfree vmlinux Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/fib_rules.h | 2 ++ include/net/ip_fib.h | 17 ++++---------- net/core/fib_rules.c | 3 ++- net/ipv4/fib_frontend.c | 27 +++++++++++----------- net/ipv4/fib_hash.c | 5 ++-- net/ipv4/fib_lookup.h | 2 +- net/ipv4/fib_rules.c | 3 ++- net/ipv4/fib_semantics.c | 21 +++++++++++++---- net/ipv4/fib_trie.c | 10 ++++---- net/ipv4/route.c | 59 ++++++++++++++++++++---------------------------- 10 files changed, 72 insertions(+), 77 deletions(-) (limited to 'include/net') diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index ac2fd002812..106f3097d38 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -31,6 +31,8 @@ struct fib_lookup_arg { void *lookup_ptr; void *result; struct fib_rule *rule; + int flags; +#define FIB_LOOKUP_NOREF 1 }; struct fib_rules_ops { diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index c93f94edc61..ba3666d3176 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -86,6 +86,7 @@ struct fib_info { #ifdef CONFIG_IP_ROUTE_MULTIPATH int fib_power; #endif + struct rcu_head rcu; struct fib_nh fib_nh[0]; #define fib_dev fib_nh[0].nh_dev }; @@ -148,7 +149,7 @@ struct fib_table { }; extern int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, - struct fib_result *res); + struct fib_result *res, int fib_flags); extern int fib_table_insert(struct fib_table *, struct fib_config *); extern int fib_table_delete(struct fib_table *, struct fib_config *); extern int fib_table_dump(struct fib_table *table, struct sk_buff *skb, @@ -185,11 +186,11 @@ static inline int fib_lookup(struct net *net, const struct flowi *flp, struct fib_table *table; table = fib_get_table(net, RT_TABLE_LOCAL); - if (!fib_table_lookup(table, flp, res)) + if (!fib_table_lookup(table, flp, res, FIB_LOOKUP_NOREF)) return 0; table = fib_get_table(net, RT_TABLE_MAIN); - if (!fib_table_lookup(table, flp, res)) + if (!fib_table_lookup(table, flp, res, FIB_LOOKUP_NOREF)) return 0; return -ENETUNREACH; } @@ -254,16 +255,6 @@ static inline void fib_info_put(struct fib_info *fi) free_fib_info(fi); } -static inline void fib_res_put(struct fib_result *res) -{ - if (res->fi) - fib_info_put(res->fi); -#ifdef CONFIG_IP_MULTIPLE_TABLES - if (res->r) - fib_rule_put(res->r); -#endif -} - #ifdef CONFIG_PROC_FS extern int __net_init fib_proc_init(struct net *net); extern void __net_exit fib_proc_exit(struct net *net); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index cfb7d25c172..21698f8c49e 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -225,7 +225,8 @@ jumped: err = ops->action(rule, fl, flags, arg); if (err != -EAGAIN) { - if (likely(atomic_inc_not_zero(&rule->refcnt))) { + if ((arg->flags & FIB_LOOKUP_NOREF) || + likely(atomic_inc_not_zero(&rule->refcnt))) { arg->rule = rule; goto out; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index b05c23b05a9..919f2ad19b4 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -168,8 +168,11 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) struct fib_result res = { 0 }; struct net_device *dev = NULL; - if (fib_lookup(net, &fl, &res)) + rcu_read_lock(); + if (fib_lookup(net, &fl, &res)) { + rcu_read_unlock(); return NULL; + } if (res.type != RTN_LOCAL) goto out; dev = FIB_RES_DEV(res); @@ -177,7 +180,7 @@ struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref) if (dev && devref) dev_hold(dev); out: - fib_res_put(&res); + rcu_read_unlock(); return dev; } EXPORT_SYMBOL(__ip_dev_find); @@ -207,11 +210,12 @@ static inline unsigned __inet_dev_addr_type(struct net *net, local_table = fib_get_table(net, RT_TABLE_LOCAL); if (local_table) { ret = RTN_UNICAST; - if (!fib_table_lookup(local_table, &fl, &res)) { + rcu_read_lock(); + if (!fib_table_lookup(local_table, &fl, &res, FIB_LOOKUP_NOREF)) { if (!dev || dev == res.fi->fib_dev) ret = res.type; - fib_res_put(&res); } + rcu_read_unlock(); } return ret; } @@ -235,6 +239,7 @@ EXPORT_SYMBOL(inet_dev_addr_type); * - figure out what "logical" interface this packet arrived * and calculate "specific destination" address. * - check, that packet arrived from expected physical interface. + * called with rcu_read_lock() */ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, struct net_device *dev, __be32 *spec_dst, @@ -259,7 +264,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, struct net *net; no_addr = rpf = accept_local = 0; - rcu_read_lock(); in_dev = __in_dev_get_rcu(dev); if (in_dev) { no_addr = in_dev->ifa_list == NULL; @@ -268,7 +272,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, if (mark && !IN_DEV_SRC_VMARK(in_dev)) fl.mark = 0; } - rcu_read_unlock(); if (in_dev == NULL) goto e_inval; @@ -278,7 +281,7 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, goto last_resort; if (res.type != RTN_UNICAST) { if (res.type != RTN_LOCAL || !accept_local) - goto e_inval_res; + goto e_inval; } *spec_dst = FIB_RES_PREFSRC(res); fib_combine_itag(itag, &res); @@ -299,10 +302,8 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, #endif if (dev_match) { ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; - fib_res_put(&res); return ret; } - fib_res_put(&res); if (no_addr) goto last_resort; if (rpf == 1) @@ -315,7 +316,6 @@ int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, *spec_dst = FIB_RES_PREFSRC(res); ret = FIB_RES_NH(res).nh_scope >= RT_SCOPE_HOST; } - fib_res_put(&res); } return ret; @@ -326,8 +326,6 @@ last_resort: *itag = 0; return 0; -e_inval_res: - fib_res_put(&res); e_inval: return -EINVAL; e_rpf: @@ -873,15 +871,16 @@ static void nl_fib_lookup(struct fib_result_nl *frn, struct fib_table *tb) local_bh_disable(); frn->tb_id = tb->tb_id; - frn->err = fib_table_lookup(tb, &fl, &res); + rcu_read_lock(); + frn->err = fib_table_lookup(tb, &fl, &res, FIB_LOOKUP_NOREF); if (!frn->err) { frn->prefixlen = res.prefixlen; frn->nh_sel = res.nh_sel; frn->type = res.type; frn->scope = res.scope; - fib_res_put(&res); } + rcu_read_unlock(); local_bh_enable(); } } diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 4ed7e0dea1b..83cca68e259 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -244,7 +244,8 @@ fn_new_zone(struct fn_hash *table, int z) } int fib_table_lookup(struct fib_table *tb, - const struct flowi *flp, struct fib_result *res) + const struct flowi *flp, struct fib_result *res, + int fib_flags) { int err; struct fn_zone *fz; @@ -264,7 +265,7 @@ int fib_table_lookup(struct fib_table *tb, err = fib_semantic_match(&f->fn_alias, flp, res, - fz->fz_order); + fz->fz_order, fib_flags); if (err <= 0) goto out; } diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 637b133973b..b9c9a9f2aee 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -22,7 +22,7 @@ struct fib_alias { /* Exported by fib_semantics.c */ extern int fib_semantic_match(struct list_head *head, const struct flowi *flp, - struct fib_result *res, int prefixlen); + struct fib_result *res, int prefixlen, int fib_flags); extern void fib_release_info(struct fib_info *); extern struct fib_info *fib_create_info(struct fib_config *cfg); extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 32300521e32..7981a24f5c7 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -57,6 +57,7 @@ int fib_lookup(struct net *net, struct flowi *flp, struct fib_result *res) { struct fib_lookup_arg arg = { .result = res, + .flags = FIB_LOOKUP_NOREF, }; int err; @@ -94,7 +95,7 @@ static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, if (!tbl) goto errout; - err = fib_table_lookup(tbl, flp, (struct fib_result *) arg->result); + err = fib_table_lookup(tbl, flp, (struct fib_result *) arg->result, arg->flags); if (err > 0) err = -EAGAIN; errout: diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index ba52f399a89..0f80dfc2f7f 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -148,6 +148,13 @@ static const struct /* Release a nexthop info record */ +static void free_fib_info_rcu(struct rcu_head *head) +{ + struct fib_info *fi = container_of(head, struct fib_info, rcu); + + kfree(fi); +} + void free_fib_info(struct fib_info *fi) { if (fi->fib_dead == 0) { @@ -161,7 +168,7 @@ void free_fib_info(struct fib_info *fi) } endfor_nexthops(fi); fib_info_cnt--; release_net(fi->fib_net); - kfree(fi); + call_rcu(&fi->rcu, free_fib_info_rcu); } void fib_release_info(struct fib_info *fi) @@ -553,6 +560,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, nh->nh_scope = RT_SCOPE_LINK; return 0; } + rcu_read_lock(); { struct flowi fl = { .nl_u = { @@ -568,8 +576,10 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, if (fl.fl4_scope < RT_SCOPE_LINK) fl.fl4_scope = RT_SCOPE_LINK; err = fib_lookup(net, &fl, &res); - if (err) + if (err) { + rcu_read_unlock(); return err; + } } err = -EINVAL; if (res.type != RTN_UNICAST && res.type != RTN_LOCAL) @@ -585,7 +595,7 @@ static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, goto out; err = 0; out: - fib_res_put(&res); + rcu_read_unlock(); return err; } else { struct in_device *in_dev; @@ -879,7 +889,7 @@ failure: /* Note! fib_semantic_match intentionally uses RCU list functions. */ int fib_semantic_match(struct list_head *head, const struct flowi *flp, - struct fib_result *res, int prefixlen) + struct fib_result *res, int prefixlen, int fib_flags) { struct fib_alias *fa; int nh_sel = 0; @@ -943,7 +953,8 @@ out_fill_res: res->type = fa->fa_type; res->scope = fa->fa_scope; res->fi = fa->fa_info; - atomic_inc(&res->fi->fib_clntref); + if (!(fib_flags & FIB_LOOKUP_NOREF)) + atomic_inc(&res->fi->fib_clntref); return 0; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index a96e5ec211a..271c89bdf04 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1342,7 +1342,7 @@ err: /* should be called with rcu_read_lock */ static int check_leaf(struct trie *t, struct leaf *l, t_key key, const struct flowi *flp, - struct fib_result *res) + struct fib_result *res, int fib_flags) { struct leaf_info *li; struct hlist_head *hhead = &l->list; @@ -1356,7 +1356,7 @@ static int check_leaf(struct trie *t, struct leaf *l, if (l->key != (key & ntohl(mask))) continue; - err = fib_semantic_match(&li->falh, flp, res, plen); + err = fib_semantic_match(&li->falh, flp, res, plen, fib_flags); #ifdef CONFIG_IP_FIB_TRIE_STATS if (err <= 0) @@ -1372,7 +1372,7 @@ static int check_leaf(struct trie *t, struct leaf *l, } int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, - struct fib_result *res) + struct fib_result *res, int fib_flags) { struct trie *t = (struct trie *) tb->tb_data; int ret; @@ -1399,7 +1399,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, /* Just a leaf? */ if (IS_LEAF(n)) { - ret = check_leaf(t, (struct leaf *)n, key, flp, res); + ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags); goto found; } @@ -1424,7 +1424,7 @@ int fib_table_lookup(struct fib_table *tb, const struct flowi *flp, } if (IS_LEAF(n)) { - ret = check_leaf(t, (struct leaf *)n, key, flp, res); + ret = check_leaf(t, (struct leaf *)n, key, flp, res, fib_flags); if (ret > 0) goto backtrace; goto found; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 04e0df82b88..7864d0c4896 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1773,12 +1773,15 @@ void ip_rt_get_source(u8 *addr, struct rtable *rt) if (rt->fl.iif == 0) src = rt->rt_src; - else if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) { - src = FIB_RES_PREFSRC(res); - fib_res_put(&res); - } else - src = inet_select_addr(rt->dst.dev, rt->rt_gateway, + else { + rcu_read_lock(); + if (fib_lookup(dev_net(rt->dst.dev), &rt->fl, &res) == 0) + src = FIB_RES_PREFSRC(res); + else + src = inet_select_addr(rt->dst.dev, rt->rt_gateway, RT_SCOPE_UNIVERSE); + rcu_read_unlock(); + } memcpy(addr, &src, 4); } @@ -2081,6 +2084,7 @@ static int ip_mkroute_input(struct sk_buff *skb, * Such approach solves two big problems: * 1. Not simplex devices are handled properly. * 2. IP spoofing attempts are filtered with 100% of guarantee. + * called with rcu_read_lock() */ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, @@ -2102,7 +2106,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, unsigned hash; __be32 spec_dst; int err = -EINVAL; - int free_res = 0; struct net * net = dev_net(dev); /* IP on this device is disabled. */ @@ -2134,12 +2137,12 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, /* * Now we are ready to route packet. */ - if ((err = fib_lookup(net, &fl, &res)) != 0) { + err = fib_lookup(net, &fl, &res); + if (err != 0) { if (!IN_DEV_FORWARD(in_dev)) goto e_hostunreach; goto no_route; } - free_res = 1; RT_CACHE_STAT_INC(in_slow_tot); @@ -2148,8 +2151,8 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (res.type == RTN_LOCAL) { err = fib_validate_source(saddr, daddr, tos, - net->loopback_dev->ifindex, - dev, &spec_dst, &itag, skb->mark); + net->loopback_dev->ifindex, + dev, &spec_dst, &itag, skb->mark); if (err < 0) goto martian_source_keep_err; if (err) @@ -2164,9 +2167,6 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, goto martian_destination; err = ip_mkroute_input(skb, &res, &fl, in_dev, daddr, saddr, tos); -done: - if (free_res) - fib_res_put(&res); out: return err; brd_input: @@ -2226,7 +2226,7 @@ local_input: rth->rt_type = res.type; hash = rt_hash(daddr, saddr, fl.iif, rt_genid(net)); err = rt_intern_hash(hash, rth, NULL, skb, fl.iif); - goto done; + goto out; no_route: RT_CACHE_STAT_INC(in_no_route); @@ -2249,21 +2249,21 @@ martian_destination: e_hostunreach: err = -EHOSTUNREACH; - goto done; + goto out; e_inval: err = -EINVAL; - goto done; + goto out; e_nobufs: err = -ENOBUFS; - goto done; + goto out; martian_source: err = -EINVAL; martian_source_keep_err: ip_handle_martian_source(dev, in_dev, skb, daddr, saddr); - goto done; + goto out; } int ip_route_input_common(struct sk_buff *skb, __be32 daddr, __be32 saddr, @@ -2349,6 +2349,7 @@ skip_cache: } EXPORT_SYMBOL(ip_route_input_common); +/* called with rcu_read_lock() */ static int __mkroute_output(struct rtable **result, struct fib_result *res, const struct flowi *fl, @@ -2373,18 +2374,13 @@ static int __mkroute_output(struct rtable **result, if (dev_out->flags & IFF_LOOPBACK) flags |= RTCF_LOCAL; - rcu_read_lock(); in_dev = __in_dev_get_rcu(dev_out); - if (!in_dev) { - rcu_read_unlock(); + if (!in_dev) return -EINVAL; - } + if (res->type == RTN_BROADCAST) { flags |= RTCF_BROADCAST | RTCF_LOCAL; - if (res->fi) { - fib_info_put(res->fi); - res->fi = NULL; - } + res->fi = NULL; } else if (res->type == RTN_MULTICAST) { flags |= RTCF_MULTICAST | RTCF_LOCAL; if (!ip_check_mc(in_dev, oldflp->fl4_dst, oldflp->fl4_src, @@ -2394,10 +2390,8 @@ static int __mkroute_output(struct rtable **result, * default one, but do not gateway in this case. * Yes, it is hack. */ - if (res->fi && res->prefixlen < 4) { - fib_info_put(res->fi); + if (res->fi && res->prefixlen < 4) res->fi = NULL; - } } @@ -2467,6 +2461,7 @@ static int __mkroute_output(struct rtable **result, return 0; } +/* called with rcu_read_lock() */ static int ip_mkroute_output(struct rtable **rp, struct fib_result *res, const struct flowi *fl, @@ -2509,7 +2504,6 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, struct fib_result res; unsigned int flags = 0; struct net_device *dev_out = NULL; - int free_res = 0; int err; @@ -2636,15 +2630,12 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, err = -ENETUNREACH; goto out; } - free_res = 1; if (res.type == RTN_LOCAL) { if (!fl.fl4_src) fl.fl4_src = fl.fl4_dst; dev_out = net->loopback_dev; fl.oif = dev_out->ifindex; - if (res.fi) - fib_info_put(res.fi); res.fi = NULL; flags |= RTCF_LOCAL; goto make_route; @@ -2668,8 +2659,6 @@ static int ip_route_output_slow(struct net *net, struct rtable **rp, make_route: err = ip_mkroute_output(rp, &res, &fl, oldflp, dev_out, flags); - if (free_res) - fib_res_put(&res); out: return err; } -- cgit v1.2.3 From e31b82136d1adc7a599b6e99d3321e5831841f5a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 5 Oct 2010 19:39:30 +0200 Subject: cfg80211/mac80211: allow per-station GTKs This adds API to allow adding per-station GTKs, updates mac80211 to support it, and also allows drivers to remove a key from hwaccel again when this may be necessary due to multiple GTKs. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- drivers/net/wireless/iwmc3200wifi/cfg80211.c | 7 +- drivers/net/wireless/libertas/cfg.c | 4 +- drivers/net/wireless/rndis_wlan.c | 12 ++-- include/linux/nl80211.h | 12 ++++ include/net/cfg80211.h | 9 ++- include/net/mac80211.h | 24 +++++++ net/mac80211/cfg.c | 32 +++++++--- net/mac80211/ieee80211_i.h | 2 - net/mac80211/key.c | 95 ++++++++++++++++++---------- net/mac80211/key.h | 3 + net/mac80211/rx.c | 41 +++++++----- net/mac80211/sta_info.c | 10 +-- net/mac80211/sta_info.h | 6 +- net/mac80211/tx.c | 2 +- net/wireless/core.h | 2 +- net/wireless/ibss.c | 2 +- net/wireless/nl80211.c | 87 ++++++++++++++++++++++--- net/wireless/sme.c | 2 +- net/wireless/util.c | 12 +++- net/wireless/wext-compat.c | 38 +++++++---- 20 files changed, 293 insertions(+), 109 deletions(-) (limited to 'include/net') diff --git a/drivers/net/wireless/iwmc3200wifi/cfg80211.c b/drivers/net/wireless/iwmc3200wifi/cfg80211.c index 60619678f4e..c6c0eff9b5e 100644 --- a/drivers/net/wireless/iwmc3200wifi/cfg80211.c +++ b/drivers/net/wireless/iwmc3200wifi/cfg80211.c @@ -161,7 +161,7 @@ static int iwm_key_init(struct iwm_key *key, u8 key_index, } static int iwm_cfg80211_add_key(struct wiphy *wiphy, struct net_device *ndev, - u8 key_index, const u8 *mac_addr, + u8 key_index, bool pairwise, const u8 *mac_addr, struct key_params *params) { struct iwm_priv *iwm = ndev_to_iwm(ndev); @@ -181,7 +181,8 @@ static int iwm_cfg80211_add_key(struct wiphy *wiphy, struct net_device *ndev, } static int iwm_cfg80211_get_key(struct wiphy *wiphy, struct net_device *ndev, - u8 key_index, const u8 *mac_addr, void *cookie, + u8 key_index, bool pairwise, const u8 *mac_addr, + void *cookie, void (*callback)(void *cookie, struct key_params*)) { @@ -206,7 +207,7 @@ static int iwm_cfg80211_get_key(struct wiphy *wiphy, struct net_device *ndev, static int iwm_cfg80211_del_key(struct wiphy *wiphy, struct net_device *ndev, - u8 key_index, const u8 *mac_addr) + u8 key_index, bool pairwise, const u8 *mac_addr) { struct iwm_priv *iwm = ndev_to_iwm(ndev); struct iwm_key *key = &iwm->keys[key_index]; diff --git a/drivers/net/wireless/libertas/cfg.c b/drivers/net/wireless/libertas/cfg.c index cb3b855d949..1dc44bc6511 100644 --- a/drivers/net/wireless/libertas/cfg.c +++ b/drivers/net/wireless/libertas/cfg.c @@ -1438,7 +1438,7 @@ static int lbs_cfg_set_default_key(struct wiphy *wiphy, static int lbs_cfg_add_key(struct wiphy *wiphy, struct net_device *netdev, - u8 idx, const u8 *mac_addr, + u8 idx, bool pairwise, const u8 *mac_addr, struct key_params *params) { struct lbs_private *priv = wiphy_priv(wiphy); @@ -1498,7 +1498,7 @@ static int lbs_cfg_add_key(struct wiphy *wiphy, struct net_device *netdev, static int lbs_cfg_del_key(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, const u8 *mac_addr) + u8 key_index, bool pairwise, const u8 *mac_addr) { lbs_deb_enter(LBS_DEB_CFG80211); diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c index 719573bbbf8..71b5971da59 100644 --- a/drivers/net/wireless/rndis_wlan.c +++ b/drivers/net/wireless/rndis_wlan.c @@ -540,11 +540,11 @@ static int rndis_set_channel(struct wiphy *wiphy, struct net_device *dev, struct ieee80211_channel *chan, enum nl80211_channel_type channel_type); static int rndis_add_key(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, const u8 *mac_addr, - struct key_params *params); + u8 key_index, bool pairwise, const u8 *mac_addr, + struct key_params *params); static int rndis_del_key(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, const u8 *mac_addr); + u8 key_index, bool pairwise, const u8 *mac_addr); static int rndis_set_default_key(struct wiphy *wiphy, struct net_device *netdev, u8 key_index); @@ -2308,8 +2308,8 @@ static int rndis_set_channel(struct wiphy *wiphy, struct net_device *netdev, } static int rndis_add_key(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, const u8 *mac_addr, - struct key_params *params) + u8 key_index, bool pairwise, const u8 *mac_addr, + struct key_params *params) { struct rndis_wlan_private *priv = wiphy_priv(wiphy); struct usbnet *usbdev = priv->usbdev; @@ -2344,7 +2344,7 @@ static int rndis_add_key(struct wiphy *wiphy, struct net_device *netdev, } static int rndis_del_key(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, const u8 *mac_addr) + u8 key_index, bool pairwise, const u8 *mac_addr) { struct rndis_wlan_private *priv = wiphy_priv(wiphy); struct usbnet *usbdev = priv->usbdev; diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index c4efdfa24ed..e451f176e66 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -801,6 +801,9 @@ enum nl80211_commands { * This is used in association with @NL80211_ATTR_WIPHY_TX_POWER_SETTING * for non-automatic settings. * + * @NL80211_ATTR_SUPPORT_IBSS_RSN: The device supports IBSS RSN, which mostly + * means support for per-station GTKs. + * * @NL80211_ATTR_MAX: highest attribute number currently defined * @__NL80211_ATTR_AFTER_LAST: internal use */ @@ -968,6 +971,8 @@ enum nl80211_attrs { NL80211_ATTR_CONTROL_PORT_ETHERTYPE, NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT, + NL80211_ATTR_SUPPORT_IBSS_RSN, + /* add attributes here, update the policy in nl80211.c */ __NL80211_ATTR_AFTER_LAST, @@ -1659,11 +1664,14 @@ enum nl80211_auth_type { * @NL80211_KEYTYPE_GROUP: Group (broadcast/multicast) key * @NL80211_KEYTYPE_PAIRWISE: Pairwise (unicast/individual) key * @NL80211_KEYTYPE_PEERKEY: PeerKey (DLS) + * @NUM_NL80211_KEYTYPES: number of defined key types */ enum nl80211_key_type { NL80211_KEYTYPE_GROUP, NL80211_KEYTYPE_PAIRWISE, NL80211_KEYTYPE_PEERKEY, + + NUM_NL80211_KEYTYPES }; /** @@ -1694,6 +1702,9 @@ enum nl80211_wpa_versions { * CCMP keys, each six bytes in little endian * @NL80211_KEY_DEFAULT: flag indicating default key * @NL80211_KEY_DEFAULT_MGMT: flag indicating default management key + * @NL80211_KEY_TYPE: the key type from enum nl80211_key_type, if not + * specified the default depends on whether a MAC address was + * given with the command using the key or not (u32) * @__NL80211_KEY_AFTER_LAST: internal * @NL80211_KEY_MAX: highest key attribute */ @@ -1705,6 +1716,7 @@ enum nl80211_key_attributes { NL80211_KEY_SEQ, NL80211_KEY_DEFAULT, NL80211_KEY_DEFAULT_MGMT, + NL80211_KEY_TYPE, /* keep last */ __NL80211_KEY_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5f4d8acf7ab..0f77515266b 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1130,13 +1130,14 @@ struct cfg80211_ops { struct vif_params *params); int (*add_key)(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, const u8 *mac_addr, + u8 key_index, bool pairwise, const u8 *mac_addr, struct key_params *params); int (*get_key)(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, const u8 *mac_addr, void *cookie, + u8 key_index, bool pairwise, const u8 *mac_addr, + void *cookie, void (*callback)(void *cookie, struct key_params*)); int (*del_key)(struct wiphy *wiphy, struct net_device *netdev, - u8 key_index, const u8 *mac_addr); + u8 key_index, bool pairwise, const u8 *mac_addr); int (*set_default_key)(struct wiphy *wiphy, struct net_device *netdev, u8 key_index); @@ -1304,6 +1305,7 @@ struct cfg80211_ops { * @WIPHY_FLAG_CONTROL_PORT_PROTOCOL: This device supports setting the * control port protocol ethertype. The device also honours the * control_port_no_encrypt flag. + * @WIPHY_FLAG_IBSS_RSN: The device supports IBSS RSN. */ enum wiphy_flags { WIPHY_FLAG_CUSTOM_REGULATORY = BIT(0), @@ -1314,6 +1316,7 @@ enum wiphy_flags { WIPHY_FLAG_4ADDR_AP = BIT(5), WIPHY_FLAG_4ADDR_STATION = BIT(6), WIPHY_FLAG_CONTROL_PORT_PROTOCOL = BIT(7), + WIPHY_FLAG_IBSS_RSN = BIT(7), }; struct mac_address { diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 47316a653ae..33aa2e39147 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1041,6 +1041,13 @@ enum ieee80211_tkip_key_type { * @IEEE80211_HW_NEED_DTIM_PERIOD: * This device needs to know the DTIM period for the BSS before * associating. + * + * @IEEE80211_HW_SUPPORTS_PER_STA_GTK: The device's crypto engine supports + * per-station GTKs as used by IBSS RSN or during fast transition. If + * the device doesn't support per-station GTKs, but can be asked not + * to decrypt group addressed frames, then IBSS RSN support is still + * possible but software crypto will be used. Advertise the wiphy flag + * only in that case. */ enum ieee80211_hw_flags { IEEE80211_HW_HAS_RATE_CONTROL = 1<<0, @@ -1064,6 +1071,7 @@ enum ieee80211_hw_flags { IEEE80211_HW_REPORTS_TX_ACK_STATUS = 1<<18, IEEE80211_HW_CONNECTION_MONITOR = 1<<19, IEEE80211_HW_SUPPORTS_CQM_RSSI = 1<<20, + IEEE80211_HW_SUPPORTS_PER_STA_GTK = 1<<21, }; /** @@ -2582,6 +2590,22 @@ void ieee80211_chswitch_done(struct ieee80211_vif *vif, bool success); void ieee80211_request_smps(struct ieee80211_vif *vif, enum ieee80211_smps_mode smps_mode); +/** + * ieee80211_key_removed - disable hw acceleration for key + * @key_conf: The key hw acceleration should be disabled for + * + * This allows drivers to indicate that the given key has been + * removed from hardware acceleration, due to a new key that + * was added. Don't use this if the key can continue to be used + * for TX, if the key restriction is on RX only it is permitted + * to keep the key for TX only and not call this function. + * + * Due to locking constraints, it may only be called during + * @set_key. This function must be allowed to sleep, and the + * key it tries to disable may still be used until it returns. + */ +void ieee80211_key_removed(struct ieee80211_key_conf *key_conf); + /* Rate control API */ /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 94bf550bd4c..8b0e874a3d6 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -103,7 +103,7 @@ static int ieee80211_change_iface(struct wiphy *wiphy, } static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, - u8 key_idx, const u8 *mac_addr, + u8 key_idx, bool pairwise, const u8 *mac_addr, struct key_params *params) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); @@ -131,6 +131,9 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, if (IS_ERR(key)) return PTR_ERR(key); + if (pairwise) + key->conf.flags |= IEEE80211_KEY_FLAG_PAIRWISE; + mutex_lock(&sdata->local->sta_mtx); if (mac_addr) { @@ -153,7 +156,7 @@ static int ieee80211_add_key(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, - u8 key_idx, const u8 *mac_addr) + u8 key_idx, bool pairwise, const u8 *mac_addr) { struct ieee80211_sub_if_data *sdata; struct sta_info *sta; @@ -170,10 +173,17 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, if (!sta) goto out_unlock; - if (sta->key) { - ieee80211_key_free(sdata->local, sta->key); - WARN_ON(sta->key); - ret = 0; + if (pairwise) { + if (sta->ptk) { + ieee80211_key_free(sdata->local, sta->ptk); + ret = 0; + } + } else { + if (sta->gtk[key_idx]) { + ieee80211_key_free(sdata->local, + sta->gtk[key_idx]); + ret = 0; + } } goto out_unlock; @@ -195,7 +205,8 @@ static int ieee80211_del_key(struct wiphy *wiphy, struct net_device *dev, } static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, - u8 key_idx, const u8 *mac_addr, void *cookie, + u8 key_idx, bool pairwise, const u8 *mac_addr, + void *cookie, void (*callback)(void *cookie, struct key_params *params)) { @@ -203,7 +214,7 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, struct sta_info *sta = NULL; u8 seq[6] = {0}; struct key_params params; - struct ieee80211_key *key; + struct ieee80211_key *key = NULL; u32 iv32; u16 iv16; int err = -ENOENT; @@ -217,7 +228,10 @@ static int ieee80211_get_key(struct wiphy *wiphy, struct net_device *dev, if (!sta) goto out; - key = sta->key; + if (pairwise) + key = sta->ptk; + else if (key_idx < NUM_DEFAULT_KEYS) + key = sta->gtk[key_idx]; } else key = sdata->keys[key_idx]; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 76c2b50ec6f..f0610fa4fbe 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -549,8 +549,6 @@ struct ieee80211_sub_if_data { struct ieee80211_fragment_entry fragments[IEEE80211_FRAGMENT_MAX]; unsigned int fragment_next; -#define NUM_DEFAULT_KEYS 4 -#define NUM_DEFAULT_MGMT_KEYS 2 struct ieee80211_key *keys[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS]; struct ieee80211_key *default_key; struct ieee80211_key *default_mgmt_key; diff --git a/net/mac80211/key.c b/net/mac80211/key.c index 6a63d1abd14..ccd676b2f59 100644 --- a/net/mac80211/key.c +++ b/net/mac80211/key.c @@ -68,15 +68,21 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) might_sleep(); - if (!key->local->ops->set_key) { - ret = -EOPNOTSUPP; + if (!key->local->ops->set_key) goto out_unsupported; - } assert_key_lock(key->local); sta = get_sta_for_key(key); + /* + * If this is a per-STA GTK, check if it + * is supported; if not, return. + */ + if (sta && !(key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE) && + !(key->local->hw.flags & IEEE80211_HW_SUPPORTS_PER_STA_GTK)) + goto out_unsupported; + sdata = key->sdata; if (sdata->vif.type == NL80211_IFTYPE_AP_VLAN) sdata = container_of(sdata->bss, @@ -85,31 +91,28 @@ static int ieee80211_key_enable_hw_accel(struct ieee80211_key *key) ret = drv_set_key(key->local, SET_KEY, sdata, sta, &key->conf); - if (!ret) + if (!ret) { key->flags |= KEY_FLAG_UPLOADED_TO_HARDWARE; + return 0; + } - if (ret && ret != -ENOSPC && ret != -EOPNOTSUPP) + if (ret != -ENOSPC && ret != -EOPNOTSUPP) wiphy_err(key->local->hw.wiphy, "failed to set key (%d, %pM) to hardware (%d)\n", key->conf.keyidx, sta ? sta->addr : bcast_addr, ret); -out_unsupported: - if (ret) { - switch (key->conf.cipher) { - case WLAN_CIPHER_SUITE_WEP40: - case WLAN_CIPHER_SUITE_WEP104: - case WLAN_CIPHER_SUITE_TKIP: - case WLAN_CIPHER_SUITE_CCMP: - case WLAN_CIPHER_SUITE_AES_CMAC: - /* all of these we can do in software */ - ret = 0; - break; - default: - ret = -EINVAL; - } + out_unsupported: + switch (key->conf.cipher) { + case WLAN_CIPHER_SUITE_WEP40: + case WLAN_CIPHER_SUITE_WEP104: + case WLAN_CIPHER_SUITE_TKIP: + case WLAN_CIPHER_SUITE_CCMP: + case WLAN_CIPHER_SUITE_AES_CMAC: + /* all of these we can do in software */ + return 0; + default: + return -EINVAL; } - - return ret; } static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) @@ -147,6 +150,26 @@ static void ieee80211_key_disable_hw_accel(struct ieee80211_key *key) key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; } +void ieee80211_key_removed(struct ieee80211_key_conf *key_conf) +{ + struct ieee80211_key *key; + + key = container_of(key_conf, struct ieee80211_key, conf); + + might_sleep(); + assert_key_lock(key->local); + + key->flags &= ~KEY_FLAG_UPLOADED_TO_HARDWARE; + + /* + * Flush TX path to avoid attempts to use this key + * after this function returns. Until then, drivers + * must be prepared to handle the key. + */ + synchronize_rcu(); +} +EXPORT_SYMBOL_GPL(ieee80211_key_removed); + static void __ieee80211_set_default_key(struct ieee80211_sub_if_data *sdata, int idx) { @@ -202,6 +225,7 @@ void ieee80211_set_default_mgmt_key(struct ieee80211_sub_if_data *sdata, static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, struct sta_info *sta, + bool pairwise, struct ieee80211_key *old, struct ieee80211_key *new) { @@ -210,8 +234,14 @@ static void __ieee80211_key_replace(struct ieee80211_sub_if_data *sdata, if (new) list_add(&new->list, &sdata->key_list); - if (sta) { - rcu_assign_pointer(sta->key, new); + if (sta && pairwise) { + rcu_assign_pointer(sta->ptk, new); + } else if (sta) { + if (old) + idx = old->conf.keyidx; + else + idx = new->conf.keyidx; + rcu_assign_pointer(sta->gtk[idx], new); } else { WARN_ON(new && old && new->conf.keyidx != old->conf.keyidx); @@ -355,6 +385,7 @@ int ieee80211_key_link(struct ieee80211_key *key, { struct ieee80211_key *old_key; int idx, ret; + bool pairwise = key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE; BUG_ON(!sdata); BUG_ON(!key); @@ -371,13 +402,6 @@ int ieee80211_key_link(struct ieee80211_key *key, */ if (test_sta_flags(sta, WLAN_STA_WME)) key->conf.flags |= IEEE80211_KEY_FLAG_WMM_STA; - - /* - * This key is for a specific sta interface, - * inform the driver that it should try to store - * this key as pairwise key. - */ - key->conf.flags |= IEEE80211_KEY_FLAG_PAIRWISE; } else { if (sdata->vif.type == NL80211_IFTYPE_STATION) { struct sta_info *ap; @@ -399,12 +423,14 @@ int ieee80211_key_link(struct ieee80211_key *key, mutex_lock(&sdata->local->key_mtx); - if (sta) - old_key = sta->key; + if (sta && pairwise) + old_key = sta->ptk; + else if (sta) + old_key = sta->gtk[idx]; else old_key = sdata->keys[idx]; - __ieee80211_key_replace(sdata, sta, old_key, key); + __ieee80211_key_replace(sdata, sta, pairwise, old_key, key); __ieee80211_key_destroy(old_key); ieee80211_debugfs_key_add(key); @@ -423,7 +449,8 @@ static void __ieee80211_key_free(struct ieee80211_key *key) */ if (key->sdata) __ieee80211_key_replace(key->sdata, key->sta, - key, NULL); + key->conf.flags & IEEE80211_KEY_FLAG_PAIRWISE, + key, NULL); __ieee80211_key_destroy(key); } diff --git a/net/mac80211/key.h b/net/mac80211/key.h index cb9a4a65cc6..0db1c0f5f69 100644 --- a/net/mac80211/key.h +++ b/net/mac80211/key.h @@ -16,6 +16,9 @@ #include #include +#define NUM_DEFAULT_KEYS 4 +#define NUM_DEFAULT_MGMT_KEYS 2 + #define WEP_IV_LEN 4 #define WEP_ICV_LEN 4 #define ALG_TKIP_KEY_LEN 32 diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c index b3e161ffa4b..b67221def58 100644 --- a/net/mac80211/rx.c +++ b/net/mac80211/rx.c @@ -846,7 +846,7 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) int keyidx; int hdrlen; ieee80211_rx_result result = RX_DROP_UNUSABLE; - struct ieee80211_key *stakey = NULL; + struct ieee80211_key *sta_ptk = NULL; int mmie_keyidx = -1; __le16 fc; @@ -888,15 +888,15 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) rx->key = NULL; if (rx->sta) - stakey = rcu_dereference(rx->sta->key); + sta_ptk = rcu_dereference(rx->sta->ptk); fc = hdr->frame_control; if (!ieee80211_has_protected(fc)) mmie_keyidx = ieee80211_get_mmie_keyidx(rx->skb); - if (!is_multicast_ether_addr(hdr->addr1) && stakey) { - rx->key = stakey; + if (!is_multicast_ether_addr(hdr->addr1) && sta_ptk) { + rx->key = sta_ptk; if ((status->flag & RX_FLAG_DECRYPTED) && (status->flag & RX_FLAG_IV_STRIPPED)) return RX_CONTINUE; @@ -912,7 +912,10 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) if (mmie_keyidx < NUM_DEFAULT_KEYS || mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS) return RX_DROP_MONITOR; /* unexpected BIP keyidx */ - rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]); + if (rx->sta) + rx->key = rcu_dereference(rx->sta->gtk[mmie_keyidx]); + if (!rx->key) + rx->key = rcu_dereference(rx->sdata->keys[mmie_keyidx]); } else if (!ieee80211_has_protected(fc)) { /* * The frame was not protected, so skip decryption. However, we @@ -955,17 +958,25 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx) skb_copy_bits(rx->skb, hdrlen + 3, &keyid, 1); keyidx = keyid >> 6; - rx->key = rcu_dereference(rx->sdata->keys[keyidx]); + /* check per-station GTK first, if multicast packet */ + if (is_multicast_ether_addr(hdr->addr1) && rx->sta) + rx->key = rcu_dereference(rx->sta->gtk[keyidx]); - /* - * RSNA-protected unicast frames should always be sent with - * pairwise or station-to-station keys, but for WEP we allow - * using a key index as well. - */ - if (rx->key && rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP40 && - rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP104 && - !is_multicast_ether_addr(hdr->addr1)) - rx->key = NULL; + /* if not found, try default key */ + if (!rx->key) { + rx->key = rcu_dereference(rx->sdata->keys[keyidx]); + + /* + * RSNA-protected unicast frames should always be + * sent with pairwise or station-to-station keys, + * but for WEP we allow using a key index as well. + */ + if (rx->key && + rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP40 && + rx->key->conf.cipher != WLAN_CIPHER_SUITE_WEP104 && + !is_multicast_ether_addr(hdr->addr1)) + rx->key = NULL; + } } if (rx->key) { diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index aeaf2d6fccc..6d8f897d876 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -616,7 +616,7 @@ static int __must_check __sta_info_destroy(struct sta_info *sta) struct ieee80211_sub_if_data *sdata; struct sk_buff *skb; unsigned long flags; - int ret; + int ret, i; might_sleep(); @@ -644,10 +644,10 @@ static int __must_check __sta_info_destroy(struct sta_info *sta) if (ret) return ret; - if (sta->key) { - ieee80211_key_free(local, sta->key); - WARN_ON(sta->key); - } + for (i = 0; i < NUM_DEFAULT_KEYS; i++) + ieee80211_key_free(local, sta->gtk[i]); + if (sta->ptk) + ieee80211_key_free(local, sta->ptk); sta->dead = true; diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index cf21a2e8134..9265acadef3 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -199,7 +199,8 @@ enum plink_state { * @hnext: hash table linked list pointer * @local: pointer to the global information * @sdata: virtual interface this station belongs to - * @key: peer key negotiated with this station, if any + * @ptk: peer key negotiated with this station, if any + * @gtk: group keys negotiated with this station, if any * @rate_ctrl: rate control algorithm reference * @rate_ctrl_priv: rate control private per-STA pointer * @last_tx_rate: rate used for last transmit, to report to userspace as @@ -254,7 +255,8 @@ struct sta_info { struct sta_info *hnext; struct ieee80211_local *local; struct ieee80211_sub_if_data *sdata; - struct ieee80211_key *key; + struct ieee80211_key *gtk[NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS]; + struct ieee80211_key *ptk; struct rate_control_ref *rate_ctrl; void *rate_ctrl_priv; spinlock_t lock; diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c index 258fbdbedbd..96c59430950 100644 --- a/net/mac80211/tx.c +++ b/net/mac80211/tx.c @@ -532,7 +532,7 @@ ieee80211_tx_h_select_key(struct ieee80211_tx_data *tx) if (unlikely(info->flags & IEEE80211_TX_INTFL_DONT_ENCRYPT)) tx->key = NULL; - else if (tx->sta && (key = rcu_dereference(tx->sta->key))) + else if (tx->sta && (key = rcu_dereference(tx->sta->ptk))) tx->key = key; else if (ieee80211_is_mgmt(hdr->frame_control) && is_multicast_ether_addr(hdr->addr1) && diff --git a/net/wireless/core.h b/net/wireless/core.h index 37580e090a3..2d1d4c70113 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -375,7 +375,7 @@ bool cfg80211_sme_failed_reassoc(struct wireless_dev *wdev); /* internal helpers */ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, struct key_params *params, int key_idx, - const u8 *mac_addr); + bool pairwise, const u8 *mac_addr); void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, size_t ie_len, u16 reason, bool from_ap); void cfg80211_sme_scan_done(struct net_device *dev); diff --git a/net/wireless/ibss.c b/net/wireless/ibss.c index 8cb6e08373b..f33fbb79437 100644 --- a/net/wireless/ibss.c +++ b/net/wireless/ibss.c @@ -160,7 +160,7 @@ static void __cfg80211_clear_ibss(struct net_device *dev, bool nowext) */ if (rdev->ops->del_key) for (i = 0; i < 6; i++) - rdev->ops->del_key(wdev->wiphy, dev, i, NULL); + rdev->ops->del_key(wdev->wiphy, dev, i, false, NULL); if (wdev->current_bss) { cfg80211_unhold_bss(wdev->current_bss); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 0c9497170f1..8826888cc14 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -93,6 +93,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_KEY_CIPHER] = { .type = NLA_U32 }, [NL80211_ATTR_KEY_DEFAULT] = { .type = NLA_FLAG }, [NL80211_ATTR_KEY_SEQ] = { .type = NLA_BINARY, .len = 8 }, + [NL80211_ATTR_KEY_TYPE] = { .type = NLA_U32 }, [NL80211_ATTR_BEACON_INTERVAL] = { .type = NLA_U32 }, [NL80211_ATTR_DTIM_PERIOD] = { .type = NLA_U32 }, @@ -168,7 +169,7 @@ static const struct nla_policy nl80211_policy[NL80211_ATTR_MAX+1] = { [NL80211_ATTR_FRAME_TYPE] = { .type = NLA_U16 }, }; -/* policy for the attributes */ +/* policy for the key attributes */ static const struct nla_policy nl80211_key_policy[NL80211_KEY_MAX + 1] = { [NL80211_KEY_DATA] = { .type = NLA_BINARY, .len = WLAN_MAX_KEY_LEN }, [NL80211_KEY_IDX] = { .type = NLA_U8 }, @@ -176,6 +177,7 @@ static const struct nla_policy nl80211_key_policy[NL80211_KEY_MAX + 1] = { [NL80211_KEY_SEQ] = { .type = NLA_BINARY, .len = 8 }, [NL80211_KEY_DEFAULT] = { .type = NLA_FLAG }, [NL80211_KEY_DEFAULT_MGMT] = { .type = NLA_FLAG }, + [NL80211_KEY_TYPE] = { .type = NLA_U32 }, }; /* ifidx get helper */ @@ -306,6 +308,7 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct key_parse { struct key_params p; int idx; + int type; bool def, defmgmt; }; @@ -336,6 +339,12 @@ static int nl80211_parse_key_new(struct nlattr *key, struct key_parse *k) if (tb[NL80211_KEY_CIPHER]) k->p.cipher = nla_get_u32(tb[NL80211_KEY_CIPHER]); + if (tb[NL80211_KEY_TYPE]) { + k->type = nla_get_u32(tb[NL80211_KEY_TYPE]); + if (k->type < 0 || k->type >= NUM_NL80211_KEYTYPES) + return -EINVAL; + } + return 0; } @@ -360,6 +369,12 @@ static int nl80211_parse_key_old(struct genl_info *info, struct key_parse *k) k->def = !!info->attrs[NL80211_ATTR_KEY_DEFAULT]; k->defmgmt = !!info->attrs[NL80211_ATTR_KEY_DEFAULT_MGMT]; + if (info->attrs[NL80211_ATTR_KEY_TYPE]) { + k->type = nla_get_u32(info->attrs[NL80211_ATTR_KEY_TYPE]); + if (k->type < 0 || k->type >= NUM_NL80211_KEYTYPES) + return -EINVAL; + } + return 0; } @@ -369,6 +384,7 @@ static int nl80211_parse_key(struct genl_info *info, struct key_parse *k) memset(k, 0, sizeof(*k)); k->idx = -1; + k->type = -1; if (info->attrs[NL80211_ATTR_KEY]) err = nl80211_parse_key_new(info->attrs[NL80211_ATTR_KEY], k); @@ -433,7 +449,7 @@ nl80211_parse_connkeys(struct cfg80211_registered_device *rdev, } else if (parse.defmgmt) goto error; err = cfg80211_validate_key_settings(rdev, &parse.p, - parse.idx, NULL); + parse.idx, false, NULL); if (err) goto error; result->params[parse.idx].cipher = parse.p.cipher; @@ -516,6 +532,9 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, NLA_PUT_U16(msg, NL80211_ATTR_MAX_SCAN_IE_LEN, dev->wiphy.max_scan_ie_len); + if (dev->wiphy.flags & WIPHY_FLAG_IBSS_RSN) + NLA_PUT_FLAG(msg, NL80211_ATTR_SUPPORT_IBSS_RSN); + NLA_PUT(msg, NL80211_ATTR_CIPHER_SUITES, sizeof(u32) * dev->wiphy.n_cipher_suites, dev->wiphy.cipher_suites); @@ -1446,7 +1465,8 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) int err; struct net_device *dev = info->user_ptr[1]; u8 key_idx = 0; - u8 *mac_addr = NULL; + const u8 *mac_addr = NULL; + bool pairwise; struct get_key_cookie cookie = { .error = 0, }; @@ -1462,6 +1482,17 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); + pairwise = !!mac_addr; + if (info->attrs[NL80211_ATTR_KEY_TYPE]) { + u32 kt = nla_get_u32(info->attrs[NL80211_ATTR_KEY_TYPE]); + if (kt >= NUM_NL80211_KEYTYPES) + return -EINVAL; + if (kt != NL80211_KEYTYPE_GROUP && + kt != NL80211_KEYTYPE_PAIRWISE) + return -EINVAL; + pairwise = kt == NL80211_KEYTYPE_PAIRWISE; + } + if (!rdev->ops->get_key) return -EOPNOTSUPP; @@ -1482,8 +1513,12 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) if (mac_addr) NLA_PUT(msg, NL80211_ATTR_MAC, ETH_ALEN, mac_addr); - err = rdev->ops->get_key(&rdev->wiphy, dev, key_idx, mac_addr, - &cookie, get_key_callback); + if (pairwise && mac_addr && + !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) + return -ENOENT; + + err = rdev->ops->get_key(&rdev->wiphy, dev, key_idx, pairwise, + mac_addr, &cookie, get_key_callback); if (err) goto free_msg; @@ -1553,7 +1588,7 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) int err; struct net_device *dev = info->user_ptr[1]; struct key_parse key; - u8 *mac_addr = NULL; + const u8 *mac_addr = NULL; err = nl80211_parse_key(info, &key); if (err) @@ -1565,16 +1600,31 @@ static int nl80211_new_key(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); + if (key.type == -1) { + if (mac_addr) + key.type = NL80211_KEYTYPE_PAIRWISE; + else + key.type = NL80211_KEYTYPE_GROUP; + } + + /* for now */ + if (key.type != NL80211_KEYTYPE_PAIRWISE && + key.type != NL80211_KEYTYPE_GROUP) + return -EINVAL; + if (!rdev->ops->add_key) return -EOPNOTSUPP; - if (cfg80211_validate_key_settings(rdev, &key.p, key.idx, mac_addr)) + if (cfg80211_validate_key_settings(rdev, &key.p, key.idx, + key.type == NL80211_KEYTYPE_PAIRWISE, + mac_addr)) return -EINVAL; wdev_lock(dev->ieee80211_ptr); err = nl80211_key_allowed(dev->ieee80211_ptr); if (!err) err = rdev->ops->add_key(&rdev->wiphy, dev, key.idx, + key.type == NL80211_KEYTYPE_PAIRWISE, mac_addr, &key.p); wdev_unlock(dev->ieee80211_ptr); @@ -1596,13 +1646,32 @@ static int nl80211_del_key(struct sk_buff *skb, struct genl_info *info) if (info->attrs[NL80211_ATTR_MAC]) mac_addr = nla_data(info->attrs[NL80211_ATTR_MAC]); + if (key.type == -1) { + if (mac_addr) + key.type = NL80211_KEYTYPE_PAIRWISE; + else + key.type = NL80211_KEYTYPE_GROUP; + } + + /* for now */ + if (key.type != NL80211_KEYTYPE_PAIRWISE && + key.type != NL80211_KEYTYPE_GROUP) + return -EINVAL; + if (!rdev->ops->del_key) return -EOPNOTSUPP; wdev_lock(dev->ieee80211_ptr); err = nl80211_key_allowed(dev->ieee80211_ptr); + + if (key.type == NL80211_KEYTYPE_PAIRWISE && mac_addr && + !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) + err = -ENOENT; + if (!err) - err = rdev->ops->del_key(&rdev->wiphy, dev, key.idx, mac_addr); + err = rdev->ops->del_key(&rdev->wiphy, dev, key.idx, + key.type == NL80211_KEYTYPE_PAIRWISE, + mac_addr); #ifdef CONFIG_CFG80211_WEXT if (!err) { @@ -3212,6 +3281,8 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info) return err; if (key.idx >= 0) { + if (key.type != -1 && key.type != NL80211_KEYTYPE_GROUP) + return -EINVAL; if (!key.p.key || !key.p.key_len) return -EINVAL; if ((key.p.cipher != WLAN_CIPHER_SUITE_WEP40 || diff --git a/net/wireless/sme.c b/net/wireless/sme.c index f161b984454..e17b0bee6bd 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -698,7 +698,7 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, */ if (rdev->ops->del_key) for (i = 0; i < 6; i++) - rdev->ops->del_key(wdev->wiphy, dev, i, NULL); + rdev->ops->del_key(wdev->wiphy, dev, i, false, NULL); #ifdef CONFIG_CFG80211_WEXT memset(&wrqu, 0, sizeof(wrqu)); diff --git a/net/wireless/util.c b/net/wireless/util.c index fb5448f7d55..76120aeda57 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -144,19 +144,25 @@ void ieee80211_set_bitrate_flags(struct wiphy *wiphy) int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, struct key_params *params, int key_idx, - const u8 *mac_addr) + bool pairwise, const u8 *mac_addr) { int i; if (key_idx > 5) return -EINVAL; + if (!pairwise && mac_addr && !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) + return -EINVAL; + + if (pairwise && !mac_addr) + return -EINVAL; + /* * Disallow pairwise keys with non-zero index unless it's WEP * (because current deployments use pairwise WEP keys with * non-zero indizes but 802.11i clearly specifies to use zero) */ - if (mac_addr && key_idx && + if (pairwise && key_idx && params->cipher != WLAN_CIPHER_SUITE_WEP40 && params->cipher != WLAN_CIPHER_SUITE_WEP104) return -EINVAL; @@ -677,7 +683,7 @@ void cfg80211_upload_connect_keys(struct wireless_dev *wdev) for (i = 0; i < 6; i++) { if (!wdev->connect_keys->params[i].cipher) continue; - if (rdev->ops->add_key(wdev->wiphy, dev, i, NULL, + if (rdev->ops->add_key(wdev->wiphy, dev, i, false, NULL, &wdev->connect_keys->params[i])) { printk(KERN_ERR "%s: failed to set key %d\n", dev->name, i); diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 7e5c3a45f81..6002265289c 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -432,14 +432,17 @@ int cfg80211_wext_giwretry(struct net_device *dev, EXPORT_SYMBOL_GPL(cfg80211_wext_giwretry); static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *addr, - bool remove, bool tx_key, int idx, - struct key_params *params) + struct net_device *dev, bool pairwise, + const u8 *addr, bool remove, bool tx_key, + int idx, struct key_params *params) { struct wireless_dev *wdev = dev->ieee80211_ptr; int err, i; bool rejoin = false; + if (pairwise && !addr) + return -EINVAL; + if (!wdev->wext.keys) { wdev->wext.keys = kzalloc(sizeof(*wdev->wext.keys), GFP_KERNEL); @@ -478,7 +481,13 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, __cfg80211_leave_ibss(rdev, wdev->netdev, true); rejoin = true; } - err = rdev->ops->del_key(&rdev->wiphy, dev, idx, addr); + + if (!pairwise && addr && + !(rdev->wiphy.flags & WIPHY_FLAG_IBSS_RSN)) + err = -ENOENT; + else + err = rdev->ops->del_key(&rdev->wiphy, dev, idx, + pairwise, addr); } wdev->wext.connect.privacy = false; /* @@ -507,12 +516,13 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, if (addr) tx_key = false; - if (cfg80211_validate_key_settings(rdev, params, idx, addr)) + if (cfg80211_validate_key_settings(rdev, params, idx, pairwise, addr)) return -EINVAL; err = 0; if (wdev->current_bss) - err = rdev->ops->add_key(&rdev->wiphy, dev, idx, addr, params); + err = rdev->ops->add_key(&rdev->wiphy, dev, idx, + pairwise, addr, params); if (err) return err; @@ -563,17 +573,17 @@ static int __cfg80211_set_encryption(struct cfg80211_registered_device *rdev, } static int cfg80211_set_encryption(struct cfg80211_registered_device *rdev, - struct net_device *dev, const u8 *addr, - bool remove, bool tx_key, int idx, - struct key_params *params) + struct net_device *dev, bool pairwise, + const u8 *addr, bool remove, bool tx_key, + int idx, struct key_params *params) { int err; /* devlist mutex needed for possible IBSS re-join */ mutex_lock(&rdev->devlist_mtx); wdev_lock(dev->ieee80211_ptr); - err = __cfg80211_set_encryption(rdev, dev, addr, remove, - tx_key, idx, params); + err = __cfg80211_set_encryption(rdev, dev, pairwise, addr, + remove, tx_key, idx, params); wdev_unlock(dev->ieee80211_ptr); mutex_unlock(&rdev->devlist_mtx); @@ -635,7 +645,7 @@ int cfg80211_wext_siwencode(struct net_device *dev, else if (!remove) return -EINVAL; - return cfg80211_set_encryption(rdev, dev, NULL, remove, + return cfg80211_set_encryption(rdev, dev, false, NULL, remove, wdev->wext.default_key == -1, idx, ¶ms); } @@ -725,7 +735,9 @@ int cfg80211_wext_siwencodeext(struct net_device *dev, } return cfg80211_set_encryption( - rdev, dev, addr, remove, + rdev, dev, + !(ext->ext_flags & IW_ENCODE_EXT_GROUP_KEY), + addr, remove, ext->ext_flags & IW_ENCODE_EXT_SET_TX_KEY, idx, ¶ms); } -- cgit v1.2.3 From b206b4ef062d83c0875a085672ed50e8c8b01521 Mon Sep 17 00:00:00 2001 From: Bruno Randolf Date: Wed, 6 Oct 2010 18:34:12 +0900 Subject: nl80211/mac80211: Add retry and failed transmission count to station info This information is already available in mac80211, we just need to export it via cfg80211 and nl80211. Signed-off-by: Bruno Randolf Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/linux/nl80211.h | 4 ++++ include/net/cfg80211.h | 8 ++++++++ net/mac80211/cfg.c | 4 ++++ net/wireless/nl80211.c | 6 ++++++ 4 files changed, 22 insertions(+) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index e451f176e66..c08709fe36f 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1137,6 +1137,8 @@ enum nl80211_rate_info { * @NL80211_STA_INFO_RX_PACKETS: total received packet (u32, from this station) * @NL80211_STA_INFO_TX_PACKETS: total transmitted packets (u32, to this * station) + * @NL80211_STA_INFO_TX_RETRIES: total retries (u32, to this station) + * @NL80211_STA_INFO_TX_FAILED: total failed packets (u32, to this station) */ enum nl80211_sta_info { __NL80211_STA_INFO_INVALID, @@ -1150,6 +1152,8 @@ enum nl80211_sta_info { NL80211_STA_INFO_TX_BITRATE, NL80211_STA_INFO_RX_PACKETS, NL80211_STA_INFO_TX_PACKETS, + NL80211_STA_INFO_TX_RETRIES, + NL80211_STA_INFO_TX_FAILED, /* keep last */ __NL80211_STA_INFO_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0f77515266b..e76daaa7dc2 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -401,6 +401,8 @@ struct station_parameters { * (tx_bitrate, tx_bitrate_flags and tx_bitrate_mcs) * @STATION_INFO_RX_PACKETS: @rx_packets filled * @STATION_INFO_TX_PACKETS: @tx_packets filled + * @STATION_INFO_TX_RETRIES: @tx_retries filled + * @STATION_INFO_TX_FAILED: @tx_failed filled */ enum station_info_flags { STATION_INFO_INACTIVE_TIME = 1<<0, @@ -413,6 +415,8 @@ enum station_info_flags { STATION_INFO_TX_BITRATE = 1<<7, STATION_INFO_RX_PACKETS = 1<<8, STATION_INFO_TX_PACKETS = 1<<9, + STATION_INFO_TX_RETRIES = 1<<10, + STATION_INFO_TX_FAILED = 1<<11, }; /** @@ -462,6 +466,8 @@ struct rate_info { * @txrate: current unicast bitrate to this station * @rx_packets: packets received from this station * @tx_packets: packets transmitted to this station + * @tx_retries: cumulative retry counts + * @tx_failed: number of failed transmissions (retries exceeded, no ACK) * @generation: generation number for nl80211 dumps. * This number should increase every time the list of stations * changes, i.e. when a station is added or removed, so that @@ -479,6 +485,8 @@ struct station_info { struct rate_info txrate; u32 rx_packets; u32 tx_packets; + u32 tx_retries; + u32 tx_failed; int generation; }; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 8b0e874a3d6..2e5a3fb38ef 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -327,6 +327,8 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) STATION_INFO_TX_BYTES | STATION_INFO_RX_PACKETS | STATION_INFO_TX_PACKETS | + STATION_INFO_TX_RETRIES | + STATION_INFO_TX_FAILED | STATION_INFO_TX_BITRATE; sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); @@ -334,6 +336,8 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->tx_bytes = sta->tx_bytes; sinfo->rx_packets = sta->rx_packets; sinfo->tx_packets = sta->tx_packets; + sinfo->tx_retries = sta->tx_retry_count; + sinfo->tx_failed = sta->tx_retry_failed; if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) || (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) { diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 9942f0b061f..524f5540283 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -1890,6 +1890,12 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, if (sinfo->filled & STATION_INFO_TX_PACKETS) NLA_PUT_U32(msg, NL80211_STA_INFO_TX_PACKETS, sinfo->tx_packets); + if (sinfo->filled & STATION_INFO_TX_RETRIES) + NLA_PUT_U32(msg, NL80211_STA_INFO_TX_RETRIES, + sinfo->tx_retries); + if (sinfo->filled & STATION_INFO_TX_FAILED) + NLA_PUT_U32(msg, NL80211_STA_INFO_TX_FAILED, + sinfo->tx_failed); nla_nest_end(msg, sinfoattr); return genlmsg_end(msg, hdr); -- cgit v1.2.3 From 767e97e1e0db0d0f3152cd2f3bd3403596aedbad Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 6 Oct 2010 17:49:21 -0700 Subject: neigh: RCU conversion of struct neighbour This is the second step for neighbour RCU conversion. (first was commit d6bf7817 : RCU conversion of neigh hash table) neigh_lookup() becomes lockless, but still take a reference on found neighbour. (no more read_lock()/read_unlock() on tbl->lock) struct neighbour gets an additional rcu_head field and is freed after an RCU grace period. Future work would need to eventually not take a reference on neighbour for temporary dst (DST_NOCACHE), but this would need dst->_neighbour to use a noref bit like we did for skb->_dst. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/neighbour.h | 5 +- net/core/neighbour.c | 137 ++++++++++++++++++++++++++++++------------------ 2 files changed, 88 insertions(+), 54 deletions(-) (limited to 'include/net') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 37845dae648..a4538d55370 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -91,7 +91,7 @@ struct neigh_statistics { #define NEIGH_CACHE_STAT_INC(tbl, field) this_cpu_inc((tbl)->stats->field) struct neighbour { - struct neighbour *next; + struct neighbour __rcu *next; struct neigh_table *tbl; struct neigh_parms *parms; struct net_device *dev; @@ -111,6 +111,7 @@ struct neighbour { struct sk_buff_head arp_queue; struct timer_list timer; const struct neigh_ops *ops; + struct rcu_head rcu; u8 primary_key[0]; }; @@ -139,7 +140,7 @@ struct pneigh_entry { */ struct neigh_hash_table { - struct neighbour **hash_buckets; + struct neighbour __rcu **hash_buckets; unsigned int hash_mask; __u32 hash_rnd; struct rcu_head rcu; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index dd8920e4f50..3ffafaa0414 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -139,10 +139,12 @@ static int neigh_forced_gc(struct neigh_table *tbl) nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); for (i = 0; i <= nht->hash_mask; i++) { - struct neighbour *n, **np; + struct neighbour *n; + struct neighbour __rcu **np; np = &nht->hash_buckets[i]; - while ((n = *np) != NULL) { + while ((n = rcu_dereference_protected(*np, + lockdep_is_held(&tbl->lock))) != NULL) { /* Neighbour record may be discarded if: * - nobody refers to it. * - it is not permanent @@ -150,7 +152,9 @@ static int neigh_forced_gc(struct neigh_table *tbl) write_lock(&n->lock); if (atomic_read(&n->refcnt) == 1 && !(n->nud_state & NUD_PERMANENT)) { - *np = n->next; + rcu_assign_pointer(*np, + rcu_dereference_protected(n->next, + lockdep_is_held(&tbl->lock))); n->dead = 1; shrunk = 1; write_unlock(&n->lock); @@ -208,14 +212,18 @@ static void neigh_flush_dev(struct neigh_table *tbl, struct net_device *dev) lockdep_is_held(&tbl->lock)); for (i = 0; i <= nht->hash_mask; i++) { - struct neighbour *n, **np = &nht->hash_buckets[i]; + struct neighbour *n; + struct neighbour __rcu **np = &nht->hash_buckets[i]; - while ((n = *np) != NULL) { + while ((n = rcu_dereference_protected(*np, + lockdep_is_held(&tbl->lock))) != NULL) { if (dev && n->dev != dev) { np = &n->next; continue; } - *np = n->next; + rcu_assign_pointer(*np, + rcu_dereference_protected(n->next, + lockdep_is_held(&tbl->lock))); write_lock(&n->lock); neigh_del_timer(n); n->dead = 1; @@ -323,7 +331,7 @@ static struct neigh_hash_table *neigh_hash_alloc(unsigned int entries) kfree(ret); return NULL; } - ret->hash_buckets = buckets; + rcu_assign_pointer(ret->hash_buckets, buckets); ret->hash_mask = entries - 1; get_random_bytes(&ret->hash_rnd, sizeof(ret->hash_rnd)); return ret; @@ -362,17 +370,22 @@ static struct neigh_hash_table *neigh_hash_grow(struct neigh_table *tbl, for (i = 0; i <= old_nht->hash_mask; i++) { struct neighbour *n, *next; - for (n = old_nht->hash_buckets[i]; + for (n = rcu_dereference_protected(old_nht->hash_buckets[i], + lockdep_is_held(&tbl->lock)); n != NULL; n = next) { hash = tbl->hash(n->primary_key, n->dev, new_nht->hash_rnd); hash &= new_nht->hash_mask; - next = n->next; - - n->next = new_nht->hash_buckets[hash]; - new_nht->hash_buckets[hash] = n; + next = rcu_dereference_protected(n->next, + lockdep_is_held(&tbl->lock)); + + rcu_assign_pointer(n->next, + rcu_dereference_protected( + new_nht->hash_buckets[hash], + lockdep_is_held(&tbl->lock))); + rcu_assign_pointer(new_nht->hash_buckets[hash], n); } } @@ -394,15 +407,18 @@ struct neighbour *neigh_lookup(struct neigh_table *tbl, const void *pkey, rcu_read_lock_bh(); nht = rcu_dereference_bh(tbl->nht); hash_val = tbl->hash(pkey, dev, nht->hash_rnd) & nht->hash_mask; - read_lock(&tbl->lock); - for (n = nht->hash_buckets[hash_val]; n; n = n->next) { + + for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); + n != NULL; + n = rcu_dereference_bh(n->next)) { if (dev == n->dev && !memcmp(n->primary_key, pkey, key_len)) { - neigh_hold(n); + if (!atomic_inc_not_zero(&n->refcnt)) + n = NULL; NEIGH_CACHE_STAT_INC(tbl, hits); break; } } - read_unlock(&tbl->lock); + rcu_read_unlock_bh(); return n; } @@ -421,16 +437,19 @@ struct neighbour *neigh_lookup_nodev(struct neigh_table *tbl, struct net *net, rcu_read_lock_bh(); nht = rcu_dereference_bh(tbl->nht); hash_val = tbl->hash(pkey, NULL, nht->hash_rnd) & nht->hash_mask; - read_lock(&tbl->lock); - for (n = nht->hash_buckets[hash_val]; n; n = n->next) { + + for (n = rcu_dereference_bh(nht->hash_buckets[hash_val]); + n != NULL; + n = rcu_dereference_bh(n->next)) { if (!memcmp(n->primary_key, pkey, key_len) && net_eq(dev_net(n->dev), net)) { - neigh_hold(n); + if (!atomic_inc_not_zero(&n->refcnt)) + n = NULL; NEIGH_CACHE_STAT_INC(tbl, hits); break; } } - read_unlock(&tbl->lock); + rcu_read_unlock_bh(); return n; } @@ -483,7 +502,11 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, goto out_tbl_unlock; } - for (n1 = nht->hash_buckets[hash_val]; n1; n1 = n1->next) { + for (n1 = rcu_dereference_protected(nht->hash_buckets[hash_val], + lockdep_is_held(&tbl->lock)); + n1 != NULL; + n1 = rcu_dereference_protected(n1->next, + lockdep_is_held(&tbl->lock))) { if (dev == n1->dev && !memcmp(n1->primary_key, pkey, key_len)) { neigh_hold(n1); rc = n1; @@ -491,10 +514,12 @@ struct neighbour *neigh_create(struct neigh_table *tbl, const void *pkey, } } - n->next = nht->hash_buckets[hash_val]; - nht->hash_buckets[hash_val] = n; n->dead = 0; neigh_hold(n); + rcu_assign_pointer(n->next, + rcu_dereference_protected(nht->hash_buckets[hash_val], + lockdep_is_held(&tbl->lock))); + rcu_assign_pointer(nht->hash_buckets[hash_val], n); write_unlock_bh(&tbl->lock); NEIGH_PRINTK2("neigh %p is created.\n", n); rc = n; @@ -651,6 +676,12 @@ static inline void neigh_parms_put(struct neigh_parms *parms) neigh_parms_destroy(parms); } +static void neigh_destroy_rcu(struct rcu_head *head) +{ + struct neighbour *neigh = container_of(head, struct neighbour, rcu); + + kmem_cache_free(neigh->tbl->kmem_cachep, neigh); +} /* * neighbour must already be out of the table; * @@ -690,7 +721,7 @@ void neigh_destroy(struct neighbour *neigh) NEIGH_PRINTK2("neigh %p is destroyed.\n", neigh); atomic_dec(&neigh->tbl->entries); - kmem_cache_free(neigh->tbl->kmem_cachep, neigh); + call_rcu(&neigh->rcu, neigh_destroy_rcu); } EXPORT_SYMBOL(neigh_destroy); @@ -731,7 +762,8 @@ static void neigh_connect(struct neighbour *neigh) static void neigh_periodic_work(struct work_struct *work) { struct neigh_table *tbl = container_of(work, struct neigh_table, gc_work.work); - struct neighbour *n, **np; + struct neighbour *n; + struct neighbour __rcu **np; unsigned int i; struct neigh_hash_table *nht; @@ -756,7 +788,8 @@ static void neigh_periodic_work(struct work_struct *work) for (i = 0 ; i <= nht->hash_mask; i++) { np = &nht->hash_buckets[i]; - while ((n = *np) != NULL) { + while ((n = rcu_dereference_protected(*np, + lockdep_is_held(&tbl->lock))) != NULL) { unsigned int state; write_lock(&n->lock); @@ -1213,8 +1246,8 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, } /* This function can be used in contexts, where only old dev_queue_xmit - worked, f.e. if you want to override normal output path (eql, shaper), - but resolution is not made yet. + * worked, f.e. if you want to override normal output path (eql, shaper), + * but resolution is not made yet. */ int neigh_compat_output(struct sk_buff *skb) @@ -2123,7 +2156,7 @@ static void neigh_update_notify(struct neighbour *neigh) static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, struct netlink_callback *cb) { - struct net * net = sock_net(skb->sk); + struct net *net = sock_net(skb->sk); struct neighbour *n; int rc, h, s_h = cb->args[1]; int idx, s_idx = idx = cb->args[2]; @@ -2132,13 +2165,14 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, rcu_read_lock_bh(); nht = rcu_dereference_bh(tbl->nht); - read_lock(&tbl->lock); for (h = 0; h <= nht->hash_mask; h++) { if (h < s_h) continue; if (h > s_h) s_idx = 0; - for (n = nht->hash_buckets[h], idx = 0; n; n = n->next) { + for (n = rcu_dereference_bh(nht->hash_buckets[h]), idx = 0; + n != NULL; + n = rcu_dereference_bh(n->next)) { if (!net_eq(dev_net(n->dev), net)) continue; if (idx < s_idx) @@ -2150,13 +2184,12 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, rc = -1; goto out; } - next: +next: idx++; } } rc = skb->len; out: - read_unlock(&tbl->lock); rcu_read_unlock_bh(); cb->args[1] = h; cb->args[2] = idx; @@ -2195,11 +2228,13 @@ void neigh_for_each(struct neigh_table *tbl, void (*cb)(struct neighbour *, void rcu_read_lock_bh(); nht = rcu_dereference_bh(tbl->nht); - read_lock(&tbl->lock); + read_lock(&tbl->lock); /* avoid resizes */ for (chain = 0; chain <= nht->hash_mask; chain++) { struct neighbour *n; - for (n = nht->hash_buckets[chain]; n; n = n->next) + for (n = rcu_dereference_bh(nht->hash_buckets[chain]); + n != NULL; + n = rcu_dereference_bh(n->next)) cb(n, cookie); } read_unlock(&tbl->lock); @@ -2217,16 +2252,20 @@ void __neigh_for_each_release(struct neigh_table *tbl, nht = rcu_dereference_protected(tbl->nht, lockdep_is_held(&tbl->lock)); for (chain = 0; chain <= nht->hash_mask; chain++) { - struct neighbour *n, **np; + struct neighbour *n; + struct neighbour __rcu **np; np = &nht->hash_buckets[chain]; - while ((n = *np) != NULL) { + while ((n = rcu_dereference_protected(*np, + lockdep_is_held(&tbl->lock))) != NULL) { int release; write_lock(&n->lock); release = cb(n); if (release) { - *np = n->next; + rcu_assign_pointer(*np, + rcu_dereference_protected(n->next, + lockdep_is_held(&tbl->lock))); n->dead = 1; } else np = &n->next; @@ -2250,7 +2289,7 @@ static struct neighbour *neigh_get_first(struct seq_file *seq) state->flags &= ~NEIGH_SEQ_IS_PNEIGH; for (bucket = 0; bucket <= nht->hash_mask; bucket++) { - n = nht->hash_buckets[bucket]; + n = rcu_dereference_bh(nht->hash_buckets[bucket]); while (n) { if (!net_eq(dev_net(n->dev), net)) @@ -2267,8 +2306,8 @@ static struct neighbour *neigh_get_first(struct seq_file *seq) break; if (n->nud_state & ~NUD_NOARP) break; - next: - n = n->next; +next: + n = rcu_dereference_bh(n->next); } if (n) @@ -2292,7 +2331,7 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, if (v) return n; } - n = n->next; + n = rcu_dereference_bh(n->next); while (1) { while (n) { @@ -2309,8 +2348,8 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, if (n->nud_state & ~NUD_NOARP) break; - next: - n = n->next; +next: + n = rcu_dereference_bh(n->next); } if (n) @@ -2319,7 +2358,7 @@ static struct neighbour *neigh_get_next(struct seq_file *seq, if (++state->bucket > nht->hash_mask) break; - n = nht->hash_buckets[state->bucket]; + n = rcu_dereference_bh(nht->hash_buckets[state->bucket]); } if (n && pos) @@ -2417,7 +2456,6 @@ static void *neigh_get_idx_any(struct seq_file *seq, loff_t *pos) } void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl, unsigned int neigh_seq_flags) - __acquires(tbl->lock) __acquires(rcu_bh) { struct neigh_seq_state *state = seq->private; @@ -2428,7 +2466,7 @@ void *neigh_seq_start(struct seq_file *seq, loff_t *pos, struct neigh_table *tbl rcu_read_lock_bh(); state->nht = rcu_dereference_bh(tbl->nht); - read_lock(&tbl->lock); + return *pos ? neigh_get_idx_any(seq, pos) : SEQ_START_TOKEN; } EXPORT_SYMBOL(neigh_seq_start); @@ -2461,13 +2499,8 @@ out: EXPORT_SYMBOL(neigh_seq_next); void neigh_seq_stop(struct seq_file *seq, void *v) - __releases(tbl->lock) __releases(rcu_bh) { - struct neigh_seq_state *state = seq->private; - struct neigh_table *tbl = state->tbl; - - read_unlock(&tbl->lock); rcu_read_unlock_bh(); } EXPORT_SYMBOL(neigh_seq_stop); -- cgit v1.2.3 From 388ac775be95e510c2095ed6cd59422a5183a9fb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 7 Oct 2010 13:11:09 +0200 Subject: cfg80211: constify WDS address There's no need for the WDS peer address to not be const, so make it const. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 2 +- net/mac80211/cfg.c | 2 +- net/wireless/nl80211.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index e76daaa7dc2..0778d04b3bb 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1229,7 +1229,7 @@ struct cfg80211_ops { int (*get_tx_power)(struct wiphy *wiphy, int *dbm); int (*set_wds_peer)(struct wiphy *wiphy, struct net_device *dev, - u8 *addr); + const u8 *addr); void (*rfkill_poll)(struct wiphy *wiphy); diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 2e5a3fb38ef..ecf9b7166ed 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1363,7 +1363,7 @@ static int ieee80211_get_tx_power(struct wiphy *wiphy, int *dbm) } static int ieee80211_set_wds_peer(struct wiphy *wiphy, struct net_device *dev, - u8 *addr) + const u8 *addr) { struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev); diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 4fed1663aba..882dc921103 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -879,7 +879,7 @@ static int nl80211_set_wds_peer(struct sk_buff *skb, struct genl_info *info) struct cfg80211_registered_device *rdev = info->user_ptr[0]; struct net_device *dev = info->user_ptr[1]; struct wireless_dev *wdev = dev->ieee80211_ptr; - u8 *bssid; + const u8 *bssid; if (!info->attrs[NL80211_ATTR_MAC]) return -EINVAL; -- cgit v1.2.3 From 5a5c731aa59cc2c44ca20f45b1a577cd4f5435e2 Mon Sep 17 00:00:00 2001 From: Ben Greear Date: Thu, 7 Oct 2010 16:39:20 -0700 Subject: wireless: Set some stats used by /proc/net/wireless (wext) Some stats for /proc/net/wireless (and wext in general) are not being set. This patch addresses a few of those with values easily obtained from mac80211 core. Signed-off-by: Ben Greear Signed-off-by: John W. Linville --- include/net/cfg80211.h | 4 ++++ net/mac80211/cfg.c | 4 +++- net/wireless/wext-compat.c | 4 ++++ 3 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 0778d04b3bb..f920a06f363 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -403,6 +403,7 @@ struct station_parameters { * @STATION_INFO_TX_PACKETS: @tx_packets filled * @STATION_INFO_TX_RETRIES: @tx_retries filled * @STATION_INFO_TX_FAILED: @tx_failed filled + * @STATION_INFO_RX_DROP_MISC: @rx_dropped_misc filled */ enum station_info_flags { STATION_INFO_INACTIVE_TIME = 1<<0, @@ -417,6 +418,7 @@ enum station_info_flags { STATION_INFO_TX_PACKETS = 1<<9, STATION_INFO_TX_RETRIES = 1<<10, STATION_INFO_TX_FAILED = 1<<11, + STATION_INFO_RX_DROP_MISC = 1<<12, }; /** @@ -468,6 +470,7 @@ struct rate_info { * @tx_packets: packets transmitted to this station * @tx_retries: cumulative retry counts * @tx_failed: number of failed transmissions (retries exceeded, no ACK) + * @rx_dropped_misc: Dropped for un-specified reason. * @generation: generation number for nl80211 dumps. * This number should increase every time the list of stations * changes, i.e. when a station is added or removed, so that @@ -487,6 +490,7 @@ struct station_info { u32 tx_packets; u32 tx_retries; u32 tx_failed; + u32 rx_dropped_misc; int generation; }; diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index ecf9b7166ed..25fb351e00f 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -329,7 +329,8 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) STATION_INFO_TX_PACKETS | STATION_INFO_TX_RETRIES | STATION_INFO_TX_FAILED | - STATION_INFO_TX_BITRATE; + STATION_INFO_TX_BITRATE | + STATION_INFO_RX_DROP_MISC; sinfo->inactive_time = jiffies_to_msecs(jiffies - sta->last_rx); sinfo->rx_bytes = sta->rx_bytes; @@ -338,6 +339,7 @@ static void sta_set_sinfo(struct sta_info *sta, struct station_info *sinfo) sinfo->tx_packets = sta->tx_packets; sinfo->tx_retries = sta->tx_retry_count; sinfo->tx_failed = sta->tx_retry_failed; + sinfo->rx_dropped_misc = sta->rx_dropped; if ((sta->local->hw.flags & IEEE80211_HW_SIGNAL_DBM) || (sta->local->hw.flags & IEEE80211_HW_SIGNAL_UNSPEC)) { diff --git a/net/wireless/wext-compat.c b/net/wireless/wext-compat.c index 6002265289c..12222ee6ebf 100644 --- a/net/wireless/wext-compat.c +++ b/net/wireless/wext-compat.c @@ -1366,6 +1366,10 @@ struct iw_statistics *cfg80211_wireless_stats(struct net_device *dev) } wstats.qual.updated |= IW_QUAL_NOISE_INVALID; + if (sinfo.filled & STATION_INFO_RX_DROP_MISC) + wstats.discard.misc = sinfo.rx_dropped_misc; + if (sinfo.filled & STATION_INFO_TX_FAILED) + wstats.discard.retries = sinfo.tx_failed; return &wstats; } -- cgit v1.2.3 From 8610c29a2c9f273886b1c31ae4d92c69d4326262 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 9 Oct 2010 02:39:29 +0200 Subject: cfg80211: add channel utilization stats to the survey command Using these, user space can calculate a relative channel utilization with arbitrary intervals by regularly taking snapshots of the survey results. Signed-off-by: Felix Fietkau Signed-off-by: John W. Linville --- include/linux/nl80211.h | 15 +++++++++++++++ include/net/cfg80211.h | 20 ++++++++++++++++++++ net/wireless/nl80211.c | 15 +++++++++++++++ 3 files changed, 50 insertions(+) (limited to 'include/net') diff --git a/include/linux/nl80211.h b/include/linux/nl80211.h index c08709fe36f..0edb2566c14 100644 --- a/include/linux/nl80211.h +++ b/include/linux/nl80211.h @@ -1413,6 +1413,16 @@ enum nl80211_reg_rule_flags { * @NL80211_SURVEY_INFO_FREQUENCY: center frequency of channel * @NL80211_SURVEY_INFO_NOISE: noise level of channel (u8, dBm) * @NL80211_SURVEY_INFO_IN_USE: channel is currently being used + * @NL80211_SURVEY_INFO_CHANNEL_TIME: amount of time (in ms) that the radio + * spent on this channel + * @NL80211_SURVEY_INFO_CHANNEL_TIME_BUSY: amount of the time the primary + * channel was sensed busy (either due to activity or energy detect) + * @NL80211_SURVEY_INFO_CHANNEL_TIME_EXT_BUSY: amount of time the extension + * channel was sensed busy + * @NL80211_SURVEY_INFO_CHANNEL_TIME_RX: amount of time the radio spent + * receiving data + * @NL80211_SURVEY_INFO_CHANNEL_TIME_TX: amount of time the radio spent + * transmitting data * @NL80211_SURVEY_INFO_MAX: highest survey info attribute number * currently defined * @__NL80211_SURVEY_INFO_AFTER_LAST: internal use @@ -1422,6 +1432,11 @@ enum nl80211_survey_info { NL80211_SURVEY_INFO_FREQUENCY, NL80211_SURVEY_INFO_NOISE, NL80211_SURVEY_INFO_IN_USE, + NL80211_SURVEY_INFO_CHANNEL_TIME, + NL80211_SURVEY_INFO_CHANNEL_TIME_BUSY, + NL80211_SURVEY_INFO_CHANNEL_TIME_EXT_BUSY, + NL80211_SURVEY_INFO_CHANNEL_TIME_RX, + NL80211_SURVEY_INFO_CHANNEL_TIME_TX, /* keep last */ __NL80211_SURVEY_INFO_AFTER_LAST, diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index f920a06f363..24d5b586927 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -294,6 +294,11 @@ struct key_params { * * @SURVEY_INFO_NOISE_DBM: noise (in dBm) was filled in * @SURVEY_INFO_IN_USE: channel is currently being used + * @SURVEY_INFO_CHANNEL_TIME: channel active time (in ms) was filled in + * @SURVEY_INFO_CHANNEL_TIME_BUSY: channel busy time was filled in + * @SURVEY_INFO_CHANNEL_TIME_EXT_BUSY: extension channel busy time was filled in + * @SURVEY_INFO_CHANNEL_TIME_RX: channel receive time was filled in + * @SURVEY_INFO_CHANNEL_TIME_TX: channel transmit time was filled in * * Used by the driver to indicate which info in &struct survey_info * it has filled in during the get_survey(). @@ -301,6 +306,11 @@ struct key_params { enum survey_info_flags { SURVEY_INFO_NOISE_DBM = 1<<0, SURVEY_INFO_IN_USE = 1<<1, + SURVEY_INFO_CHANNEL_TIME = 1<<2, + SURVEY_INFO_CHANNEL_TIME_BUSY = 1<<3, + SURVEY_INFO_CHANNEL_TIME_EXT_BUSY = 1<<4, + SURVEY_INFO_CHANNEL_TIME_RX = 1<<5, + SURVEY_INFO_CHANNEL_TIME_TX = 1<<6, }; /** @@ -310,6 +320,11 @@ enum survey_info_flags { * @filled: bitflag of flags from &enum survey_info_flags * @noise: channel noise in dBm. This and all following fields are * optional + * @channel_time: amount of time in ms the radio spent on the channel + * @channel_time_busy: amount of time the primary channel was sensed busy + * @channel_time_ext_busy: amount of time the extension channel was sensed busy + * @channel_time_rx: amount of time the radio spent receiving data + * @channel_time_tx: amount of time the radio spent transmitting data * * Used by dump_survey() to report back per-channel survey information. * @@ -318,6 +333,11 @@ enum survey_info_flags { */ struct survey_info { struct ieee80211_channel *channel; + u64 channel_time; + u64 channel_time_busy; + u64 channel_time_ext_busy; + u64 channel_time_rx; + u64 channel_time_tx; u32 filled; s8 noise; }; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 882dc921103..c506241f863 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -3153,6 +3153,21 @@ static int nl80211_send_survey(struct sk_buff *msg, u32 pid, u32 seq, survey->noise); if (survey->filled & SURVEY_INFO_IN_USE) NLA_PUT_FLAG(msg, NL80211_SURVEY_INFO_IN_USE); + if (survey->filled & SURVEY_INFO_CHANNEL_TIME) + NLA_PUT_U64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME, + survey->channel_time); + if (survey->filled & SURVEY_INFO_CHANNEL_TIME_BUSY) + NLA_PUT_U64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_BUSY, + survey->channel_time_busy); + if (survey->filled & SURVEY_INFO_CHANNEL_TIME_EXT_BUSY) + NLA_PUT_U64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_EXT_BUSY, + survey->channel_time_ext_busy); + if (survey->filled & SURVEY_INFO_CHANNEL_TIME_RX) + NLA_PUT_U64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_RX, + survey->channel_time_rx); + if (survey->filled & SURVEY_INFO_CHANNEL_TIME_TX) + NLA_PUT_U64(msg, NL80211_SURVEY_INFO_CHANNEL_TIME_TX, + survey->channel_time_tx); nla_nest_end(msg, infoattr); -- cgit v1.2.3 From 0ed8ddf4045fcfcac36bad753dc4046118c603ec Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 7 Oct 2010 10:44:07 +0000 Subject: neigh: Protect neigh->ha[] with a seqlock Add a seqlock in struct neighbour to protect neigh->ha[], and avoid dirtying neighbour in stress situation (many different flows / dsts) Dirtying takes place because of read_lock(&n->lock) and n->used writes. Switching to a seqlock, and writing n->used only on jiffies changes permits less dirtying. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/neighbour.h | 16 +++++++++++++++- net/core/neighbour.c | 47 ++++++++++++++++++++++++++++++----------------- net/ipv4/arp.c | 6 ++---- net/sched/sch_teql.c | 8 ++++---- 4 files changed, 51 insertions(+), 26 deletions(-) (limited to 'include/net') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index a4538d55370..f04e7a2522c 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -105,6 +105,7 @@ struct neighbour { atomic_t refcnt; atomic_t probes; rwlock_t lock; + seqlock_t ha_lock; unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))]; struct hh_cache *hh; int (*output)(struct sk_buff *skb); @@ -302,7 +303,10 @@ static inline void neigh_confirm(struct neighbour *neigh) static inline int neigh_event_send(struct neighbour *neigh, struct sk_buff *skb) { - neigh->used = jiffies; + unsigned long now = ACCESS_ONCE(jiffies); + + if (neigh->used != now) + neigh->used = now; if (!(neigh->nud_state&(NUD_CONNECTED|NUD_DELAY|NUD_PROBE))) return __neigh_event_send(neigh, skb); return 0; @@ -373,4 +377,14 @@ struct neighbour_cb { #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) +static inline void neigh_ha_snapshot(char *dst, const struct neighbour *n, + const struct net_device *dev) +{ + unsigned int seq; + + do { + seq = read_seqbegin(&n->ha_lock); + memcpy(dst, n->ha, dev->addr_len); + } while (read_seqretry(&n->ha_lock, seq)); +} #endif diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 2044906ecd1..b165b96355b 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -294,6 +294,7 @@ static struct neighbour *neigh_alloc(struct neigh_table *tbl) skb_queue_head_init(&n->arp_queue); rwlock_init(&n->lock); + seqlock_init(&n->ha_lock); n->updated = n->used = now; n->nud_state = NUD_NONE; n->output = neigh_blackhole; @@ -1015,7 +1016,7 @@ out_unlock_bh: } EXPORT_SYMBOL(__neigh_event_send); -static void neigh_update_hhs(struct neighbour *neigh) +static void neigh_update_hhs(const struct neighbour *neigh) { struct hh_cache *hh; void (*update)(struct hh_cache*, const struct net_device*, const unsigned char *) @@ -1151,7 +1152,9 @@ int neigh_update(struct neighbour *neigh, const u8 *lladdr, u8 new, } if (lladdr != neigh->ha) { + write_seqlock(&neigh->ha_lock); memcpy(&neigh->ha, lladdr, dev->addr_len); + write_sequnlock(&neigh->ha_lock); neigh_update_hhs(neigh); if (!(new & NUD_CONNECTED)) neigh->confirmed = jiffies - @@ -1214,6 +1217,7 @@ static inline bool neigh_hh_lookup(struct neighbour *n, struct dst_entry *dst, { struct hh_cache *hh; + smp_rmb(); /* paired with smp_wmb() in neigh_hh_init() */ for (hh = n->hh; hh; hh = hh->hh_next) { if (hh->hh_type == protocol) { atomic_inc(&hh->hh_refcnt); @@ -1248,8 +1252,8 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, kfree(hh); return; } - read_unlock(&n->lock); - write_lock(&n->lock); + + write_lock_bh(&n->lock); /* must check if another thread already did the insert */ if (neigh_hh_lookup(n, dst, protocol)) { @@ -1263,13 +1267,13 @@ static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, hh->hh_output = n->ops->output; hh->hh_next = n->hh; + smp_wmb(); /* paired with smp_rmb() in neigh_hh_lookup() */ n->hh = hh; if (unlikely(cmpxchg(&dst->hh, NULL, hh) != NULL)) hh_cache_put(hh); end: - write_unlock(&n->lock); - read_lock(&n->lock); + write_unlock_bh(&n->lock); } /* This function can be used in contexts, where only old dev_queue_xmit @@ -1308,16 +1312,18 @@ int neigh_resolve_output(struct sk_buff *skb) if (!neigh_event_send(neigh, skb)) { int err; struct net_device *dev = neigh->dev; + unsigned int seq; - read_lock_bh(&neigh->lock); if (dev->header_ops->cache && !dst->hh && !(dst->flags & DST_NOCACHE)) neigh_hh_init(neigh, dst, dst->ops->protocol); - err = dev_hard_header(skb, dev, ntohs(skb->protocol), - neigh->ha, NULL, skb->len); - read_unlock_bh(&neigh->lock); + do { + seq = read_seqbegin(&neigh->ha_lock); + err = dev_hard_header(skb, dev, ntohs(skb->protocol), + neigh->ha, NULL, skb->len); + } while (read_seqretry(&neigh->ha_lock, seq)); if (err >= 0) rc = neigh->ops->queue_xmit(skb); @@ -1344,13 +1350,16 @@ int neigh_connected_output(struct sk_buff *skb) struct dst_entry *dst = skb_dst(skb); struct neighbour *neigh = dst->neighbour; struct net_device *dev = neigh->dev; + unsigned int seq; __skb_pull(skb, skb_network_offset(skb)); - read_lock_bh(&neigh->lock); - err = dev_hard_header(skb, dev, ntohs(skb->protocol), - neigh->ha, NULL, skb->len); - read_unlock_bh(&neigh->lock); + do { + seq = read_seqbegin(&neigh->ha_lock); + err = dev_hard_header(skb, dev, ntohs(skb->protocol), + neigh->ha, NULL, skb->len); + } while (read_seqretry(&neigh->ha_lock, seq)); + if (err >= 0) err = neigh->ops->queue_xmit(skb); else { @@ -2148,10 +2157,14 @@ static int neigh_fill_info(struct sk_buff *skb, struct neighbour *neigh, read_lock_bh(&neigh->lock); ndm->ndm_state = neigh->nud_state; - if ((neigh->nud_state & NUD_VALID) && - nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, neigh->ha) < 0) { - read_unlock_bh(&neigh->lock); - goto nla_put_failure; + if (neigh->nud_state & NUD_VALID) { + char haddr[MAX_ADDR_LEN]; + + neigh_ha_snapshot(haddr, neigh, neigh->dev); + if (nla_put(skb, NDA_LLADDR, neigh->dev->addr_len, haddr) < 0) { + read_unlock_bh(&neigh->lock); + goto nla_put_failure; + } } ci.ndm_used = jiffies_to_clock_t(now - neigh->used); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index f3530957817..d8e540c5b07 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -502,10 +502,8 @@ int arp_find(unsigned char *haddr, struct sk_buff *skb) if (n) { n->used = jiffies; - if (n->nud_state&NUD_VALID || neigh_event_send(n, skb) == 0) { - read_lock_bh(&n->lock); - memcpy(haddr, n->ha, dev->addr_len); - read_unlock_bh(&n->lock); + if (n->nud_state & NUD_VALID || neigh_event_send(n, skb) == 0) { + neigh_ha_snapshot(haddr, n, dev); neigh_release(n); return 0; } diff --git a/net/sched/sch_teql.c b/net/sched/sch_teql.c index feaabc103ce..401af959670 100644 --- a/net/sched/sch_teql.c +++ b/net/sched/sch_teql.c @@ -241,11 +241,11 @@ __teql_resolve(struct sk_buff *skb, struct sk_buff *skb_res, struct net_device * } if (neigh_event_send(n, skb_res) == 0) { int err; + char haddr[MAX_ADDR_LEN]; - read_lock(&n->lock); - err = dev_hard_header(skb, dev, ntohs(skb->protocol), - n->ha, NULL, skb->len); - read_unlock(&n->lock); + neigh_ha_snapshot(haddr, n, dev); + err = dev_hard_header(skb, dev, ntohs(skb->protocol), haddr, + NULL, skb->len); if (err < 0) { neigh_release(n); -- cgit v1.2.3 From fc66f95c68b6d4535a0ea2ea15d5cf626e310956 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 8 Oct 2010 06:37:34 +0000 Subject: net dst: use a percpu_counter to track entries struct dst_ops tracks number of allocated dst in an atomic_t field, subject to high cache line contention in stress workload. Switch to a percpu_counter, to reduce number of time we need to dirty a central location. Place it on a separate cache line to avoid dirtying read only fields. Stress test : (Sending 160.000.000 UDP frames, IP route cache disabled, dual E5540 @2.53GHz, 32bit kernel, FIB_TRIE, SLUB/NUMA) Before: real 0m51.179s user 0m15.329s sys 10m15.942s After: real 0m45.570s user 0m15.525s sys 9m56.669s With a small reordering of struct neighbour fields, subject of a following patch, (to separate refcnt from other read mostly fields) real 0m41.841s user 0m15.261s sys 8m45.949s Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/dst_ops.h | 37 ++++++++++++++++++++++++++++++++++++- net/bridge/br_netfilter.c | 11 +++++++++-- net/core/dst.c | 6 +++--- net/decnet/dn_route.c | 3 ++- net/ipv4/route.c | 36 ++++++++++++++++++++++-------------- net/ipv4/xfrm4_policy.c | 4 ++-- net/ipv6/route.c | 28 ++++++++++++++++++++-------- net/ipv6/xfrm6_policy.c | 10 ++++++---- 8 files changed, 100 insertions(+), 35 deletions(-) (limited to 'include/net') diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index d1ff9b7e99b..1fa5306e3e2 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -1,6 +1,7 @@ #ifndef _NET_DST_OPS_H #define _NET_DST_OPS_H #include +#include struct dst_entry; struct kmem_cachep; @@ -22,7 +23,41 @@ struct dst_ops { void (*update_pmtu)(struct dst_entry *dst, u32 mtu); int (*local_out)(struct sk_buff *skb); - atomic_t entries; struct kmem_cache *kmem_cachep; + + struct percpu_counter pcpuc_entries ____cacheline_aligned_in_smp; }; + +static inline int dst_entries_get_fast(struct dst_ops *dst) +{ + return percpu_counter_read_positive(&dst->pcpuc_entries); +} + +static inline int dst_entries_get_slow(struct dst_ops *dst) +{ + int res; + + local_bh_disable(); + res = percpu_counter_sum_positive(&dst->pcpuc_entries); + local_bh_enable(); + return res; +} + +static inline void dst_entries_add(struct dst_ops *dst, int val) +{ + local_bh_disable(); + percpu_counter_add(&dst->pcpuc_entries, val); + local_bh_enable(); +} + +static inline int dst_entries_init(struct dst_ops *dst) +{ + return percpu_counter_init(&dst->pcpuc_entries, 0); +} + +static inline void dst_entries_destroy(struct dst_ops *dst) +{ + percpu_counter_destroy(&dst->pcpuc_entries); +} + #endif diff --git a/net/bridge/br_netfilter.c b/net/bridge/br_netfilter.c index 77f7b5fda45..7f9ce9600ef 100644 --- a/net/bridge/br_netfilter.c +++ b/net/bridge/br_netfilter.c @@ -106,7 +106,6 @@ static struct dst_ops fake_dst_ops = { .family = AF_INET, .protocol = cpu_to_be16(ETH_P_IP), .update_pmtu = fake_update_pmtu, - .entries = ATOMIC_INIT(0), }; /* @@ -1003,15 +1002,22 @@ int __init br_netfilter_init(void) { int ret; - ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); + ret = dst_entries_init(&fake_dst_ops); if (ret < 0) return ret; + + ret = nf_register_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); + if (ret < 0) { + dst_entries_destroy(&fake_dst_ops); + return ret; + } #ifdef CONFIG_SYSCTL brnf_sysctl_header = register_sysctl_paths(brnf_path, brnf_table); if (brnf_sysctl_header == NULL) { printk(KERN_WARNING "br_netfilter: can't register to sysctl.\n"); nf_unregister_hooks(br_nf_ops, ARRAY_SIZE(br_nf_ops)); + dst_entries_destroy(&fake_dst_ops); return -ENOMEM; } #endif @@ -1025,4 +1031,5 @@ void br_netfilter_fini(void) #ifdef CONFIG_SYSCTL unregister_sysctl_table(brnf_sysctl_header); #endif + dst_entries_destroy(&fake_dst_ops); } diff --git a/net/core/dst.c b/net/core/dst.c index 978a1ee1f7d..32e542d7f47 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -168,7 +168,7 @@ void *dst_alloc(struct dst_ops *ops) { struct dst_entry *dst; - if (ops->gc && atomic_read(&ops->entries) > ops->gc_thresh) { + if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) { if (ops->gc(ops)) return NULL; } @@ -183,7 +183,7 @@ void *dst_alloc(struct dst_ops *ops) #if RT_CACHE_DEBUG >= 2 atomic_inc(&dst_total); #endif - atomic_inc(&ops->entries); + dst_entries_add(ops, 1); return dst; } EXPORT_SYMBOL(dst_alloc); @@ -236,7 +236,7 @@ again: neigh_release(neigh); } - atomic_dec(&dst->ops->entries); + dst_entries_add(dst->ops, -1); if (dst->ops->destroy) dst->ops->destroy(dst); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 6585ea6d118..df0f3e54ff8 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -132,7 +132,6 @@ static struct dst_ops dn_dst_ops = { .negative_advice = dn_dst_negative_advice, .link_failure = dn_dst_link_failure, .update_pmtu = dn_dst_update_pmtu, - .entries = ATOMIC_INIT(0), }; static __inline__ unsigned dn_hash(__le16 src, __le16 dst) @@ -1758,6 +1757,7 @@ void __init dn_route_init(void) dn_dst_ops.kmem_cachep = kmem_cache_create("dn_dst_cache", sizeof(struct dn_route), 0, SLAB_HWCACHE_ALIGN|SLAB_PANIC, NULL); + dst_entries_init(&dn_dst_ops); setup_timer(&dn_route_timer, dn_dst_check_expire, 0); dn_route_timer.expires = jiffies + decnet_dst_gc_interval * HZ; add_timer(&dn_route_timer); @@ -1816,5 +1816,6 @@ void __exit dn_route_cleanup(void) dn_run_flush(0); proc_net_remove(&init_net, "decnet_cache"); + dst_entries_destroy(&dn_dst_ops); } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 3888f6ba0a5..0755aa4af86 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -159,7 +159,6 @@ static struct dst_ops ipv4_dst_ops = { .link_failure = ipv4_link_failure, .update_pmtu = ip_rt_update_pmtu, .local_out = __ip_local_out, - .entries = ATOMIC_INIT(0), }; #define ECN_OR_COST(class) TC_PRIO_##class @@ -466,7 +465,7 @@ static int rt_cpu_seq_show(struct seq_file *seq, void *v) seq_printf(seq,"%08x %08x %08x %08x %08x %08x %08x %08x " " %08x %08x %08x %08x %08x %08x %08x %08x %08x \n", - atomic_read(&ipv4_dst_ops.entries), + dst_entries_get_slow(&ipv4_dst_ops), st->in_hit, st->in_slow_tot, st->in_slow_mc, @@ -945,6 +944,7 @@ static int rt_garbage_collect(struct dst_ops *ops) struct rtable *rth, **rthp; unsigned long now = jiffies; int goal; + int entries = dst_entries_get_fast(&ipv4_dst_ops); /* * Garbage collection is pretty expensive, @@ -954,28 +954,28 @@ static int rt_garbage_collect(struct dst_ops *ops) RT_CACHE_STAT_INC(gc_total); if (now - last_gc < ip_rt_gc_min_interval && - atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) { + entries < ip_rt_max_size) { RT_CACHE_STAT_INC(gc_ignored); goto out; } + entries = dst_entries_get_slow(&ipv4_dst_ops); /* Calculate number of entries, which we want to expire now. */ - goal = atomic_read(&ipv4_dst_ops.entries) - - (ip_rt_gc_elasticity << rt_hash_log); + goal = entries - (ip_rt_gc_elasticity << rt_hash_log); if (goal <= 0) { if (equilibrium < ipv4_dst_ops.gc_thresh) equilibrium = ipv4_dst_ops.gc_thresh; - goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium; + goal = entries - equilibrium; if (goal > 0) { equilibrium += min_t(unsigned int, goal >> 1, rt_hash_mask + 1); - goal = atomic_read(&ipv4_dst_ops.entries) - equilibrium; + goal = entries - equilibrium; } } else { /* We are in dangerous area. Try to reduce cache really * aggressively. */ goal = max_t(unsigned int, goal >> 1, rt_hash_mask + 1); - equilibrium = atomic_read(&ipv4_dst_ops.entries) - goal; + equilibrium = entries - goal; } if (now - last_gc >= ip_rt_gc_min_interval) @@ -1032,14 +1032,16 @@ static int rt_garbage_collect(struct dst_ops *ops) expire >>= 1; #if RT_CACHE_DEBUG >= 2 printk(KERN_DEBUG "expire>> %u %d %d %d\n", expire, - atomic_read(&ipv4_dst_ops.entries), goal, i); + dst_entries_get_fast(&ipv4_dst_ops), goal, i); #endif - if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) + if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size) goto out; } while (!in_softirq() && time_before_eq(jiffies, now)); - if (atomic_read(&ipv4_dst_ops.entries) < ip_rt_max_size) + if (dst_entries_get_fast(&ipv4_dst_ops) < ip_rt_max_size) + goto out; + if (dst_entries_get_slow(&ipv4_dst_ops) < ip_rt_max_size) goto out; if (net_ratelimit()) printk(KERN_WARNING "dst cache overflow\n"); @@ -1049,11 +1051,12 @@ static int rt_garbage_collect(struct dst_ops *ops) work_done: expire += ip_rt_gc_min_interval; if (expire > ip_rt_gc_timeout || - atomic_read(&ipv4_dst_ops.entries) < ipv4_dst_ops.gc_thresh) + dst_entries_get_fast(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh || + dst_entries_get_slow(&ipv4_dst_ops) < ipv4_dst_ops.gc_thresh) expire = ip_rt_gc_timeout; #if RT_CACHE_DEBUG >= 2 printk(KERN_DEBUG "expire++ %u %d %d %d\n", expire, - atomic_read(&ipv4_dst_ops.entries), goal, rover); + dst_entries_get_fast(&ipv4_dst_ops), goal, rover); #endif out: return 0; } @@ -2717,7 +2720,6 @@ static struct dst_ops ipv4_dst_blackhole_ops = { .destroy = ipv4_dst_destroy, .check = ipv4_blackhole_dst_check, .update_pmtu = ipv4_rt_blackhole_update_pmtu, - .entries = ATOMIC_INIT(0), }; @@ -3287,6 +3289,12 @@ int __init ip_rt_init(void) ipv4_dst_blackhole_ops.kmem_cachep = ipv4_dst_ops.kmem_cachep; + if (dst_entries_init(&ipv4_dst_ops) < 0) + panic("IP: failed to allocate ipv4_dst_ops counter\n"); + + if (dst_entries_init(&ipv4_dst_blackhole_ops) < 0) + panic("IP: failed to allocate ipv4_dst_blackhole_ops counter\n"); + rt_hash_table = (struct rt_hash_bucket *) alloc_large_system_hash("IP route cache", sizeof(struct rt_hash_bucket), diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index a580349f0b8..4464f3bff6a 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -174,7 +174,7 @@ static inline int xfrm4_garbage_collect(struct dst_ops *ops) struct net *net = container_of(ops, struct net, xfrm.xfrm4_dst_ops); xfrm4_policy_afinfo.garbage_collect(net); - return (atomic_read(&ops->entries) > ops->gc_thresh * 2); + return (dst_entries_get_slow(ops) > ops->gc_thresh * 2); } static void xfrm4_update_pmtu(struct dst_entry *dst, u32 mtu) @@ -232,7 +232,6 @@ static struct dst_ops xfrm4_dst_ops = { .ifdown = xfrm4_dst_ifdown, .local_out = __ip_local_out, .gc_thresh = 1024, - .entries = ATOMIC_INIT(0), }; static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { @@ -288,6 +287,7 @@ void __init xfrm4_init(int rt_max_size) * and start cleaning when were 1/2 full */ xfrm4_dst_ops.gc_thresh = rt_max_size/2; + dst_entries_init(&xfrm4_dst_ops); xfrm4_state_init(); xfrm4_policy_init(); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 17e21793388..25661f968f3 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -109,7 +109,6 @@ static struct dst_ops ip6_dst_ops_template = { .link_failure = ip6_link_failure, .update_pmtu = ip6_rt_update_pmtu, .local_out = __ip6_local_out, - .entries = ATOMIC_INIT(0), }; static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, u32 mtu) @@ -122,7 +121,6 @@ static struct dst_ops ip6_dst_blackhole_ops = { .destroy = ip6_dst_destroy, .check = ip6_dst_check, .update_pmtu = ip6_rt_blackhole_update_pmtu, - .entries = ATOMIC_INIT(0), }; static struct rt6_info ip6_null_entry_template = { @@ -1058,19 +1056,22 @@ static int ip6_dst_gc(struct dst_ops *ops) int rt_elasticity = net->ipv6.sysctl.ip6_rt_gc_elasticity; int rt_gc_timeout = net->ipv6.sysctl.ip6_rt_gc_timeout; unsigned long rt_last_gc = net->ipv6.ip6_rt_last_gc; + int entries; + entries = dst_entries_get_fast(ops); if (time_after(rt_last_gc + rt_min_interval, now) && - atomic_read(&ops->entries) <= rt_max_size) + entries <= rt_max_size) goto out; net->ipv6.ip6_rt_gc_expire++; fib6_run_gc(net->ipv6.ip6_rt_gc_expire, net); net->ipv6.ip6_rt_last_gc = now; - if (atomic_read(&ops->entries) < ops->gc_thresh) + entries = dst_entries_get_slow(ops); + if (entries < ops->gc_thresh) net->ipv6.ip6_rt_gc_expire = rt_gc_timeout>>1; out: net->ipv6.ip6_rt_gc_expire -= net->ipv6.ip6_rt_gc_expire>>rt_elasticity; - return atomic_read(&ops->entries) > rt_max_size; + return entries > rt_max_size; } /* Clean host part of a prefix. Not necessary in radix tree, @@ -2524,7 +2525,7 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v) net->ipv6.rt6_stats->fib_rt_alloc, net->ipv6.rt6_stats->fib_rt_entries, net->ipv6.rt6_stats->fib_rt_cache, - atomic_read(&net->ipv6.ip6_dst_ops.entries), + dst_entries_get_slow(&net->ipv6.ip6_dst_ops), net->ipv6.rt6_stats->fib_discarded_routes); return 0; @@ -2666,11 +2667,14 @@ static int __net_init ip6_route_net_init(struct net *net) memcpy(&net->ipv6.ip6_dst_ops, &ip6_dst_ops_template, sizeof(net->ipv6.ip6_dst_ops)); + if (dst_entries_init(&net->ipv6.ip6_dst_ops) < 0) + goto out_ip6_dst_ops; + net->ipv6.ip6_null_entry = kmemdup(&ip6_null_entry_template, sizeof(*net->ipv6.ip6_null_entry), GFP_KERNEL); if (!net->ipv6.ip6_null_entry) - goto out_ip6_dst_ops; + goto out_ip6_dst_entries; net->ipv6.ip6_null_entry->dst.path = (struct dst_entry *)net->ipv6.ip6_null_entry; net->ipv6.ip6_null_entry->dst.ops = &net->ipv6.ip6_dst_ops; @@ -2720,6 +2724,8 @@ out_ip6_prohibit_entry: out_ip6_null_entry: kfree(net->ipv6.ip6_null_entry); #endif +out_ip6_dst_entries: + dst_entries_destroy(&net->ipv6.ip6_dst_ops); out_ip6_dst_ops: goto out; } @@ -2758,10 +2764,14 @@ int __init ip6_route_init(void) if (!ip6_dst_ops_template.kmem_cachep) goto out; - ret = register_pernet_subsys(&ip6_route_net_ops); + ret = dst_entries_init(&ip6_dst_blackhole_ops); if (ret) goto out_kmem_cache; + ret = register_pernet_subsys(&ip6_route_net_ops); + if (ret) + goto out_dst_entries; + ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; /* Registering of the loopback is done before this portion of code, @@ -2808,6 +2818,8 @@ out_fib6_init: fib6_gc_cleanup(); out_register_subsys: unregister_pernet_subsys(&ip6_route_net_ops); +out_dst_entries: + dst_entries_destroy(&ip6_dst_blackhole_ops); out_kmem_cache: kmem_cache_destroy(ip6_dst_ops_template.kmem_cachep); goto out; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 39676eac3a3..7e74023ea6e 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -199,7 +199,7 @@ static inline int xfrm6_garbage_collect(struct dst_ops *ops) struct net *net = container_of(ops, struct net, xfrm.xfrm6_dst_ops); xfrm6_policy_afinfo.garbage_collect(net); - return atomic_read(&ops->entries) > ops->gc_thresh * 2; + return dst_entries_get_fast(ops) > ops->gc_thresh * 2; } static void xfrm6_update_pmtu(struct dst_entry *dst, u32 mtu) @@ -255,7 +255,6 @@ static struct dst_ops xfrm6_dst_ops = { .ifdown = xfrm6_dst_ifdown, .local_out = __ip6_local_out, .gc_thresh = 1024, - .entries = ATOMIC_INIT(0), }; static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { @@ -312,11 +311,13 @@ int __init xfrm6_init(void) */ gc_thresh = FIB6_TABLE_HASHSZ * 8; xfrm6_dst_ops.gc_thresh = (gc_thresh < 1024) ? 1024 : gc_thresh; + dst_entries_init(&xfrm6_dst_ops); ret = xfrm6_policy_init(); - if (ret) + if (ret) { + dst_entries_destroy(&xfrm6_dst_ops); goto out; - + } ret = xfrm6_state_init(); if (ret) goto out_policy; @@ -341,4 +342,5 @@ void xfrm6_fini(void) //xfrm6_input_fini(); xfrm6_policy_fini(); xfrm6_state_fini(); + dst_entries_destroy(&xfrm6_dst_ops); } -- cgit v1.2.3 From e37ef961e50d74f55e9edb48e54dd2e7963aad39 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 11 Oct 2010 12:20:54 +0000 Subject: neigh: reorder struct neighbour fields MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Le mardi 12 octobre 2010 à 00:02 +0200, Eric Dumazet a écrit : > Here is the followup patch. > > Thanks ! > Oops, this was an old version, the up2date ones also took care of "used" field. I guess its time for a sleep, sorry again. [PATCH net-next V2] neigh: reorder struct neighbour fields (refcnt) and (ha_lock, ha, used, dev, output, ops, primary_key) should be placed on a separate cache lines. refcnt can be often written, while other fields are mostly read. This gave me good result on stress test : before: real 0m45.570s user 0m15.525s sys 9m56.669s After: real 0m41.841s user 0m15.261s sys 8m45.949s Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/neighbour.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index f04e7a2522c..55590ab16b3 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -94,8 +94,6 @@ struct neighbour { struct neighbour __rcu *next; struct neigh_table *tbl; struct neigh_parms *parms; - struct net_device *dev; - unsigned long used; unsigned long confirmed; unsigned long updated; __u8 flags; @@ -103,16 +101,18 @@ struct neighbour { __u8 type; __u8 dead; atomic_t refcnt; + struct sk_buff_head arp_queue; + struct timer_list timer; + unsigned long used; atomic_t probes; rwlock_t lock; seqlock_t ha_lock; unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))]; struct hh_cache *hh; int (*output)(struct sk_buff *skb); - struct sk_buff_head arp_queue; - struct timer_list timer; const struct neigh_ops *ops; struct rcu_head rcu; + struct net_device *dev; u8 primary_key[0]; }; -- cgit v1.2.3 From 8f1e1742233cd1c3444dfc6c945a2efb2814e157 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Mon, 9 Aug 2010 17:38:10 -0400 Subject: Bluetooth: HCI devices are either BR/EDR or AMP radios HCI transport drivers may not know what type of radio an AMP device has so only say whether they're BR/EDR or AMP devices. Signed-off-by: David Vrabel Signed-off-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- drivers/bluetooth/btmrvl_main.c | 4 ++-- include/net/bluetooth/hci.h | 2 +- net/bluetooth/hci_sysfs.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/drivers/bluetooth/btmrvl_main.c b/drivers/bluetooth/btmrvl_main.c index 0d32ec82e9b..548d1d9e4dd 100644 --- a/drivers/bluetooth/btmrvl_main.c +++ b/drivers/bluetooth/btmrvl_main.c @@ -117,8 +117,8 @@ int btmrvl_process_event(struct btmrvl_private *priv, struct sk_buff *skb) (event->data[2] == MODULE_ALREADY_UP)) ? "Bring-up succeed" : "Bring-up failed"); - if (event->length > 3) - priv->btmrvl_dev.dev_type = event->data[3]; + if (event->length > 3 && event->data[3]) + priv->btmrvl_dev.dev_type = HCI_AMP; else priv->btmrvl_dev.dev_type = HCI_BREDR; diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index bcbdd6d4e6d..e30e0083434 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -54,7 +54,7 @@ /* HCI controller types */ #define HCI_BREDR 0x00 -#define HCI_80211 0x01 +#define HCI_AMP 0x01 /* HCI device quirks */ enum { diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 8fb967beee8..1a9f0db027d 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -196,8 +196,8 @@ static inline char *host_typetostr(int type) switch (type) { case HCI_BREDR: return "BR/EDR"; - case HCI_80211: - return "802.11"; + case HCI_AMP: + return "AMP"; default: return "UNKNOWN"; } -- cgit v1.2.3 From 796c86eec84ddfd02281c5071838ed1fefda6b90 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Wed, 8 Sep 2010 10:05:27 -0700 Subject: Bluetooth: Add common code for stream-oriented recvmsg() This commit adds a bt_sock_stream_recvmsg() function for use by any Bluetooth code that uses SOCK_STREAM sockets. This code is copied from rfcomm_sock_recvmsg() with minimal modifications to remove RFCOMM-specific functionality and improve readability. L2CAP (with the SOCK_STREAM socket type) and RFCOMM have common needs when it comes to reading data. Proper stream read semantics require that applications can read from a stream one byte at a time and not lose any data. The RFCOMM code already operated on and pulled data from the underlying L2CAP socket, so very few changes were required to make the code more generic for use with non-RFCOMM data over L2CAP. Applications that need more awareness of L2CAP frame boundaries are still free to use SOCK_SEQPACKET sockets, and may verify that they connection did not fall back to basic mode by calling getsockopt(). Signed-off-by: Mat Martineau Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- include/net/bluetooth/bluetooth.h | 2 + net/bluetooth/af_bluetooth.c | 109 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 111 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h index 30fce0128dd..d81ea799770 100644 --- a/include/net/bluetooth/bluetooth.h +++ b/include/net/bluetooth/bluetooth.h @@ -126,6 +126,8 @@ int bt_sock_unregister(int proto); void bt_sock_link(struct bt_sock_list *l, struct sock *s); void bt_sock_unlink(struct bt_sock_list *l, struct sock *s); int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, struct msghdr *msg, size_t len, int flags); +int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t len, int flags); uint bt_sock_poll(struct file * file, struct socket *sock, poll_table *wait); int bt_sock_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg); int bt_sock_wait_state(struct sock *sk, int state, unsigned long timeo); diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index ed0f22f5766..c4cf3f59500 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -265,6 +265,115 @@ int bt_sock_recvmsg(struct kiocb *iocb, struct socket *sock, } EXPORT_SYMBOL(bt_sock_recvmsg); +static long bt_sock_data_wait(struct sock *sk, long timeo) +{ + DECLARE_WAITQUEUE(wait, current); + + add_wait_queue(sk_sleep(sk), &wait); + for (;;) { + set_current_state(TASK_INTERRUPTIBLE); + + if (!skb_queue_empty(&sk->sk_receive_queue)) + break; + + if (sk->sk_err || (sk->sk_shutdown & RCV_SHUTDOWN)) + break; + + if (signal_pending(current) || !timeo) + break; + + set_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + release_sock(sk); + timeo = schedule_timeout(timeo); + lock_sock(sk); + clear_bit(SOCK_ASYNC_WAITDATA, &sk->sk_socket->flags); + } + + __set_current_state(TASK_RUNNING); + remove_wait_queue(sk_sleep(sk), &wait); + return timeo; +} + +int bt_sock_stream_recvmsg(struct kiocb *iocb, struct socket *sock, + struct msghdr *msg, size_t size, int flags) +{ + struct sock *sk = sock->sk; + int err = 0; + size_t target, copied = 0; + long timeo; + + if (flags & MSG_OOB) + return -EOPNOTSUPP; + + msg->msg_namelen = 0; + + BT_DBG("sk %p size %zu", sk, size); + + lock_sock(sk); + + target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); + timeo = sock_rcvtimeo(sk, flags & MSG_DONTWAIT); + + do { + struct sk_buff *skb; + int chunk; + + skb = skb_dequeue(&sk->sk_receive_queue); + if (!skb) { + if (copied >= target) + break; + + if ((err = sock_error(sk)) != 0) + break; + if (sk->sk_shutdown & RCV_SHUTDOWN) + break; + + err = -EAGAIN; + if (!timeo) + break; + + timeo = bt_sock_data_wait(sk, timeo); + + if (signal_pending(current)) { + err = sock_intr_errno(timeo); + goto out; + } + continue; + } + + chunk = min_t(unsigned int, skb->len, size); + if (memcpy_toiovec(msg->msg_iov, skb->data, chunk)) { + skb_queue_head(&sk->sk_receive_queue, skb); + if (!copied) + copied = -EFAULT; + break; + } + copied += chunk; + size -= chunk; + + sock_recv_ts_and_drops(msg, sk, skb); + + if (!(flags & MSG_PEEK)) { + skb_pull(skb, chunk); + if (skb->len) { + skb_queue_head(&sk->sk_receive_queue, skb); + break; + } + kfree_skb(skb); + + } else { + /* put message back and return */ + skb_queue_head(&sk->sk_receive_queue, skb); + break; + } + } while (size); + +out: + release_sock(sk); + return copied ? : err; +} +EXPORT_SYMBOL(bt_sock_stream_recvmsg); + static inline unsigned int bt_accept_poll(struct sock *parent) { struct list_head *p, *n; -- cgit v1.2.3 From 534c92fde74c8d2143fc15b5f1d957f42cb43570 Mon Sep 17 00:00:00 2001 From: Andrei Emeltchenko Date: Fri, 1 Oct 2010 12:05:11 +0300 Subject: Bluetooth: clean up rfcomm code Remove dead code and unused rfcomm thread events Signed-off-by: Andrei Emeltchenko Acked-by: Marcel Holtmann Signed-off-by: Gustavo F. Padovan --- include/net/bluetooth/rfcomm.h | 5 ----- net/bluetooth/rfcomm/core.c | 29 ++++++++++++++--------------- 2 files changed, 14 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/rfcomm.h b/include/net/bluetooth/rfcomm.h index a140847d622..71047bc0af8 100644 --- a/include/net/bluetooth/rfcomm.h +++ b/include/net/bluetooth/rfcomm.h @@ -213,11 +213,6 @@ struct rfcomm_dlc { #define RFCOMM_DEFER_SETUP 8 /* Scheduling flags and events */ -#define RFCOMM_SCHED_STATE 0 -#define RFCOMM_SCHED_RX 1 -#define RFCOMM_SCHED_TX 2 -#define RFCOMM_SCHED_TIMEO 3 -#define RFCOMM_SCHED_AUTH 4 #define RFCOMM_SCHED_WAKEUP 31 /* MSC exchange flags */ diff --git a/net/bluetooth/rfcomm/core.c b/net/bluetooth/rfcomm/core.c index 4a22b14a2c6..39a5d87e33b 100644 --- a/net/bluetooth/rfcomm/core.c +++ b/net/bluetooth/rfcomm/core.c @@ -113,11 +113,10 @@ static void rfcomm_session_del(struct rfcomm_session *s); #define __get_rpn_stop_bits(line) (((line) >> 2) & 0x1) #define __get_rpn_parity(line) (((line) >> 3) & 0x7) -static inline void rfcomm_schedule(uint event) +static inline void rfcomm_schedule(void) { if (!rfcomm_thread) return; - //set_bit(event, &rfcomm_event); set_bit(RFCOMM_SCHED_WAKEUP, &rfcomm_event); wake_up_process(rfcomm_thread); } @@ -203,13 +202,13 @@ static inline int __check_fcs(u8 *data, int type, u8 fcs) static void rfcomm_l2state_change(struct sock *sk) { BT_DBG("%p state %d", sk, sk->sk_state); - rfcomm_schedule(RFCOMM_SCHED_STATE); + rfcomm_schedule(); } static void rfcomm_l2data_ready(struct sock *sk, int bytes) { BT_DBG("%p bytes %d", sk, bytes); - rfcomm_schedule(RFCOMM_SCHED_RX); + rfcomm_schedule(); } static int rfcomm_l2sock_create(struct socket **sock) @@ -255,7 +254,7 @@ static void rfcomm_session_timeout(unsigned long arg) BT_DBG("session %p state %ld", s, s->state); set_bit(RFCOMM_TIMED_OUT, &s->flags); - rfcomm_schedule(RFCOMM_SCHED_TIMEO); + rfcomm_schedule(); } static void rfcomm_session_set_timer(struct rfcomm_session *s, long timeout) @@ -283,7 +282,7 @@ static void rfcomm_dlc_timeout(unsigned long arg) set_bit(RFCOMM_TIMED_OUT, &d->flags); rfcomm_dlc_put(d); - rfcomm_schedule(RFCOMM_SCHED_TIMEO); + rfcomm_schedule(); } static void rfcomm_dlc_set_timer(struct rfcomm_dlc *d, long timeout) @@ -465,7 +464,7 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err) case BT_CONFIG: if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { set_bit(RFCOMM_AUTH_REJECT, &d->flags); - rfcomm_schedule(RFCOMM_SCHED_AUTH); + rfcomm_schedule(); break; } /* Fall through */ @@ -485,7 +484,7 @@ static int __rfcomm_dlc_close(struct rfcomm_dlc *d, int err) case BT_CONNECT2: if (test_and_clear_bit(RFCOMM_DEFER_SETUP, &d->flags)) { set_bit(RFCOMM_AUTH_REJECT, &d->flags); - rfcomm_schedule(RFCOMM_SCHED_AUTH); + rfcomm_schedule(); break; } /* Fall through */ @@ -533,7 +532,7 @@ int rfcomm_dlc_send(struct rfcomm_dlc *d, struct sk_buff *skb) skb_queue_tail(&d->tx_queue, skb); if (!test_bit(RFCOMM_TX_THROTTLED, &d->flags)) - rfcomm_schedule(RFCOMM_SCHED_TX); + rfcomm_schedule(); return len; } @@ -545,7 +544,7 @@ void __rfcomm_dlc_throttle(struct rfcomm_dlc *d) d->v24_sig |= RFCOMM_V24_FC; set_bit(RFCOMM_MSC_PENDING, &d->flags); } - rfcomm_schedule(RFCOMM_SCHED_TX); + rfcomm_schedule(); } void __rfcomm_dlc_unthrottle(struct rfcomm_dlc *d) @@ -556,7 +555,7 @@ void __rfcomm_dlc_unthrottle(struct rfcomm_dlc *d) d->v24_sig &= ~RFCOMM_V24_FC; set_bit(RFCOMM_MSC_PENDING, &d->flags); } - rfcomm_schedule(RFCOMM_SCHED_TX); + rfcomm_schedule(); } /* @@ -577,7 +576,7 @@ int rfcomm_dlc_set_modem_status(struct rfcomm_dlc *d, u8 v24_sig) d->v24_sig = v24_sig; if (!test_and_set_bit(RFCOMM_MSC_PENDING, &d->flags)) - rfcomm_schedule(RFCOMM_SCHED_TX); + rfcomm_schedule(); return 0; } @@ -816,7 +815,7 @@ static int rfcomm_queue_disc(struct rfcomm_dlc *d) cmd->fcs = __fcs2((u8 *) cmd); skb_queue_tail(&d->tx_queue, skb); - rfcomm_schedule(RFCOMM_SCHED_TX); + rfcomm_schedule(); return 0; } @@ -1884,7 +1883,7 @@ static inline void rfcomm_accept_connection(struct rfcomm_session *s) * L2CAP MTU minus UIH header and FCS. */ s->mtu = min(l2cap_pi(nsock->sk)->omtu, l2cap_pi(nsock->sk)->imtu) - 5; - rfcomm_schedule(RFCOMM_SCHED_RX); + rfcomm_schedule(); } else sock_release(nsock); } @@ -2093,7 +2092,7 @@ static void rfcomm_security_cfm(struct hci_conn *conn, u8 status, u8 encrypt) rfcomm_session_put(s); - rfcomm_schedule(RFCOMM_SCHED_AUTH); + rfcomm_schedule(); } static struct hci_cb rfcomm_cb = { -- cgit v1.2.3 From 271733cf844a2f5f186ef3b40c26d6397b71039a Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 13 Oct 2010 12:06:23 +0200 Subject: cfg80211: notify drivers about frame registrations Drivers may need to adjust their filters according to frame registrations, so notify them about them. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/cfg80211.h | 7 +++++++ net/wireless/mlme.c | 23 ++++++++++++++++++++--- 2 files changed, 27 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 24d5b586927..2a7936d7851 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1147,6 +1147,9 @@ struct cfg80211_pmksa { * allows the driver to adjust the dynamic ps timeout value. * @set_cqm_rssi_config: Configure connection quality monitor RSSI threshold. * + * @mgmt_frame_register: Notify driver that a management frame type was + * registered. Note that this callback may not sleep, and cannot run + * concurrently with itself. */ struct cfg80211_ops { int (*suspend)(struct wiphy *wiphy); @@ -1297,6 +1300,10 @@ struct cfg80211_ops { int (*set_cqm_rssi_config)(struct wiphy *wiphy, struct net_device *dev, s32 rssi_thold, u32 rssi_hyst); + + void (*mgmt_frame_register)(struct wiphy *wiphy, + struct net_device *dev, + u16 frame_type, bool reg); }; /* diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index caf11a42750..26838d903b9 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -764,6 +764,8 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, u16 frame_type, const u8 *match_data, int match_len) { + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct cfg80211_mgmt_registration *reg, *nreg; int err = 0; u16 mgmt_type; @@ -810,22 +812,37 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, nreg->frame_type = cpu_to_le16(frame_type); list_add(&nreg->list, &wdev->mgmt_registrations); + if (rdev->ops->mgmt_frame_register) + rdev->ops->mgmt_frame_register(wiphy, wdev->netdev, + frame_type, true); + out: spin_unlock_bh(&wdev->mgmt_registrations_lock); + return err; } void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid) { + struct wiphy *wiphy = wdev->wiphy; + struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct cfg80211_mgmt_registration *reg, *tmp; spin_lock_bh(&wdev->mgmt_registrations_lock); list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) { - if (reg->nlpid == nlpid) { - list_del(®->list); - kfree(reg); + if (reg->nlpid != nlpid) + continue; + + if (rdev->ops->mgmt_frame_register) { + u16 frame_type = le16_to_cpu(reg->frame_type); + + rdev->ops->mgmt_frame_register(wiphy, wdev->netdev, + frame_type, false); } + + list_del(®->list); + kfree(reg); } spin_unlock_bh(&wdev->mgmt_registrations_lock); -- cgit v1.2.3 From 7be5086d4cb7cceb71d724a9524d5e927785d04f Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 13 Oct 2010 12:06:24 +0200 Subject: mac80211: add probe request filter flag Using the frame registration notification, we can see when probe requests are requested and notify the low-level driver via filtering. The flag is also set in AP and IBSS modes. Signed-off-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/mac80211.h | 9 ++++++--- net/mac80211/cfg.c | 18 ++++++++++++++++++ net/mac80211/ieee80211_i.h | 4 +++- net/mac80211/iface.c | 9 ++++++++- net/mac80211/main.c | 3 +++ 5 files changed, 38 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 33aa2e39147..9fdf982d128 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1478,12 +1478,14 @@ ieee80211_get_alt_retry_rate(const struct ieee80211_hw *hw, * honour this flag if possible. * * @FIF_CONTROL: pass control frames (except for PS Poll), if PROMISC_IN_BSS - * is not set then only those addressed to this station. + * is not set then only those addressed to this station. * * @FIF_OTHER_BSS: pass frames destined to other BSSes * - * @FIF_PSPOLL: pass PS Poll frames, if PROMISC_IN_BSS is not set then only - * those addressed to this station. + * @FIF_PSPOLL: pass PS Poll frames, if PROMISC_IN_BSS is not set then only + * those addressed to this station. + * + * @FIF_PROBE_REQ: pass probe request frames */ enum ieee80211_filter_flags { FIF_PROMISC_IN_BSS = 1<<0, @@ -1494,6 +1496,7 @@ enum ieee80211_filter_flags { FIF_CONTROL = 1<<5, FIF_OTHER_BSS = 1<<6, FIF_PSPOLL = 1<<7, + FIF_PROBE_REQ = 1<<8, }; /** diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 25fb351e00f..18bd0e55060 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1604,6 +1604,23 @@ static int ieee80211_mgmt_tx(struct wiphy *wiphy, struct net_device *dev, return 0; } +static void ieee80211_mgmt_frame_register(struct wiphy *wiphy, + struct net_device *dev, + u16 frame_type, bool reg) +{ + struct ieee80211_local *local = wiphy_priv(wiphy); + + if (frame_type != (IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_REQ)) + return; + + if (reg) + local->probe_req_reg++; + else + local->probe_req_reg--; + + ieee80211_queue_work(&local->hw, &local->reconfig_filter); +} + struct cfg80211_ops mac80211_config_ops = { .add_virtual_intf = ieee80211_add_iface, .del_virtual_intf = ieee80211_del_iface, @@ -1655,4 +1672,5 @@ struct cfg80211_ops mac80211_config_ops = { .cancel_remain_on_channel = ieee80211_cancel_remain_on_channel, .mgmt_tx = ieee80211_mgmt_tx, .set_cqm_rssi_config = ieee80211_set_cqm_rssi_config, + .mgmt_frame_register = ieee80211_mgmt_frame_register, }; diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index f0610fa4fbe..b80c3868992 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -707,7 +707,9 @@ struct ieee80211_local { int open_count; int monitors, cooked_mntrs; /* number of interfaces with corresponding FIF_ flags */ - int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss, fif_pspoll; + int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss, fif_pspoll, + fif_probe_req; + int probe_req_reg; unsigned int filter_flags; /* FIF_* */ bool wiphy_ciphers_allocated; diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c index e99d1b60557..f9163b12c7f 100644 --- a/net/mac80211/iface.c +++ b/net/mac80211/iface.c @@ -280,8 +280,11 @@ static int ieee80211_do_open(struct net_device *dev, bool coming_up) ieee80211_start_mesh(sdata); } else if (sdata->vif.type == NL80211_IFTYPE_AP) { local->fif_pspoll++; + local->fif_probe_req++; ieee80211_configure_filter(local); + } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { + local->fif_probe_req++; } changed |= ieee80211_reset_erp_info(sdata); @@ -428,8 +431,12 @@ static void ieee80211_do_stop(struct ieee80211_sub_if_data *sdata, if (sdata->flags & IEEE80211_SDATA_PROMISC) atomic_dec(&local->iff_promiscs); - if (sdata->vif.type == NL80211_IFTYPE_AP) + if (sdata->vif.type == NL80211_IFTYPE_AP) { local->fif_pspoll--; + local->fif_probe_req--; + } else if (sdata->vif.type == NL80211_IFTYPE_ADHOC) { + local->fif_probe_req--; + } netif_addr_lock_bh(sdata->dev); spin_lock_bh(&local->filter_lock); diff --git a/net/mac80211/main.c b/net/mac80211/main.c index 915ecf87e4a..5162303a8b4 100644 --- a/net/mac80211/main.c +++ b/net/mac80211/main.c @@ -54,6 +54,9 @@ void ieee80211_configure_filter(struct ieee80211_local *local) if (local->monitors || local->scanning) new_flags |= FIF_BCN_PRBRESP_PROMISC; + if (local->fif_probe_req || local->probe_req_reg) + new_flags |= FIF_PROBE_REQ; + if (local->fif_fcsfail) new_flags |= FIF_FCSFAIL; -- cgit v1.2.3 From b3d6255388de0680a14f0907deb7b7f4fa0d25d5 Mon Sep 17 00:00:00 2001 From: Kumar Sanghvi Date: Tue, 12 Oct 2010 20:14:43 +0000 Subject: Phonet: 'connect' socket implementation for Pipe controller MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Based on suggestion by Rémi Denis-Courmont to implement 'connect' for Pipe controller logic, this patch implements 'connect' socket call for the Pipe controller logic. The patch does following:- - Removes setsockopts for PNPIPE_CREATE and PNPIPE_DESTROY - Adds setsockopt for setting the Pipe handle value - Implements connect socket call - Updates the Pipe controller logic User-space should now follow below sequence with Pipe controller:- -socket -bind -setsockopt for PNPIPE_PIPE_HANDLE -connect -setsockopt for PNPIPE_ENCAP_IP -setsockopt for PNPIPE_ENABLE GPRS/3G data has been tested working fine with this. Signed-off-by: Kumar Sanghvi Acked-by: Rémi Denis-Courmont Signed-off-by: David S. Miller --- include/linux/phonet.h | 3 +- include/net/phonet/pep.h | 4 +- net/phonet/pep.c | 300 ++++++++++++++++++----------------------------- net/phonet/socket.c | 99 ++++++++++++++++ 4 files changed, 215 insertions(+), 191 deletions(-) (limited to 'include/net') diff --git a/include/linux/phonet.h b/include/linux/phonet.h index e27cbf93174..26c8df78691 100644 --- a/include/linux/phonet.h +++ b/include/linux/phonet.h @@ -36,10 +36,9 @@ /* Socket options for SOL_PNPIPE level */ #define PNPIPE_ENCAP 1 #define PNPIPE_IFINDEX 2 -#define PNPIPE_CREATE 3 +#define PNPIPE_PIPE_HANDLE 3 #define PNPIPE_ENABLE 4 /* unused slot */ -#define PNPIPE_DESTROY 6 #define PNADDR_ANY 0 #define PNADDR_BROADCAST 0xFC diff --git a/include/net/phonet/pep.h b/include/net/phonet/pep.h index def6cfa3f45..b60b28c99e8 100644 --- a/include/net/phonet/pep.h +++ b/include/net/phonet/pep.h @@ -46,8 +46,8 @@ struct pep_sock { u8 init_enable; /* auto-enable at creation */ u8 aligned; #ifdef CONFIG_PHONET_PIPECTRLR - u16 remote_pep; - u8 pipe_state; + u8 pipe_state; + struct sockaddr_pn remote_pep; #endif }; diff --git a/net/phonet/pep.c b/net/phonet/pep.c index f818f76d297..9c903f9e507 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -88,15 +88,6 @@ static int pep_reply(struct sock *sk, struct sk_buff *oskb, const struct pnpipehdr *oph = pnp_hdr(oskb); struct pnpipehdr *ph; struct sk_buff *skb; -#ifdef CONFIG_PHONET_PIPECTRLR - const struct phonethdr *hdr = pn_hdr(oskb); - struct sockaddr_pn spn = { - .spn_family = AF_PHONET, - .spn_resource = 0xD9, - .spn_dev = hdr->pn_sdev, - .spn_obj = hdr->pn_sobj, - }; -#endif skb = alloc_skb(MAX_PNPIPE_HEADER + len, priority); if (!skb) @@ -114,11 +105,7 @@ static int pep_reply(struct sock *sk, struct sk_buff *oskb, ph->pipe_handle = oph->pipe_handle; ph->error_code = code; -#ifdef CONFIG_PHONET_PIPECTRLR - return pn_skb_send(sk, skb, &spn); -#else return pn_skb_send(sk, skb, &pipe_srv); -#endif } #define PAD 0x00 @@ -188,18 +175,13 @@ static int pipe_get_flow_info(struct sock *sk, struct sk_buff *skb, return 0; } -static int pipe_handler_send_req(struct sock *sk, u16 dobj, u8 utid, - u8 msg_id, u8 p_handle, gfp_t priority) +static int pipe_handler_send_req(struct sock *sk, u8 utid, + u8 msg_id, gfp_t priority) { int len; struct pnpipehdr *ph; struct sk_buff *skb; - struct sockaddr_pn spn = { - .spn_family = AF_PHONET, - .spn_resource = 0xD9, - .spn_dev = pn_dev(dobj), - .spn_obj = pn_obj(dobj), - }; + struct pep_sock *pn = pep_sk(sk); static const u8 data[4] = { PAD, PAD, PAD, PAD, @@ -235,30 +217,25 @@ static int pipe_handler_send_req(struct sock *sk, u16 dobj, u8 utid, ph = pnp_hdr(skb); ph->utid = utid; ph->message_id = msg_id; - ph->pipe_handle = p_handle; + ph->pipe_handle = pn->pipe_handle; ph->error_code = PN_PIPE_NO_ERROR; - return pn_skb_send(sk, skb, &spn); + return pn_skb_send(sk, skb, &pn->remote_pep); } -static int pipe_handler_send_created_ind(struct sock *sk, u16 dobj, - u8 utid, u8 p_handle, u8 msg_id, u8 tx_fc, u8 rx_fc) +static int pipe_handler_send_created_ind(struct sock *sk, + u8 utid, u8 msg_id) { int err_code; struct pnpipehdr *ph; struct sk_buff *skb; - struct sockaddr_pn spn = { - .spn_family = AF_PHONET, - .spn_resource = 0xD9, - .spn_dev = pn_dev(dobj), - .spn_obj = pn_obj(dobj), - }; + struct pep_sock *pn = pep_sk(sk); static u8 data[4] = { 0x03, 0x04, }; - data[2] = tx_fc; - data[3] = rx_fc; + data[2] = pn->tx_fc; + data[3] = pn->rx_fc; /* * actually, below is number of sub-blocks and not error code. @@ -282,24 +259,18 @@ static int pipe_handler_send_created_ind(struct sock *sk, u16 dobj, ph = pnp_hdr(skb); ph->utid = utid; ph->message_id = msg_id; - ph->pipe_handle = p_handle; + ph->pipe_handle = pn->pipe_handle; ph->error_code = err_code; - return pn_skb_send(sk, skb, &spn); + return pn_skb_send(sk, skb, &pn->remote_pep); } -static int pipe_handler_send_ind(struct sock *sk, u16 dobj, u8 utid, - u8 p_handle, u8 msg_id) +static int pipe_handler_send_ind(struct sock *sk, u8 utid, u8 msg_id) { int err_code; struct pnpipehdr *ph; struct sk_buff *skb; - struct sockaddr_pn spn = { - .spn_family = AF_PHONET, - .spn_resource = 0xD9, - .spn_dev = pn_dev(dobj), - .spn_obj = pn_obj(dobj), - }; + struct pep_sock *pn = pep_sk(sk); /* * actually, below is a filler. @@ -321,10 +292,10 @@ static int pipe_handler_send_ind(struct sock *sk, u16 dobj, u8 utid, ph = pnp_hdr(skb); ph->utid = utid; ph->message_id = msg_id; - ph->pipe_handle = p_handle; + ph->pipe_handle = pn->pipe_handle; ph->error_code = err_code; - return pn_skb_send(sk, skb, &spn); + return pn_skb_send(sk, skb, &pn->remote_pep); } static int pipe_handler_enable_pipe(struct sock *sk, int enable) @@ -339,34 +310,7 @@ static int pipe_handler_enable_pipe(struct sock *sk, int enable) utid = PNS_PIPE_DISABLE_UTID; req = PNS_PEP_DISABLE_REQ; } - return pipe_handler_send_req(sk, pn->pn_sk.sobject, utid, req, - pn->pipe_handle, GFP_ATOMIC); -} - -static int pipe_handler_create_pipe(struct sock *sk, int pipe_handle, int cmd) -{ - int ret; - struct pep_sock *pn = pep_sk(sk); - - switch (cmd) { - case PNPIPE_CREATE: - ret = pipe_handler_send_req(sk, pn->pn_sk.sobject, - PNS_PEP_CONNECT_UTID, PNS_PEP_CONNECT_REQ, - pipe_handle, GFP_ATOMIC); - break; - - case PNPIPE_DESTROY: - ret = pipe_handler_send_req(sk, pn->remote_pep, - PNS_PEP_DISCONNECT_UTID, - PNS_PEP_DISCONNECT_REQ, - pn->pipe_handle, GFP_ATOMIC); - break; - - default: - ret = -EINVAL; - } - - return ret; + return pipe_handler_send_req(sk, utid, req, GFP_ATOMIC); } #endif @@ -434,14 +378,6 @@ static int pipe_snd_status(struct sock *sk, u8 type, u8 status, gfp_t priority) struct pep_sock *pn = pep_sk(sk); struct pnpipehdr *ph; struct sk_buff *skb; -#ifdef CONFIG_PHONET_PIPECTRLR - struct sockaddr_pn spn = { - .spn_family = AF_PHONET, - .spn_resource = 0xD9, - .spn_dev = pn_dev(pn->remote_pep), - .spn_obj = pn_obj(pn->remote_pep), - }; -#endif skb = alloc_skb(MAX_PNPIPE_HEADER + 4, priority); if (!skb) @@ -462,7 +398,7 @@ static int pipe_snd_status(struct sock *sk, u8 type, u8 status, gfp_t priority) ph->data[4] = status; #ifdef CONFIG_PHONET_PIPECTRLR - return pn_skb_send(sk, skb, &spn); + return pn_skb_send(sk, skb, &pn->remote_pep); #else return pn_skb_send(sk, skb, &pipe_srv); #endif @@ -582,12 +518,6 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) struct pnpipehdr *hdr = pnp_hdr(skb); struct sk_buff_head *queue; int err = 0; -#ifdef CONFIG_PHONET_PIPECTRLR - struct phonethdr *ph = pn_hdr(skb); - static u8 host_pref_rx_fc[3], host_req_tx_fc[3]; - u8 remote_pref_rx_fc[3], remote_req_tx_fc[3]; - u8 negotiated_rx_fc, negotiated_tx_fc; -#endif BUG_ON(sk->sk_state == TCP_CLOSE_WAIT); @@ -596,40 +526,6 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) pep_reject_conn(sk, skb, PN_PIPE_ERR_PEP_IN_USE); break; -#ifdef CONFIG_PHONET_PIPECTRLR - case PNS_PEP_CONNECT_RESP: - if ((ph->pn_sdev == pn_dev(pn->remote_pep)) && - (ph->pn_sobj == pn_obj(pn->remote_pep))) { - pipe_get_flow_info(sk, skb, remote_pref_rx_fc, - remote_req_tx_fc); - - negotiated_tx_fc = pipe_negotiate_fc(remote_req_tx_fc, - host_pref_rx_fc, - sizeof(host_pref_rx_fc)); - negotiated_rx_fc = pipe_negotiate_fc(host_req_tx_fc, - remote_pref_rx_fc, - sizeof(host_pref_rx_fc)); - - pn->pipe_state = PIPE_DISABLED; - pipe_handler_send_created_ind(sk, pn->remote_pep, - PNS_PIPE_CREATED_IND_UTID, - pn->pipe_handle, PNS_PIPE_CREATED_IND, - negotiated_tx_fc, negotiated_rx_fc); - pipe_handler_send_created_ind(sk, pn->pn_sk.sobject, - PNS_PIPE_CREATED_IND_UTID, - pn->pipe_handle, PNS_PIPE_CREATED_IND, - negotiated_tx_fc, negotiated_rx_fc); - } else { - pipe_handler_send_req(sk, pn->remote_pep, - PNS_PEP_CONNECT_UTID, - PNS_PEP_CONNECT_REQ, pn->pipe_handle, - GFP_ATOMIC); - pipe_get_flow_info(sk, skb, host_pref_rx_fc, - host_req_tx_fc); - } - break; -#endif - case PNS_PEP_DISCONNECT_REQ: pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); sk->sk_state = TCP_CLOSE_WAIT; @@ -640,10 +536,7 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_PHONET_PIPECTRLR case PNS_PEP_DISCONNECT_RESP: pn->pipe_state = PIPE_IDLE; - pipe_handler_send_req(sk, pn->pn_sk.sobject, - PNS_PEP_DISCONNECT_UTID, - PNS_PEP_DISCONNECT_REQ, pn->pipe_handle, - GFP_KERNEL); + sk->sk_state = TCP_CLOSE; break; #endif @@ -654,21 +547,18 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_PHONET_PIPECTRLR case PNS_PEP_ENABLE_RESP: - if ((ph->pn_sdev == pn_dev(pn->remote_pep)) && - (ph->pn_sobj == pn_obj(pn->remote_pep))) { - pn->pipe_state = PIPE_ENABLED; - pipe_handler_send_ind(sk, pn->remote_pep, - PNS_PIPE_ENABLED_IND_UTID, - pn->pipe_handle, PNS_PIPE_ENABLED_IND); - pipe_handler_send_ind(sk, pn->pn_sk.sobject, - PNS_PIPE_ENABLED_IND_UTID, - pn->pipe_handle, PNS_PIPE_ENABLED_IND); - } else - pipe_handler_send_req(sk, pn->remote_pep, - PNS_PIPE_ENABLE_UTID, - PNS_PEP_ENABLE_REQ, pn->pipe_handle, - GFP_KERNEL); + pn->pipe_state = PIPE_ENABLED; + pipe_handler_send_ind(sk, PNS_PIPE_ENABLED_IND_UTID, + PNS_PIPE_ENABLED_IND); + if (!pn_flow_safe(pn->tx_fc)) { + atomic_set(&pn->tx_credits, 1); + sk->sk_write_space(sk); + } + if (sk->sk_state == TCP_ESTABLISHED) + break; /* Nothing to do */ + sk->sk_state = TCP_ESTABLISHED; + pipe_grant_credits(sk); break; #endif @@ -692,22 +582,12 @@ static int pipe_do_rcv(struct sock *sk, struct sk_buff *skb) #ifdef CONFIG_PHONET_PIPECTRLR case PNS_PEP_DISABLE_RESP: - if ((ph->pn_sdev == pn_dev(pn->remote_pep)) && - (ph->pn_sobj == pn_obj(pn->remote_pep))) { - pn->pipe_state = PIPE_DISABLED; - pipe_handler_send_ind(sk, pn->remote_pep, - PNS_PIPE_DISABLED_IND_UTID, - pn->pipe_handle, - PNS_PIPE_DISABLED_IND); - pipe_handler_send_ind(sk, pn->pn_sk.sobject, - PNS_PIPE_DISABLED_IND_UTID, - pn->pipe_handle, - PNS_PIPE_DISABLED_IND); - } else - pipe_handler_send_req(sk, pn->remote_pep, - PNS_PIPE_DISABLE_UTID, - PNS_PEP_DISABLE_REQ, pn->pipe_handle, - GFP_KERNEL); + pn->pipe_state = PIPE_DISABLED; + atomic_set(&pn->tx_credits, 0); + pipe_handler_send_ind(sk, PNS_PIPE_DISABLED_IND_UTID, + PNS_PIPE_DISABLED_IND); + sk->sk_state = TCP_SYN_RECV; + pn->rx_credits = 0; break; #endif @@ -802,6 +682,42 @@ static void pipe_destruct(struct sock *sk) skb_queue_purge(&pn->ctrlreq_queue); } +#ifdef CONFIG_PHONET_PIPECTRLR +static int pep_connresp_rcv(struct sock *sk, struct sk_buff *skb) +{ + struct pep_sock *pn = pep_sk(sk); + u8 host_pref_rx_fc[3] = {3, 2, 1}, host_req_tx_fc[3] = {3, 2, 1}; + u8 remote_pref_rx_fc[3], remote_req_tx_fc[3]; + u8 negotiated_rx_fc, negotiated_tx_fc; + int ret; + + pipe_get_flow_info(sk, skb, remote_pref_rx_fc, + remote_req_tx_fc); + negotiated_tx_fc = pipe_negotiate_fc(remote_req_tx_fc, + host_pref_rx_fc, + sizeof(host_pref_rx_fc)); + negotiated_rx_fc = pipe_negotiate_fc(host_req_tx_fc, + remote_pref_rx_fc, + sizeof(host_pref_rx_fc)); + + pn->pipe_state = PIPE_DISABLED; + sk->sk_state = TCP_SYN_RECV; + sk->sk_backlog_rcv = pipe_do_rcv; + sk->sk_destruct = pipe_destruct; + pn->rx_credits = 0; + pn->rx_fc = negotiated_rx_fc; + pn->tx_fc = negotiated_tx_fc; + sk->sk_state_change(sk); + + ret = pipe_handler_send_created_ind(sk, + PNS_PIPE_CREATED_IND_UTID, + PNS_PIPE_CREATED_IND + ); + + return ret; +} +#endif + static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) { struct sock *newsk; @@ -884,9 +800,6 @@ static int pep_connreq_rcv(struct sock *sk, struct sk_buff *skb) newpn->rx_fc = newpn->tx_fc = PN_LEGACY_FLOW_CONTROL; newpn->init_enable = enabled; newpn->aligned = aligned; -#ifdef CONFIG_PHONET_PIPECTRLR - newpn->remote_pep = pn->remote_pep; -#endif BUG_ON(!skb_queue_empty(&newsk->sk_receive_queue)); skb_queue_head(&newsk->sk_receive_queue, skb); @@ -968,6 +881,12 @@ static int pep_do_rcv(struct sock *sk, struct sk_buff *skb) err = pep_connreq_rcv(sk, skb); break; +#ifdef CONFIG_PHONET_PIPECTRLR + case PNS_PEP_CONNECT_RESP: + err = pep_connresp_rcv(sk, skb); + break; +#endif + case PNS_PEP_DISCONNECT_REQ: pep_reply(sk, skb, PN_PIPE_NO_ERROR, NULL, 0, GFP_ATOMIC); break; @@ -1032,6 +951,18 @@ static void pep_sock_close(struct sock *sk, long timeout) /* Forcefully remove dangling Phonet pipe */ pipe_do_remove(sk); +#ifdef CONFIG_PHONET_PIPECTRLR + if (pn->pipe_state != PIPE_IDLE) { + /* send pep disconnect request */ + pipe_handler_send_req(sk, + PNS_PEP_DISCONNECT_UTID, PNS_PEP_DISCONNECT_REQ, + GFP_KERNEL); + + pn->pipe_state = PIPE_IDLE; + sk->sk_state = TCP_CLOSE; + } +#endif + ifindex = pn->ifindex; pn->ifindex = 0; release_sock(sk); @@ -1108,6 +1039,20 @@ out: return newsk; } +#ifdef CONFIG_PHONET_PIPECTRLR +static int pep_sock_connect(struct sock *sk, struct sockaddr *addr, int len) +{ + struct pep_sock *pn = pep_sk(sk); + struct sockaddr_pn *spn = (struct sockaddr_pn *)addr; + + memcpy(&pn->remote_pep, spn, sizeof(struct sockaddr_pn)); + + return pipe_handler_send_req(sk, + PNS_PEP_CONNECT_UTID, PNS_PEP_CONNECT_REQ, + GFP_ATOMIC); +} +#endif + static int pep_ioctl(struct sock *sk, int cmd, unsigned long arg) { struct pep_sock *pn = pep_sk(sk); @@ -1149,10 +1094,6 @@ static int pep_setsockopt(struct sock *sk, int level, int optname, { struct pep_sock *pn = pep_sk(sk); int val = 0, err = 0; -#ifdef CONFIG_PHONET_PIPECTRLR - int remote_pep; - int pipe_handle; -#endif if (level != SOL_PNPIPE) return -ENOPROTOOPT; @@ -1164,28 +1105,15 @@ static int pep_setsockopt(struct sock *sk, int level, int optname, lock_sock(sk); switch (optname) { #ifdef CONFIG_PHONET_PIPECTRLR - case PNPIPE_CREATE: + case PNPIPE_PIPE_HANDLE: if (val) { if (pn->pipe_state > PIPE_IDLE) { err = -EFAULT; break; } - remote_pep = val & 0xFFFF; - pipe_handle = (val >> 16) & 0xFF; - pn->remote_pep = remote_pep; - err = pipe_handler_create_pipe(sk, pipe_handle, - PNPIPE_CREATE); - break; - } - - case PNPIPE_DESTROY: - if (pn->pipe_state < PIPE_DISABLED) { - err = -EFAULT; + pn->pipe_handle = val; break; } - - err = pipe_handler_create_pipe(sk, 0x0, PNPIPE_DESTROY); - break; #endif case PNPIPE_ENCAP: @@ -1278,14 +1206,6 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb) struct pep_sock *pn = pep_sk(sk); struct pnpipehdr *ph; int err; -#ifdef CONFIG_PHONET_PIPECTRLR - struct sockaddr_pn spn = { - .spn_family = AF_PHONET, - .spn_resource = 0xD9, - .spn_dev = pn_dev(pn->remote_pep), - .spn_obj = pn_obj(pn->remote_pep), - }; -#endif if (pn_flow_safe(pn->tx_fc) && !atomic_add_unless(&pn->tx_credits, -1, 0)) { @@ -1304,7 +1224,7 @@ static int pipe_skb_send(struct sock *sk, struct sk_buff *skb) ph->message_id = PNS_PIPE_DATA; ph->pipe_handle = pn->pipe_handle; #ifdef CONFIG_PHONET_PIPECTRLR - err = pn_skb_send(sk, skb, &spn); + err = pn_skb_send(sk, skb, &pn->remote_pep); #else err = pn_skb_send(sk, skb, &pipe_srv); #endif @@ -1504,6 +1424,8 @@ static void pep_sock_unhash(struct sock *sk) struct sock *skparent = NULL; lock_sock(sk); + +#ifndef CONFIG_PHONET_PIPECTRLR if ((1 << sk->sk_state) & ~(TCPF_CLOSE|TCPF_LISTEN)) { skparent = pn->listener; release_sock(sk); @@ -1513,6 +1435,7 @@ static void pep_sock_unhash(struct sock *sk) sk_del_node_init(sk); sk = skparent; } +#endif /* Unhash a listening sock only when it is closed * and all of its active connected pipes are closed. */ if (hlist_empty(&pn->hlist)) @@ -1526,6 +1449,9 @@ static void pep_sock_unhash(struct sock *sk) static struct proto pep_proto = { .close = pep_sock_close, .accept = pep_sock_accept, +#ifdef CONFIG_PHONET_PIPECTRLR + .connect = pep_sock_connect, +#endif .ioctl = pep_ioctl, .init = pep_init, .setsockopt = pep_setsockopt, diff --git a/net/phonet/socket.c b/net/phonet/socket.c index aca8fba099e..25f746d20c1 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -225,6 +225,101 @@ static int pn_socket_autobind(struct socket *sock) return 0; /* socket was already bound */ } +#ifdef CONFIG_PHONET_PIPECTRLR +static int pn_socket_connect(struct socket *sock, struct sockaddr *addr, + int len, int flags) +{ + struct sock *sk = sock->sk; + struct sockaddr_pn *spn = (struct sockaddr_pn *)addr; + long timeo; + int err; + + if (len < sizeof(struct sockaddr_pn)) + return -EINVAL; + if (spn->spn_family != AF_PHONET) + return -EAFNOSUPPORT; + + lock_sock(sk); + + switch (sock->state) { + case SS_UNCONNECTED: + sk->sk_state = TCP_CLOSE; + break; + case SS_CONNECTING: + switch (sk->sk_state) { + case TCP_SYN_RECV: + sock->state = SS_CONNECTED; + err = -EISCONN; + goto out; + case TCP_CLOSE: + err = -EALREADY; + if (flags & O_NONBLOCK) + goto out; + goto wait_connect; + } + break; + case SS_CONNECTED: + switch (sk->sk_state) { + case TCP_SYN_RECV: + err = -EISCONN; + goto out; + case TCP_CLOSE: + sock->state = SS_UNCONNECTED; + break; + } + break; + case SS_DISCONNECTING: + case SS_FREE: + break; + } + sk->sk_state = TCP_CLOSE; + sk_stream_kill_queues(sk); + + sock->state = SS_CONNECTING; + err = sk->sk_prot->connect(sk, addr, len); + if (err < 0) { + sock->state = SS_UNCONNECTED; + sk->sk_state = TCP_CLOSE; + goto out; + } + + err = -EINPROGRESS; +wait_connect: + if (sk->sk_state != TCP_SYN_RECV && (flags & O_NONBLOCK)) + goto out; + + timeo = sock_sndtimeo(sk, flags & O_NONBLOCK); + release_sock(sk); + + err = -ERESTARTSYS; + timeo = wait_event_interruptible_timeout(*sk_sleep(sk), + sk->sk_state != TCP_CLOSE, + timeo); + + lock_sock(sk); + if (timeo < 0) + goto out; /* -ERESTARTSYS */ + + err = -ETIMEDOUT; + if (timeo == 0 && sk->sk_state != TCP_SYN_RECV) + goto out; + + if (sk->sk_state != TCP_SYN_RECV) { + sock->state = SS_UNCONNECTED; + err = sock_error(sk); + if (!err) + err = -ECONNREFUSED; + goto out; + } + sock->state = SS_CONNECTED; + err = 0; + +out: + release_sock(sk); + return err; +} +#endif + static int pn_socket_accept(struct socket *sock, struct socket *newsock, int flags) { @@ -393,7 +488,11 @@ const struct proto_ops phonet_stream_ops = { .owner = THIS_MODULE, .release = pn_socket_release, .bind = pn_socket_bind, +#ifdef CONFIG_PHONET_PIPECTRLR + .connect = pn_socket_connect, +#else .connect = sock_no_connect, +#endif .socketpair = sock_no_socketpair, .accept = pn_socket_accept, .getname = pn_socket_getname, -- cgit v1.2.3 From 31e3c3f6f1f9b154981a0e6620df700463db30ee Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Wed, 13 Oct 2010 13:20:35 +0000 Subject: tipc: cleanup function namespace Do some cleanups of TIPC based on make namespacecheck 1. Don't export unused symbols 2. Eliminate dead code 3. Make functions and variables local 4. Rename buf_acquire to tipc_buf_acquire since it is used in several files Compile tested only. This make break out of tree kernel modules that depend on TIPC routines. Signed-off-by: Stephen Hemminger Acked-by: Jon Maloy Acked-by: Paul Gortmaker Signed-off-by: David S. Miller --- include/net/tipc/tipc.h | 71 ------------- include/net/tipc/tipc_port.h | 2 - net/tipc/addr.c | 5 - net/tipc/bcast.c | 10 +- net/tipc/bcast.h | 3 - net/tipc/cluster.c | 21 +--- net/tipc/cluster.h | 2 +- net/tipc/config.c | 7 +- net/tipc/config.h | 6 -- net/tipc/core.c | 28 ++---- net/tipc/core.h | 9 +- net/tipc/dbg.c | 13 ++- net/tipc/dbg.h | 3 - net/tipc/discover.c | 16 +-- net/tipc/discover.h | 2 - net/tipc/link.c | 45 +++++---- net/tipc/link.h | 4 - net/tipc/msg.c | 2 +- net/tipc/name_distr.c | 2 +- net/tipc/node.c | 19 +--- net/tipc/node.h | 1 - net/tipc/port.c | 234 +++++-------------------------------------- net/tipc/port.h | 2 - net/tipc/ref.c | 17 ---- net/tipc/ref.h | 1 - net/tipc/subscr.c | 9 -- net/tipc/zone.c | 11 -- net/tipc/zone.h | 1 - 28 files changed, 84 insertions(+), 462 deletions(-) (limited to 'include/net') diff --git a/include/net/tipc/tipc.h b/include/net/tipc/tipc.h index 15af6dca0b4..1e0645e1eed 100644 --- a/include/net/tipc/tipc.h +++ b/include/net/tipc/tipc.h @@ -50,8 +50,6 @@ * TIPC operating mode routines */ -u32 tipc_get_addr(void); - #define TIPC_NOT_RUNNING 0 #define TIPC_NODE_MODE 1 #define TIPC_NET_MODE 2 @@ -62,8 +60,6 @@ int tipc_attach(unsigned int *userref, tipc_mode_event, void *usr_handle); void tipc_detach(unsigned int userref); -int tipc_get_mode(void); - /* * TIPC port manipulation routines */ @@ -153,12 +149,6 @@ int tipc_disconnect(u32 portref); int tipc_shutdown(u32 ref); -int tipc_isconnected(u32 portref, int *isconnected); - -int tipc_peer(u32 portref, struct tipc_portid *peer); - -int tipc_ref_valid(u32 portref); - /* * TIPC messaging routines */ @@ -170,38 +160,12 @@ int tipc_send(u32 portref, unsigned int num_sect, struct iovec const *msg_sect); -int tipc_send_buf(u32 portref, - struct sk_buff *buf, - unsigned int dsz); - int tipc_send2name(u32 portref, struct tipc_name const *name, u32 domain, unsigned int num_sect, struct iovec const *msg_sect); -int tipc_send_buf2name(u32 portref, - struct tipc_name const *name, - u32 domain, - struct sk_buff *buf, - unsigned int dsz); - -int tipc_forward2name(u32 portref, - struct tipc_name const *name, - u32 domain, - unsigned int section_count, - struct iovec const *msg_sect, - struct tipc_portid const *origin, - unsigned int importance); - -int tipc_forward_buf2name(u32 portref, - struct tipc_name const *name, - u32 domain, - struct sk_buff *buf, - unsigned int dsz, - struct tipc_portid const *orig, - unsigned int importance); - int tipc_send2port(u32 portref, struct tipc_portid const *dest, unsigned int num_sect, @@ -212,46 +176,11 @@ int tipc_send_buf2port(u32 portref, struct sk_buff *buf, unsigned int dsz); -int tipc_forward2port(u32 portref, - struct tipc_portid const *dest, - unsigned int num_sect, - struct iovec const *msg_sect, - struct tipc_portid const *origin, - unsigned int importance); - -int tipc_forward_buf2port(u32 portref, - struct tipc_portid const *dest, - struct sk_buff *buf, - unsigned int dsz, - struct tipc_portid const *orig, - unsigned int importance); - int tipc_multicast(u32 portref, struct tipc_name_seq const *seq, u32 domain, /* currently unused */ unsigned int section_count, struct iovec const *msg); - -#if 0 -int tipc_multicast_buf(u32 portref, - struct tipc_name_seq const *seq, - u32 domain, - void *buf, - unsigned int size); -#endif - -/* - * TIPC subscription routines - */ - -int tipc_ispublished(struct tipc_name const *name); - -/* - * Get number of available nodes within specified domain (excluding own node) - */ - -unsigned int tipc_available_nodes(const u32 domain); - #endif #endif diff --git a/include/net/tipc/tipc_port.h b/include/net/tipc/tipc_port.h index c54917cbfa4..1893aaf4942 100644 --- a/include/net/tipc/tipc_port.h +++ b/include/net/tipc/tipc_port.h @@ -88,8 +88,6 @@ void tipc_acknowledge(u32 port_ref,u32 ack); struct tipc_port *tipc_get_port(const u32 ref); -void *tipc_get_handle(const u32 ref); - /* * The following routines require that the port be locked on entry */ diff --git a/net/tipc/addr.c b/net/tipc/addr.c index 2ddc351b3be..8a2e89bffde 100644 --- a/net/tipc/addr.c +++ b/net/tipc/addr.c @@ -41,11 +41,6 @@ #include "cluster.h" #include "net.h" -u32 tipc_get_addr(void) -{ - return tipc_own_addr; -} - /** * tipc_addr_domain_valid - validates a network domain address * diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c index ecfaac10d0b..22a60fc9839 100644 --- a/net/tipc/bcast.c +++ b/net/tipc/bcast.c @@ -121,6 +121,9 @@ static DEFINE_SPINLOCK(bc_lock); const char tipc_bclink_name[] = "broadcast-link"; +static void tipc_nmap_diff(struct tipc_node_map *nm_a, + struct tipc_node_map *nm_b, + struct tipc_node_map *nm_diff); static u32 buf_seqno(struct sk_buff *buf) { @@ -287,7 +290,7 @@ static void bclink_send_nack(struct tipc_node *n_ptr) if (!less(n_ptr->bclink.gap_after, n_ptr->bclink.gap_to)) return; - buf = buf_acquire(INT_H_SIZE); + buf = tipc_buf_acquire(INT_H_SIZE); if (buf) { msg = buf_msg(buf); tipc_msg_init(msg, BCAST_PROTOCOL, STATE_MSG, @@ -871,8 +874,9 @@ void tipc_nmap_remove(struct tipc_node_map *nm_ptr, u32 node) * @nm_diff: output node map A-B (i.e. nodes of A that are not in B) */ -void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b, - struct tipc_node_map *nm_diff) +static void tipc_nmap_diff(struct tipc_node_map *nm_a, + struct tipc_node_map *nm_b, + struct tipc_node_map *nm_diff) { int stop = ARRAY_SIZE(nm_a->map); int w; diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h index e8c2b81658c..011c03f0a4a 100644 --- a/net/tipc/bcast.h +++ b/net/tipc/bcast.h @@ -84,9 +84,6 @@ static inline int tipc_nmap_equal(struct tipc_node_map *nm_a, struct tipc_node_m return !memcmp(nm_a, nm_b, sizeof(*nm_a)); } -void tipc_nmap_diff(struct tipc_node_map *nm_a, struct tipc_node_map *nm_b, - struct tipc_node_map *nm_diff); - void tipc_port_list_add(struct port_list *pl_ptr, u32 port); void tipc_port_list_free(struct port_list *pl_ptr); diff --git a/net/tipc/cluster.c b/net/tipc/cluster.c index e68f705381b..7fea14b98b9 100644 --- a/net/tipc/cluster.c +++ b/net/tipc/cluster.c @@ -113,25 +113,6 @@ void tipc_cltr_delete(struct cluster *c_ptr) kfree(c_ptr); } -u32 tipc_cltr_next_node(struct cluster *c_ptr, u32 addr) -{ - struct tipc_node *n_ptr; - u32 n_num = tipc_node(addr) + 1; - - if (!c_ptr) - return addr; - for (; n_num <= c_ptr->highest_node; n_num++) { - n_ptr = c_ptr->nodes[n_num]; - if (n_ptr && tipc_node_has_active_links(n_ptr)) - return n_ptr->addr; - } - for (n_num = 1; n_num < tipc_node(addr); n_num++) { - n_ptr = c_ptr->nodes[n_num]; - if (n_ptr && tipc_node_has_active_links(n_ptr)) - return n_ptr->addr; - } - return 0; -} void tipc_cltr_attach_node(struct cluster *c_ptr, struct tipc_node *n_ptr) { @@ -232,7 +213,7 @@ struct tipc_node *tipc_cltr_select_node(struct cluster *c_ptr, u32 selector) static struct sk_buff *tipc_cltr_prepare_routing_msg(u32 data_size, u32 dest) { u32 size = INT_H_SIZE + data_size; - struct sk_buff *buf = buf_acquire(size); + struct sk_buff *buf = tipc_buf_acquire(size); struct tipc_msg *msg; if (buf) { diff --git a/net/tipc/cluster.h b/net/tipc/cluster.h index 333efb0b9c4..32636d98c9c 100644 --- a/net/tipc/cluster.h +++ b/net/tipc/cluster.h @@ -75,7 +75,7 @@ void tipc_cltr_attach_node(struct cluster *c_ptr, struct tipc_node *n_ptr); void tipc_cltr_send_slave_routes(struct cluster *c_ptr, u32 dest); void tipc_cltr_broadcast(struct sk_buff *buf); int tipc_cltr_init(void); -u32 tipc_cltr_next_node(struct cluster *c_ptr, u32 addr); + void tipc_cltr_bcast_new_route(struct cluster *c_ptr, u32 dest, u32 lo, u32 hi); void tipc_cltr_send_local_routes(struct cluster *c_ptr, u32 dest); void tipc_cltr_bcast_lost_route(struct cluster *c_ptr, u32 dest, u32 lo, u32 hi); diff --git a/net/tipc/config.c b/net/tipc/config.c index c429b0d488a..50a6133a366 100644 --- a/net/tipc/config.c +++ b/net/tipc/config.c @@ -95,7 +95,7 @@ int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type, return 1; } -struct sk_buff *tipc_cfg_reply_unsigned_type(u16 tlv_type, u32 value) +static struct sk_buff *tipc_cfg_reply_unsigned_type(u16 tlv_type, u32 value) { struct sk_buff *buf; __be32 value_net; @@ -109,6 +109,11 @@ struct sk_buff *tipc_cfg_reply_unsigned_type(u16 tlv_type, u32 value) return buf; } +static struct sk_buff *tipc_cfg_reply_unsigned(u32 value) +{ + return tipc_cfg_reply_unsigned_type(TIPC_TLV_UNSIGNED, value); +} + struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string) { struct sk_buff *buf; diff --git a/net/tipc/config.h b/net/tipc/config.h index 5cd7cc56c54..481e12ece71 100644 --- a/net/tipc/config.h +++ b/net/tipc/config.h @@ -45,7 +45,6 @@ struct sk_buff *tipc_cfg_reply_alloc(int payload_size); int tipc_cfg_append_tlv(struct sk_buff *buf, int tlv_type, void *tlv_data, int tlv_data_size); -struct sk_buff *tipc_cfg_reply_unsigned_type(u16 tlv_type, u32 value); struct sk_buff *tipc_cfg_reply_string_type(u16 tlv_type, char *string); static inline struct sk_buff *tipc_cfg_reply_none(void) @@ -53,11 +52,6 @@ static inline struct sk_buff *tipc_cfg_reply_none(void) return tipc_cfg_reply_alloc(0); } -static inline struct sk_buff *tipc_cfg_reply_unsigned(u32 value) -{ - return tipc_cfg_reply_unsigned_type(TIPC_TLV_UNSIGNED, value); -} - static inline struct sk_buff *tipc_cfg_reply_error_string(char *string) { return tipc_cfg_reply_string_type(TIPC_TLV_ERROR_STRING, string); diff --git a/net/tipc/core.c b/net/tipc/core.c index 466b861dab9..c00530386e3 100644 --- a/net/tipc/core.c +++ b/net/tipc/core.c @@ -96,13 +96,13 @@ int tipc_net_id; int tipc_remote_management; -int tipc_get_mode(void) +static int tipc_get_mode(void) { return tipc_mode; } /** - * buf_acquire - creates a TIPC message buffer + * tipc_buf_acquire - creates a TIPC message buffer * @size: message size (including TIPC header) * * Returns a new buffer with data pointers set to the specified size. @@ -111,7 +111,7 @@ int tipc_get_mode(void) * There may also be unrequested tailroom present at the buffer's end. */ -struct sk_buff *buf_acquire(u32 size) +struct sk_buff *tipc_buf_acquire(u32 size) { struct sk_buff *skb; unsigned int buf_size = (BUF_HEADROOM + size + 3) & ~3u; @@ -129,7 +129,7 @@ struct sk_buff *buf_acquire(u32 size) * tipc_core_stop_net - shut down TIPC networking sub-systems */ -void tipc_core_stop_net(void) +static void tipc_core_stop_net(void) { tipc_eth_media_stop(); tipc_net_stop(); @@ -154,7 +154,7 @@ int tipc_core_start_net(unsigned long addr) * tipc_core_stop - switch TIPC from SINGLE NODE to NOT RUNNING mode */ -void tipc_core_stop(void) +static void tipc_core_stop(void) { if (tipc_mode != TIPC_NODE_MODE) return; @@ -176,7 +176,7 @@ void tipc_core_stop(void) * tipc_core_start - switch TIPC from NOT RUNNING to SINGLE NODE mode */ -int tipc_core_start(void) +static int tipc_core_start(void) { int res; @@ -246,7 +246,6 @@ MODULE_VERSION(TIPC_MOD_VER); EXPORT_SYMBOL(tipc_attach); EXPORT_SYMBOL(tipc_detach); -EXPORT_SYMBOL(tipc_get_addr); EXPORT_SYMBOL(tipc_get_mode); EXPORT_SYMBOL(tipc_createport); EXPORT_SYMBOL(tipc_deleteport); @@ -262,23 +261,10 @@ EXPORT_SYMBOL(tipc_withdraw); EXPORT_SYMBOL(tipc_connect2port); EXPORT_SYMBOL(tipc_disconnect); EXPORT_SYMBOL(tipc_shutdown); -EXPORT_SYMBOL(tipc_isconnected); -EXPORT_SYMBOL(tipc_peer); -EXPORT_SYMBOL(tipc_ref_valid); EXPORT_SYMBOL(tipc_send); -EXPORT_SYMBOL(tipc_send_buf); EXPORT_SYMBOL(tipc_send2name); -EXPORT_SYMBOL(tipc_forward2name); -EXPORT_SYMBOL(tipc_send_buf2name); -EXPORT_SYMBOL(tipc_forward_buf2name); EXPORT_SYMBOL(tipc_send2port); -EXPORT_SYMBOL(tipc_forward2port); -EXPORT_SYMBOL(tipc_send_buf2port); -EXPORT_SYMBOL(tipc_forward_buf2port); EXPORT_SYMBOL(tipc_multicast); -/* EXPORT_SYMBOL(tipc_multicast_buf); not available yet */ -EXPORT_SYMBOL(tipc_ispublished); -EXPORT_SYMBOL(tipc_available_nodes); /* TIPC API for external bearers (see tipc_bearer.h) */ @@ -295,6 +281,4 @@ EXPORT_SYMBOL(tipc_createport_raw); EXPORT_SYMBOL(tipc_reject_msg); EXPORT_SYMBOL(tipc_send_buf_fast); EXPORT_SYMBOL(tipc_acknowledge); -EXPORT_SYMBOL(tipc_get_port); -EXPORT_SYMBOL(tipc_get_handle); diff --git a/net/tipc/core.h b/net/tipc/core.h index 188799017ab..e19389e5722 100644 --- a/net/tipc/core.h +++ b/net/tipc/core.h @@ -83,9 +83,7 @@ * Note: TIPC_LOG is configured to echo its output to the system console; * user-defined buffers can be configured to do the same thing. */ - extern struct print_buf *const TIPC_NULL; -extern struct print_buf *const TIPC_CONS; extern struct print_buf *const TIPC_LOG; void tipc_printf(struct print_buf *, const char *fmt, ...); @@ -204,10 +202,7 @@ extern atomic_t tipc_user_count; * Routines available to privileged subsystems */ -extern int tipc_core_start(void); -extern void tipc_core_stop(void); -extern int tipc_core_start_net(unsigned long addr); -extern void tipc_core_stop_net(void); +extern int tipc_core_start_net(unsigned long); extern int tipc_handler_start(void); extern void tipc_handler_stop(void); extern int tipc_netlink_start(void); @@ -328,7 +323,7 @@ static inline struct tipc_msg *buf_msg(struct sk_buff *skb) return (struct tipc_msg *)skb->data; } -extern struct sk_buff *buf_acquire(u32 size); +extern struct sk_buff *tipc_buf_acquire(u32 size); /** * buf_discard - frees a TIPC message buffer diff --git a/net/tipc/dbg.c b/net/tipc/dbg.c index 6569d45bfb9..46f51d208e5 100644 --- a/net/tipc/dbg.c +++ b/net/tipc/dbg.c @@ -52,7 +52,7 @@ static struct print_buf null_buf = { NULL, 0, NULL, 0 }; struct print_buf *const TIPC_NULL = &null_buf; static struct print_buf cons_buf = { NULL, 0, NULL, 1 }; -struct print_buf *const TIPC_CONS = &cons_buf; +static struct print_buf *const TIPC_CONS = &cons_buf; static struct print_buf log_buf = { NULL, 0, NULL, 1 }; struct print_buf *const TIPC_LOG = &log_buf; @@ -76,6 +76,10 @@ struct print_buf *const TIPC_LOG = &log_buf; static char print_string[TIPC_PB_MAX_STR]; static DEFINE_SPINLOCK(print_lock); +static void tipc_printbuf_reset(struct print_buf *pb); +static int tipc_printbuf_empty(struct print_buf *pb); +static void tipc_printbuf_move(struct print_buf *pb_to, + struct print_buf *pb_from); #define FORMAT(PTR,LEN,FMT) \ {\ @@ -116,7 +120,7 @@ void tipc_printbuf_init(struct print_buf *pb, char *raw, u32 size) * @pb: pointer to print buffer structure */ -void tipc_printbuf_reset(struct print_buf *pb) +static void tipc_printbuf_reset(struct print_buf *pb) { if (pb->buf) { pb->crs = pb->buf; @@ -132,7 +136,7 @@ void tipc_printbuf_reset(struct print_buf *pb) * Returns non-zero if print buffer is empty. */ -int tipc_printbuf_empty(struct print_buf *pb) +static int tipc_printbuf_empty(struct print_buf *pb) { return !pb->buf || (pb->crs == pb->buf); } @@ -181,7 +185,8 @@ int tipc_printbuf_validate(struct print_buf *pb) * Source print buffer becomes empty if a successful move occurs. */ -void tipc_printbuf_move(struct print_buf *pb_to, struct print_buf *pb_from) +static void tipc_printbuf_move(struct print_buf *pb_to, + struct print_buf *pb_from) { int len; diff --git a/net/tipc/dbg.h b/net/tipc/dbg.h index 5ef1bc8f64e..3ba6ba8b434 100644 --- a/net/tipc/dbg.h +++ b/net/tipc/dbg.h @@ -56,10 +56,7 @@ struct print_buf { #define TIPC_PB_MAX_STR 512 /* max printable string (with trailing NUL) */ void tipc_printbuf_init(struct print_buf *pb, char *buf, u32 size); -void tipc_printbuf_reset(struct print_buf *pb); -int tipc_printbuf_empty(struct print_buf *pb); int tipc_printbuf_validate(struct print_buf *pb); -void tipc_printbuf_move(struct print_buf *pb_to, struct print_buf *pb_from); int tipc_log_resize(int log_size); diff --git a/net/tipc/discover.c b/net/tipc/discover.c index dbd79c67d7c..4a7cd3719b7 100644 --- a/net/tipc/discover.c +++ b/net/tipc/discover.c @@ -68,20 +68,6 @@ struct link_req { unsigned int timer_intv; }; - -/* - * disc_lost_link(): A link has lost contact - */ - -void tipc_disc_link_event(u32 addr, char *name, int up) -{ - if (in_own_cluster(addr)) - return; - /* - * Code for inter cluster link setup here - */ -} - /** * tipc_disc_init_msg - initialize a link setup message * @type: message type (request or response) @@ -95,7 +81,7 @@ static struct sk_buff *tipc_disc_init_msg(u32 type, u32 dest_domain, struct bearer *b_ptr) { - struct sk_buff *buf = buf_acquire(DSC_H_SIZE); + struct sk_buff *buf = tipc_buf_acquire(DSC_H_SIZE); struct tipc_msg *msg; if (buf) { diff --git a/net/tipc/discover.h b/net/tipc/discover.h index 9d064c3639b..f8e75063612 100644 --- a/net/tipc/discover.h +++ b/net/tipc/discover.h @@ -50,6 +50,4 @@ void tipc_disc_stop_link_req(struct link_req *req); void tipc_disc_recv_msg(struct sk_buff *buf, struct bearer *b_ptr); -void tipc_disc_link_event(u32 addr, char *name, int up); - #endif diff --git a/net/tipc/link.c b/net/tipc/link.c index 4be78ecf4a6..b31992ccd5d 100644 --- a/net/tipc/link.c +++ b/net/tipc/link.c @@ -112,6 +112,9 @@ static void link_state_event(struct link *l_ptr, u32 event); static void link_reset_statistics(struct link *l_ptr); static void link_print(struct link *l_ptr, struct print_buf *buf, const char *str); +static void link_start(struct link *l_ptr); +static int link_send_long_buf(struct link *l_ptr, struct sk_buff *buf); + /* * Debugging code used by link routines only @@ -442,7 +445,7 @@ struct link *tipc_link_create(struct bearer *b_ptr, const u32 peer, k_init_timer(&l_ptr->timer, (Handler)link_timeout, (unsigned long)l_ptr); list_add_tail(&l_ptr->link_list, &b_ptr->links); - tipc_k_signal((Handler)tipc_link_start, (unsigned long)l_ptr); + tipc_k_signal((Handler)link_start, (unsigned long)l_ptr); dbg("tipc_link_create(): tolerance = %u,cont intv = %u, abort_limit = %u\n", l_ptr->tolerance, l_ptr->continuity_interval, l_ptr->abort_limit); @@ -482,9 +485,9 @@ void tipc_link_delete(struct link *l_ptr) kfree(l_ptr); } -void tipc_link_start(struct link *l_ptr) +static void link_start(struct link *l_ptr) { - dbg("tipc_link_start %x\n", l_ptr); + dbg("link_start %x\n", l_ptr); link_state_event(l_ptr, STARTING_EVT); } @@ -1000,7 +1003,7 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf) /* Fragmentation needed ? */ if (size > max_packet) - return tipc_link_send_long_buf(l_ptr, buf); + return link_send_long_buf(l_ptr, buf); /* Packet can be queued or sent: */ @@ -1036,7 +1039,7 @@ int tipc_link_send_buf(struct link *l_ptr, struct sk_buff *buf) /* Try creating a new bundle */ if (size <= max_packet * 2 / 3) { - struct sk_buff *bundler = buf_acquire(max_packet); + struct sk_buff *bundler = tipc_buf_acquire(max_packet); struct tipc_msg bundler_hdr; if (bundler) { @@ -1312,7 +1315,7 @@ again: /* Prepare header of first fragment: */ - buf_chain = buf = buf_acquire(max_pkt); + buf_chain = buf = tipc_buf_acquire(max_pkt); if (!buf) return -ENOMEM; buf->next = NULL; @@ -1369,7 +1372,7 @@ error: msg_set_size(&fragm_hdr, fragm_sz + INT_H_SIZE); msg_set_fragm_no(&fragm_hdr, ++fragm_no); prev = buf; - buf = buf_acquire(fragm_sz + INT_H_SIZE); + buf = tipc_buf_acquire(fragm_sz + INT_H_SIZE); if (!buf) goto error; @@ -2145,7 +2148,7 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg, if (tipc_bearer_congested(l_ptr->b_ptr, l_ptr)) { if (!l_ptr->proto_msg_queue) { l_ptr->proto_msg_queue = - buf_acquire(sizeof(l_ptr->proto_msg)); + tipc_buf_acquire(sizeof(l_ptr->proto_msg)); } buf = l_ptr->proto_msg_queue; if (!buf) @@ -2159,7 +2162,7 @@ void tipc_link_send_proto_msg(struct link *l_ptr, u32 msg_typ, int probe_msg, msg_dbg(msg, ">>"); - buf = buf_acquire(msg_size); + buf = tipc_buf_acquire(msg_size); if (!buf) return; @@ -2318,10 +2321,10 @@ exit: * tipc_link_tunnel(): Send one message via a link belonging to * another bearer. Owner node is locked. */ -void tipc_link_tunnel(struct link *l_ptr, - struct tipc_msg *tunnel_hdr, - struct tipc_msg *msg, - u32 selector) +static void tipc_link_tunnel(struct link *l_ptr, + struct tipc_msg *tunnel_hdr, + struct tipc_msg *msg, + u32 selector) { struct link *tunnel; struct sk_buff *buf; @@ -2334,7 +2337,7 @@ void tipc_link_tunnel(struct link *l_ptr, return; } msg_set_size(tunnel_hdr, length + INT_H_SIZE); - buf = buf_acquire(length + INT_H_SIZE); + buf = tipc_buf_acquire(length + INT_H_SIZE); if (!buf) { warn("Link changeover error, " "unable to send tunnel msg\n"); @@ -2380,7 +2383,7 @@ void tipc_link_changeover(struct link *l_ptr) if (!l_ptr->first_out) { struct sk_buff *buf; - buf = buf_acquire(INT_H_SIZE); + buf = tipc_buf_acquire(INT_H_SIZE); if (buf) { skb_copy_to_linear_data(buf, &tunnel_hdr, INT_H_SIZE); msg_set_size(&tunnel_hdr, INT_H_SIZE); @@ -2441,7 +2444,7 @@ void tipc_link_send_duplicate(struct link *l_ptr, struct link *tunnel) msg_set_ack(msg, mod(l_ptr->next_in_no - 1)); /* Update */ msg_set_bcast_ack(msg, l_ptr->owner->bclink.last_in); msg_set_size(&tunnel_hdr, length + INT_H_SIZE); - outbuf = buf_acquire(length + INT_H_SIZE); + outbuf = tipc_buf_acquire(length + INT_H_SIZE); if (outbuf == NULL) { warn("Link changeover error, " "unable to send duplicate msg\n"); @@ -2477,7 +2480,7 @@ static struct sk_buff *buf_extract(struct sk_buff *skb, u32 from_pos) u32 size = msg_size(msg); struct sk_buff *eb; - eb = buf_acquire(size); + eb = tipc_buf_acquire(size); if (eb) skb_copy_to_linear_data(eb, msg, size); return eb; @@ -2605,11 +2608,11 @@ void tipc_link_recv_bundle(struct sk_buff *buf) /* - * tipc_link_send_long_buf: Entry for buffers needing fragmentation. + * link_send_long_buf: Entry for buffers needing fragmentation. * The buffer is complete, inclusive total message length. * Returns user data length. */ -int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf) +static int link_send_long_buf(struct link *l_ptr, struct sk_buff *buf) { struct tipc_msg *inmsg = buf_msg(buf); struct tipc_msg fragm_hdr; @@ -2648,7 +2651,7 @@ int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf) fragm_sz = rest; msg_set_type(&fragm_hdr, LAST_FRAGMENT); } - fragm = buf_acquire(fragm_sz + INT_H_SIZE); + fragm = tipc_buf_acquire(fragm_sz + INT_H_SIZE); if (fragm == NULL) { warn("Link unable to fragment message\n"); dsz = -ENOMEM; @@ -2753,7 +2756,7 @@ int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb, buf_discard(fbuf); return 0; } - pbuf = buf_acquire(msg_size(imsg)); + pbuf = tipc_buf_acquire(msg_size(imsg)); if (pbuf != NULL) { pbuf->next = *pending; *pending = pbuf; diff --git a/net/tipc/link.h b/net/tipc/link.h index 4e944ef4a54..f98bc613de6 100644 --- a/net/tipc/link.h +++ b/net/tipc/link.h @@ -225,7 +225,6 @@ void tipc_link_send_duplicate(struct link *l_ptr, struct link *dest); void tipc_link_reset_fragments(struct link *l_ptr); int tipc_link_is_up(struct link *l_ptr); int tipc_link_is_active(struct link *l_ptr); -void tipc_link_start(struct link *l_ptr); u32 tipc_link_push_packet(struct link *l_ptr); void tipc_link_stop(struct link *l_ptr); struct sk_buff *tipc_link_cmd_config(const void *req_tlv_area, int req_tlv_space, u16 cmd); @@ -239,9 +238,6 @@ int tipc_link_send_sections_fast(struct port* sender, struct iovec const *msg_sect, const u32 num_sect, u32 destnode); -int tipc_link_send_long_buf(struct link *l_ptr, struct sk_buff *buf); -void tipc_link_tunnel(struct link *l_ptr, struct tipc_msg *tnl_hdr, - struct tipc_msg *msg, u32 selector); void tipc_link_recv_bundle(struct sk_buff *buf); int tipc_link_recv_fragment(struct sk_buff **pending, struct sk_buff **fb, diff --git a/net/tipc/msg.c b/net/tipc/msg.c index 381063817b4..ecb532fb035 100644 --- a/net/tipc/msg.c +++ b/net/tipc/msg.c @@ -112,7 +112,7 @@ int tipc_msg_build(struct tipc_msg *hdr, return dsz; } - *buf = buf_acquire(sz); + *buf = tipc_buf_acquire(sz); if (!(*buf)) return -ENOMEM; skb_copy_to_linear_data(*buf, hdr, hsz); diff --git a/net/tipc/name_distr.c b/net/tipc/name_distr.c index 6ac3c543250..7b907171f87 100644 --- a/net/tipc/name_distr.c +++ b/net/tipc/name_distr.c @@ -98,7 +98,7 @@ static void publ_to_item(struct distr_item *i, struct publication *p) static struct sk_buff *named_prepare_buf(u32 type, u32 size, u32 dest) { - struct sk_buff *buf = buf_acquire(LONG_H_SIZE + size); + struct sk_buff *buf = tipc_buf_acquire(LONG_H_SIZE + size); struct tipc_msg *msg; if (buf != NULL) { diff --git a/net/tipc/node.c b/net/tipc/node.c index 823e9abb7ef..b4d87eb2dc5 100644 --- a/net/tipc/node.c +++ b/net/tipc/node.c @@ -50,7 +50,8 @@ void node_print(struct print_buf *buf, struct tipc_node *n_ptr, char *str); static void node_lost_contact(struct tipc_node *n_ptr); static void node_established_contact(struct tipc_node *n_ptr); -struct tipc_node *tipc_nodes = NULL; /* sorted list of nodes within cluster */ +/* sorted list of nodes within cluster */ +static struct tipc_node *tipc_nodes = NULL; static DEFINE_SPINLOCK(node_create_lock); @@ -587,22 +588,6 @@ void tipc_node_remove_router(struct tipc_node *n_ptr, u32 router) node_lost_contact(n_ptr); } -u32 tipc_available_nodes(const u32 domain) -{ - struct tipc_node *n_ptr; - u32 cnt = 0; - - read_lock_bh(&tipc_net_lock); - for (n_ptr = tipc_nodes; n_ptr; n_ptr = n_ptr->next) { - if (!tipc_in_scope(domain, n_ptr->addr)) - continue; - if (tipc_node_is_up(n_ptr)) - cnt++; - } - read_unlock_bh(&tipc_net_lock); - return cnt; -} - struct sk_buff *tipc_node_get_nodes(const void *req_tlv_area, int req_tlv_space) { u32 domain; diff --git a/net/tipc/node.h b/net/tipc/node.h index 45f3db3a595..fff331b2d26 100644 --- a/net/tipc/node.h +++ b/net/tipc/node.h @@ -96,7 +96,6 @@ struct tipc_node { } bclink; }; -extern struct tipc_node *tipc_nodes; extern u32 tipc_own_tag; struct tipc_node *tipc_node_create(u32 addr); diff --git a/net/tipc/port.c b/net/tipc/port.c index 5c4285b2d55..82092eaa153 100644 --- a/net/tipc/port.c +++ b/net/tipc/port.c @@ -293,34 +293,6 @@ int tipc_deleteport(u32 ref) return 0; } -/** - * tipc_get_port() - return port associated with 'ref' - * - * Note: Port is not locked. - */ - -struct tipc_port *tipc_get_port(const u32 ref) -{ - return (struct tipc_port *)tipc_ref_deref(ref); -} - -/** - * tipc_get_handle - return user handle associated to port 'ref' - */ - -void *tipc_get_handle(const u32 ref) -{ - struct port *p_ptr; - void * handle; - - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return NULL; - handle = p_ptr->publ.usr_handle; - tipc_port_unlock(p_ptr); - return handle; -} - static int port_unreliable(struct port *p_ptr) { return msg_src_droppable(&p_ptr->publ.phdr); @@ -392,7 +364,7 @@ static struct sk_buff *port_build_proto_msg(u32 destport, u32 destnode, struct sk_buff *buf; struct tipc_msg *msg; - buf = buf_acquire(LONG_H_SIZE); + buf = tipc_buf_acquire(LONG_H_SIZE); if (buf) { msg = buf_msg(buf); tipc_msg_init(msg, usr, type, LONG_H_SIZE, destnode); @@ -433,7 +405,7 @@ int tipc_reject_msg(struct sk_buff *buf, u32 err) hdr_sz = MCAST_H_SIZE; else hdr_sz = LONG_H_SIZE; - rbuf = buf_acquire(data_sz + hdr_sz); + rbuf = tipc_buf_acquire(data_sz + hdr_sz); if (rbuf == NULL) { buf_discard(buf); return data_sz; @@ -1242,50 +1214,13 @@ int tipc_shutdown(u32 ref) return tipc_disconnect(ref); } -int tipc_isconnected(u32 ref, int *isconnected) -{ - struct port *p_ptr; - - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; - *isconnected = p_ptr->publ.connected; - tipc_port_unlock(p_ptr); - return 0; -} - -int tipc_peer(u32 ref, struct tipc_portid *peer) -{ - struct port *p_ptr; - int res; - - p_ptr = tipc_port_lock(ref); - if (!p_ptr) - return -EINVAL; - if (p_ptr->publ.connected) { - peer->ref = port_peerport(p_ptr); - peer->node = port_peernode(p_ptr); - res = 0; - } else - res = -ENOTCONN; - tipc_port_unlock(p_ptr); - return res; -} - -int tipc_ref_valid(u32 ref) -{ - /* Works irrespective of type */ - return !!tipc_ref_deref(ref); -} - - /* * tipc_port_recv_sections(): Concatenate and deliver sectioned * message for this node. */ -int tipc_port_recv_sections(struct port *sender, unsigned int num_sect, - struct iovec const *msg_sect) +static int tipc_port_recv_sections(struct port *sender, unsigned int num_sect, + struct iovec const *msg_sect) { struct sk_buff *buf; int res; @@ -1335,66 +1270,17 @@ int tipc_send(u32 ref, unsigned int num_sect, struct iovec const *msg_sect) return -ELINKCONG; } -/** - * tipc_send_buf - send message buffer on connection - */ - -int tipc_send_buf(u32 ref, struct sk_buff *buf, unsigned int dsz) -{ - struct port *p_ptr; - struct tipc_msg *msg; - u32 destnode; - u32 hsz; - u32 sz; - u32 res; - - p_ptr = tipc_port_deref(ref); - if (!p_ptr || !p_ptr->publ.connected) - return -EINVAL; - - msg = &p_ptr->publ.phdr; - hsz = msg_hdr_sz(msg); - sz = hsz + dsz; - msg_set_size(msg, sz); - if (skb_cow(buf, hsz)) - return -ENOMEM; - - skb_push(buf, hsz); - skb_copy_to_linear_data(buf, msg, hsz); - destnode = msg_destnode(msg); - p_ptr->publ.congested = 1; - if (!tipc_port_congested(p_ptr)) { - if (likely(destnode != tipc_own_addr)) - res = tipc_send_buf_fast(buf, destnode); - else { - tipc_port_recv_msg(buf); - res = sz; - } - if (likely(res != -ELINKCONG)) { - port_incr_out_seqno(p_ptr); - p_ptr->sent++; - p_ptr->publ.congested = 0; - return res; - } - } - if (port_unreliable(p_ptr)) { - p_ptr->publ.congested = 0; - return dsz; - } - return -ELINKCONG; -} - /** * tipc_forward2name - forward message sections to port name */ -int tipc_forward2name(u32 ref, - struct tipc_name const *name, - u32 domain, - u32 num_sect, - struct iovec const *msg_sect, - struct tipc_portid const *orig, - unsigned int importance) +static int tipc_forward2name(u32 ref, + struct tipc_name const *name, + u32 domain, + u32 num_sect, + struct iovec const *msg_sect, + struct tipc_portid const *orig, + unsigned int importance) { struct port *p_ptr; struct tipc_msg *msg; @@ -1456,90 +1342,16 @@ int tipc_send2name(u32 ref, TIPC_PORT_IMPORTANCE); } -/** - * tipc_forward_buf2name - forward message buffer to port name - */ - -int tipc_forward_buf2name(u32 ref, - struct tipc_name const *name, - u32 domain, - struct sk_buff *buf, - unsigned int dsz, - struct tipc_portid const *orig, - unsigned int importance) -{ - struct port *p_ptr; - struct tipc_msg *msg; - u32 destnode = domain; - u32 destport; - int res; - - p_ptr = (struct port *)tipc_ref_deref(ref); - if (!p_ptr || p_ptr->publ.connected) - return -EINVAL; - - msg = &p_ptr->publ.phdr; - if (importance <= TIPC_CRITICAL_IMPORTANCE) - msg_set_importance(msg, importance); - msg_set_type(msg, TIPC_NAMED_MSG); - msg_set_orignode(msg, orig->node); - msg_set_origport(msg, orig->ref); - msg_set_nametype(msg, name->type); - msg_set_nameinst(msg, name->instance); - msg_set_lookup_scope(msg, tipc_addr_scope(domain)); - msg_set_hdr_sz(msg, LONG_H_SIZE); - msg_set_size(msg, LONG_H_SIZE + dsz); - destport = tipc_nametbl_translate(name->type, name->instance, &destnode); - msg_set_destnode(msg, destnode); - msg_set_destport(msg, destport); - msg_dbg(msg, "forw2name ==> "); - if (skb_cow(buf, LONG_H_SIZE)) - return -ENOMEM; - skb_push(buf, LONG_H_SIZE); - skb_copy_to_linear_data(buf, msg, LONG_H_SIZE); - msg_dbg(buf_msg(buf),"PREP:"); - if (likely(destport)) { - p_ptr->sent++; - if (destnode == tipc_own_addr) - return tipc_port_recv_msg(buf); - res = tipc_send_buf_fast(buf, destnode); - if (likely(res != -ELINKCONG)) - return res; - if (port_unreliable(p_ptr)) - return dsz; - return -ELINKCONG; - } - return tipc_reject_msg(buf, TIPC_ERR_NO_NAME); -} - -/** - * tipc_send_buf2name - send message buffer to port name - */ - -int tipc_send_buf2name(u32 ref, - struct tipc_name const *dest, - u32 domain, - struct sk_buff *buf, - unsigned int dsz) -{ - struct tipc_portid orig; - - orig.ref = ref; - orig.node = tipc_own_addr; - return tipc_forward_buf2name(ref, dest, domain, buf, dsz, &orig, - TIPC_PORT_IMPORTANCE); -} - /** * tipc_forward2port - forward message sections to port identity */ -int tipc_forward2port(u32 ref, - struct tipc_portid const *dest, - unsigned int num_sect, - struct iovec const *msg_sect, - struct tipc_portid const *orig, - unsigned int importance) +static int tipc_forward2port(u32 ref, + struct tipc_portid const *dest, + unsigned int num_sect, + struct iovec const *msg_sect, + struct tipc_portid const *orig, + unsigned int importance) { struct port *p_ptr; struct tipc_msg *msg; @@ -1591,12 +1403,12 @@ int tipc_send2port(u32 ref, /** * tipc_forward_buf2port - forward message buffer to port identity */ -int tipc_forward_buf2port(u32 ref, - struct tipc_portid const *dest, - struct sk_buff *buf, - unsigned int dsz, - struct tipc_portid const *orig, - unsigned int importance) +static int tipc_forward_buf2port(u32 ref, + struct tipc_portid const *dest, + struct sk_buff *buf, + unsigned int dsz, + struct tipc_portid const *orig, + unsigned int importance) { struct port *p_ptr; struct tipc_msg *msg; diff --git a/net/tipc/port.h b/net/tipc/port.h index e74bd956373..73bbf442b34 100644 --- a/net/tipc/port.h +++ b/net/tipc/port.h @@ -109,8 +109,6 @@ struct port { extern spinlock_t tipc_port_list_lock; struct port_list; -int tipc_port_recv_sections(struct port *p_ptr, u32 num_sect, - struct iovec const *msg_sect); int tipc_port_reject_sections(struct port *p_ptr, struct tipc_msg *hdr, struct iovec const *msg_sect, u32 num_sect, int err); diff --git a/net/tipc/ref.c b/net/tipc/ref.c index 8dea66500cf..ab8ad32d8c2 100644 --- a/net/tipc/ref.c +++ b/net/tipc/ref.c @@ -282,23 +282,6 @@ void *tipc_ref_lock(u32 ref) return NULL; } -/** - * tipc_ref_unlock - unlock referenced object - */ - -void tipc_ref_unlock(u32 ref) -{ - if (likely(tipc_ref_table.entries)) { - struct reference *entry; - - entry = &tipc_ref_table.entries[ref & - tipc_ref_table.index_mask]; - if (likely((entry->ref == ref) && (entry->object))) - spin_unlock_bh(&entry->lock); - else - err("Attempt to unlock non-existent reference\n"); - } -} /** * tipc_ref_deref - return pointer referenced object (without locking it) diff --git a/net/tipc/ref.h b/net/tipc/ref.h index 7e3798ea93b..5bc8e7ab84d 100644 --- a/net/tipc/ref.h +++ b/net/tipc/ref.h @@ -44,7 +44,6 @@ u32 tipc_ref_acquire(void *object, spinlock_t **lock); void tipc_ref_discard(u32 ref); void *tipc_ref_lock(u32 ref); -void tipc_ref_unlock(u32 ref); void *tipc_ref_deref(u32 ref); #endif diff --git a/net/tipc/subscr.c b/net/tipc/subscr.c index 1a5b9a6bd12..18813acc6be 100644 --- a/net/tipc/subscr.c +++ b/net/tipc/subscr.c @@ -598,12 +598,3 @@ void tipc_subscr_stop(void) topsrv.user_ref = 0; } } - - -int tipc_ispublished(struct tipc_name const *name) -{ - u32 domain = 0; - - return tipc_nametbl_translate(name->type, name->instance, &domain) != 0; -} - diff --git a/net/tipc/zone.c b/net/tipc/zone.c index 2c01ba2d86b..83f8b5e91fc 100644 --- a/net/tipc/zone.c +++ b/net/tipc/zone.c @@ -160,14 +160,3 @@ u32 tipc_zone_select_router(struct _zone *z_ptr, u32 addr, u32 ref) } return 0; } - - -u32 tipc_zone_next_node(u32 addr) -{ - struct cluster *c_ptr = tipc_cltr_find(addr); - - if (c_ptr) - return tipc_cltr_next_node(c_ptr, addr); - return 0; -} - diff --git a/net/tipc/zone.h b/net/tipc/zone.h index 7bdc3406ba9..bd1c20ce9d0 100644 --- a/net/tipc/zone.h +++ b/net/tipc/zone.h @@ -61,7 +61,6 @@ void tipc_zone_send_external_routes(struct _zone *z_ptr, u32 dest); struct _zone *tipc_zone_create(u32 addr); void tipc_zone_delete(struct _zone *z_ptr); void tipc_zone_attach_cluster(struct _zone *z_ptr, struct cluster *c_ptr); -u32 tipc_zone_next_node(u32 addr); static inline struct _zone *tipc_zone_find(u32 addr) { -- cgit v1.2.3 From 8e602ce2980fd6941dc0d3dda12e5095e8206f34 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 14 Oct 2010 05:56:18 +0000 Subject: netns: reorder fields in struct net In a network bench, I noticed an unfortunate false sharing between 'loopback_dev' and 'count' fields in "struct net". 'count' is written each time a socket is created or destroyed, while loopback_dev might be often read in routing code. Move loopback_dev in a read mostly section of "struct net" Note: struct netns_xfrm is cache line aligned on SMP. (It contains a "struct dst_ops") Move it at the end to avoid holes, and reduce sizeof(struct net) by 128 bytes on ia32. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/net_namespace.h | 17 ++++++++++------- include/net/netns/xfrm.h | 9 +++++---- 2 files changed, 15 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index bd10a790899..65af9a07cf7 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -41,6 +41,8 @@ struct net { * destroy on demand */ #endif + spinlock_t rules_mod_lock; + struct list_head list; /* list of network namespaces */ struct list_head cleanup_list; /* namespaces on death row */ struct list_head exit_list; /* Use only net_mutex */ @@ -52,7 +54,8 @@ struct net { struct ctl_table_set sysctls; #endif - struct net_device *loopback_dev; /* The loopback */ + struct sock *rtnl; /* rtnetlink socket */ + struct sock *genl_sock; struct list_head dev_base_head; struct hlist_head *dev_name_head; @@ -60,11 +63,9 @@ struct net { /* core fib_rules */ struct list_head rules_ops; - spinlock_t rules_mod_lock; - struct sock *rtnl; /* rtnetlink socket */ - struct sock *genl_sock; + struct net_device *loopback_dev; /* The loopback */ struct netns_core core; struct netns_mib mib; struct netns_packet packet; @@ -84,13 +85,15 @@ struct net { struct sock *nfnl; struct sock *nfnl_stash; #endif -#ifdef CONFIG_XFRM - struct netns_xfrm xfrm; -#endif #ifdef CONFIG_WEXT_CORE struct sk_buff_head wext_nlevents; #endif struct net_generic *gen; + + /* Note : following structs are cache line aligned */ +#ifdef CONFIG_XFRM + struct netns_xfrm xfrm; +#endif }; diff --git a/include/net/netns/xfrm.h b/include/net/netns/xfrm.h index 74f119a2829..748f91f87cd 100644 --- a/include/net/netns/xfrm.h +++ b/include/net/netns/xfrm.h @@ -43,10 +43,6 @@ struct netns_xfrm { unsigned int policy_count[XFRM_POLICY_MAX * 2]; struct work_struct policy_hash_work; - struct dst_ops xfrm4_dst_ops; -#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) - struct dst_ops xfrm6_dst_ops; -#endif struct sock *nlsk; struct sock *nlsk_stash; @@ -58,6 +54,11 @@ struct netns_xfrm { #ifdef CONFIG_SYSCTL struct ctl_table_header *sysctl_hdr; #endif + + struct dst_ops xfrm4_dst_ops; +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) + struct dst_ops xfrm6_dst_ops; +#endif }; #endif -- cgit v1.2.3 From ebbf41df4aabb6d506fa18ea8cb4c2b4388a18b9 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 19 Oct 2010 10:19:06 +0200 Subject: netfilter: ctnetlink: add expectation deletion events This patch allows to listen to events that inform about expectations destroyed. Signed-off-by: Pablo Neira Ayuso Signed-off-by: Patrick McHardy --- include/linux/netfilter/nf_conntrack_common.h | 1 + include/net/netfilter/nf_conntrack_expect.h | 8 ++++++- net/netfilter/nf_conntrack_expect.c | 6 ++++-- net/netfilter/nf_conntrack_netlink.c | 30 +++++++++++++++++++-------- 4 files changed, 33 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/linux/netfilter/nf_conntrack_common.h b/include/linux/netfilter/nf_conntrack_common.h index 23a1a08578a..50cdc2559a5 100644 --- a/include/linux/netfilter/nf_conntrack_common.h +++ b/include/linux/netfilter/nf_conntrack_common.h @@ -98,6 +98,7 @@ enum ip_conntrack_events { enum ip_conntrack_expect_events { IPEXP_NEW, /* new expectation */ + IPEXP_DESTROY, /* destroyed expectation */ }; /* expectation flags */ diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h index 416b8384448..0f8a8c58753 100644 --- a/include/net/netfilter/nf_conntrack_expect.h +++ b/include/net/netfilter/nf_conntrack_expect.h @@ -82,7 +82,13 @@ struct nf_conntrack_expect * nf_ct_find_expectation(struct net *net, u16 zone, const struct nf_conntrack_tuple *tuple); -void nf_ct_unlink_expect(struct nf_conntrack_expect *exp); +void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, + u32 pid, int report); +static inline void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) +{ + nf_ct_unlink_expect_report(exp, 0, 0); +} + void nf_ct_remove_expectations(struct nf_conn *ct); void nf_ct_unexpect_related(struct nf_conntrack_expect *exp); void nf_ct_remove_userspace_expectations(void); diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c index b30a1f2aac0..46e8966912b 100644 --- a/net/netfilter/nf_conntrack_expect.c +++ b/net/netfilter/nf_conntrack_expect.c @@ -41,7 +41,8 @@ static struct kmem_cache *nf_ct_expect_cachep __read_mostly; static HLIST_HEAD(nf_ct_userspace_expect_list); /* nf_conntrack_expect helper functions */ -void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) +void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp, + u32 pid, int report) { struct nf_conn_help *master_help = nfct_help(exp->master); struct net *net = nf_ct_exp_net(exp); @@ -55,11 +56,12 @@ void nf_ct_unlink_expect(struct nf_conntrack_expect *exp) if (!(exp->flags & NF_CT_EXPECT_USERSPACE)) master_help->expecting[exp->class]--; + nf_ct_expect_event_report(IPEXP_DESTROY, exp, pid, report); nf_ct_expect_put(exp); NF_CT_STAT_INC(net, expect_delete); } -EXPORT_SYMBOL_GPL(nf_ct_unlink_expect); +EXPORT_SYMBOL_GPL(nf_ct_unlink_expect_report); static void nf_ct_expectation_timed_out(unsigned long ul_expect) { diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index b4077be5f66..62bad229106 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -1632,17 +1632,20 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; struct sk_buff *skb; - unsigned int type; + unsigned int type, group; int flags = 0; - if (events & (1 << IPEXP_NEW)) { + if (events & (1 << IPEXP_DESTROY)) { + type = IPCTNL_MSG_EXP_DELETE; + group = NFNLGRP_CONNTRACK_EXP_DESTROY; + } else if (events & (1 << IPEXP_NEW)) { type = IPCTNL_MSG_EXP_NEW; flags = NLM_F_CREATE|NLM_F_EXCL; + group = NFNLGRP_CONNTRACK_EXP_NEW; } else return 0; - if (!item->report && - !nfnetlink_has_listeners(net, NFNLGRP_CONNTRACK_EXP_NEW)) + if (!item->report && !nfnetlink_has_listeners(net, group)) return 0; skb = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); @@ -1665,8 +1668,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) rcu_read_unlock(); nlmsg_end(skb, nlh); - nfnetlink_send(skb, net, item->pid, NFNLGRP_CONNTRACK_EXP_NEW, - item->report, GFP_ATOMIC); + nfnetlink_send(skb, net, item->pid, group, item->report, GFP_ATOMIC); return 0; nla_put_failure: @@ -1849,7 +1851,13 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, } /* after list removal, usage count == 1 */ - nf_ct_unexpect_related(exp); + spin_lock_bh(&nf_conntrack_lock); + if (del_timer(&exp->timeout)) { + nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).pid, + nlmsg_report(nlh)); + nf_ct_expect_put(exp); + } + spin_unlock_bh(&nf_conntrack_lock); /* have to put what we 'get' above. * after this line usage count == 0 */ nf_ct_expect_put(exp); @@ -1866,7 +1874,9 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, m_help = nfct_help(exp->master); if (!strcmp(m_help->helper->name, name) && del_timer(&exp->timeout)) { - nf_ct_unlink_expect(exp); + nf_ct_unlink_expect_report(exp, + NETLINK_CB(skb).pid, + nlmsg_report(nlh)); nf_ct_expect_put(exp); } } @@ -1880,7 +1890,9 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, &net->ct.expect_hash[i], hnode) { if (del_timer(&exp->timeout)) { - nf_ct_unlink_expect(exp); + nf_ct_unlink_expect_report(exp, + NETLINK_CB(skb).pid, + nlmsg_report(nlh)); nf_ct_expect_put(exp); } } -- cgit v1.2.3 From 714f095f74582764d629785f03b459a3d0503624 Mon Sep 17 00:00:00 2001 From: Hans Schillstrom Date: Tue, 19 Oct 2010 10:38:48 +0200 Subject: ipvs: IPv6 tunnel mode IPv6 encapsulation uses a bad source address for the tunnel. i.e. VIP will be used as local-addr and encap. dst addr. Decapsulation will not accept this. Example LVS (eth1 2003::2:0:1/96, VIP 2003::2:0:100) (eth0 2003::1:0:1/96) RS (ethX 2003::1:0:5/96) tcpdump 2003::2:0:100 > 2003::1:0:5: IP6 (hlim 63, next-header TCP (6) payload length: 40) 2003::3:0:10.50991 > 2003::2:0:100.http: Flags [S], cksum 0x7312 (correct), seq 3006460279, win 5760, options [mss 1440,sackOK,TS val 1904932 ecr 0,nop,wscale 3], length 0 In Linux IPv6 impl. you can't have a tunnel with an any cast address receiving packets (I have not tried to interpret RFC 2473) To have receive capabilities the tunnel must have: - Local address set as multicast addr or an unicast addr - Remote address set as an unicast addr. - Loop back addres or Link local address are not allowed. This causes us to setup a tunnel in the Real Server with the LVS as the remote address, here you can't use the VIP address since it's used inside the tunnel. Solution Use outgoing interface IPv6 address (match against the destination). i.e. use ip6_route_output() to look up the route cache and then use ipv6_dev_get_saddr(...) to set the source address of the encapsulated packet. Additionally, cache the results in new destination fields: dst_cookie and dst_saddr and properly check the returned dst from ip6_route_output. We now add xfrm_lookup call only for the tunneling method where the source address is a local one. Signed-off-by:Hans Schillstrom Signed-off-by: Patrick McHardy --- include/net/ip_vs.h | 4 + net/netfilter/ipvs/ip_vs_xmit.c | 171 +++++++++++++++++++++------------------- 2 files changed, 96 insertions(+), 79 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 52fbe2308c3..6e8a6192e57 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -529,6 +529,10 @@ struct ip_vs_dest { spinlock_t dst_lock; /* lock of dst_cache */ struct dst_entry *dst_cache; /* destination cache entry */ u32 dst_rtos; /* RT_TOS(tos) for dst */ + u32 dst_cookie; +#ifdef CONFIG_IP_VS_IPV6 + struct in6_addr dst_saddr; +#endif /* for virtual service */ struct ip_vs_service *svc; /* service it belongs to */ diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 8817afa34e6..b0bd8afbf36 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -26,6 +26,7 @@ #include /* for ip_route_output */ #include #include +#include #include #include #include @@ -37,26 +38,27 @@ * Destination cache to speed up outgoing route lookup */ static inline void -__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst) +__ip_vs_dst_set(struct ip_vs_dest *dest, u32 rtos, struct dst_entry *dst, + u32 dst_cookie) { struct dst_entry *old_dst; old_dst = dest->dst_cache; dest->dst_cache = dst; dest->dst_rtos = rtos; + dest->dst_cookie = dst_cookie; dst_release(old_dst); } static inline struct dst_entry * -__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie) +__ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos) { struct dst_entry *dst = dest->dst_cache; if (!dst) return NULL; - if ((dst->obsolete - || (dest->af == AF_INET && rtos != dest->dst_rtos)) && - dst->ops->check(dst, cookie) == NULL) { + if ((dst->obsolete || rtos != dest->dst_rtos) && + dst->ops->check(dst, dest->dst_cookie) == NULL) { dest->dst_cache = NULL; dst_release(dst); return NULL; @@ -66,15 +68,16 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos, u32 cookie) } static struct rtable * -__ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) +__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_conn *cp, u32 rtos) { + struct net *net = dev_net(skb->dev); struct rtable *rt; /* Route to the other host */ struct ip_vs_dest *dest = cp->dest; if (dest) { spin_lock(&dest->dst_lock); if (!(rt = (struct rtable *) - __ip_vs_dst_check(dest, rtos, 0))) { + __ip_vs_dst_check(dest, rtos))) { struct flowi fl = { .oif = 0, .nl_u = { @@ -84,13 +87,13 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) .tos = rtos, } }, }; - if (ip_route_output_key(&init_net, &rt, &fl)) { + if (ip_route_output_key(net, &rt, &fl)) { spin_unlock(&dest->dst_lock); IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &dest->addr.ip); return NULL; } - __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst)); + __ip_vs_dst_set(dest, rtos, dst_clone(&rt->dst), 0); IP_VS_DBG(10, "new dst %pI4, refcnt=%d, rtos=%X\n", &dest->addr.ip, atomic_read(&rt->dst.__refcnt), rtos); @@ -106,7 +109,7 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) .tos = rtos, } }, }; - if (ip_route_output_key(&init_net, &rt, &fl)) { + if (ip_route_output_key(net, &rt, &fl)) { IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &cp->daddr.ip); return NULL; @@ -117,62 +120,79 @@ __ip_vs_get_out_rt(struct ip_vs_conn *cp, u32 rtos) } #ifdef CONFIG_IP_VS_IPV6 + +static struct dst_entry * +__ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, + struct in6_addr *ret_saddr, int do_xfrm) +{ + struct dst_entry *dst; + struct flowi fl = { + .oif = 0, + .nl_u = { + .ip6_u = { + .daddr = *daddr, + }, + }, + }; + + dst = ip6_route_output(net, NULL, &fl); + if (dst->error) + goto out_err; + if (!ret_saddr) + return dst; + if (ipv6_addr_any(&fl.fl6_src) && + ipv6_dev_get_saddr(net, ip6_dst_idev(dst)->dev, + &fl.fl6_dst, 0, &fl.fl6_src) < 0) + goto out_err; + if (do_xfrm && xfrm_lookup(net, &dst, &fl, NULL, 0) < 0) + goto out_err; + ipv6_addr_copy(ret_saddr, &fl.fl6_src); + return dst; + +out_err: + dst_release(dst); + IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", daddr); + return NULL; +} + static struct rt6_info * -__ip_vs_get_out_rt_v6(struct ip_vs_conn *cp) +__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_conn *cp, + struct in6_addr *ret_saddr, int do_xfrm) { + struct net *net = dev_net(skb->dev); struct rt6_info *rt; /* Route to the other host */ struct ip_vs_dest *dest = cp->dest; + struct dst_entry *dst; if (dest) { spin_lock(&dest->dst_lock); - rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0, 0); + rt = (struct rt6_info *)__ip_vs_dst_check(dest, 0); if (!rt) { - struct flowi fl = { - .oif = 0, - .nl_u = { - .ip6_u = { - .daddr = dest->addr.in6, - .saddr = { - .s6_addr32 = - { 0, 0, 0, 0 }, - }, - }, - }, - }; + u32 cookie; - rt = (struct rt6_info *)ip6_route_output(&init_net, - NULL, &fl); - if (!rt) { + dst = __ip_vs_route_output_v6(net, &dest->addr.in6, + &dest->dst_saddr, + do_xfrm); + if (!dst) { spin_unlock(&dest->dst_lock); - IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", - &dest->addr.in6); return NULL; } - __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst)); - IP_VS_DBG(10, "new dst %pI6, refcnt=%d\n", - &dest->addr.in6, + rt = (struct rt6_info *) dst; + cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; + __ip_vs_dst_set(dest, 0, dst_clone(&rt->dst), cookie); + IP_VS_DBG(10, "new dst %pI6, src %pI6, refcnt=%d\n", + &dest->addr.in6, &dest->dst_saddr, atomic_read(&rt->dst.__refcnt)); } + if (ret_saddr) + ipv6_addr_copy(ret_saddr, &dest->dst_saddr); spin_unlock(&dest->dst_lock); } else { - struct flowi fl = { - .oif = 0, - .nl_u = { - .ip6_u = { - .daddr = cp->daddr.in6, - .saddr = { - .s6_addr32 = { 0, 0, 0, 0 }, - }, - }, - }, - }; - - rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); - if (!rt) { - IP_VS_DBG_RL("ip6_route_output error, dest: %pI6\n", - &cp->daddr.in6); + dst = __ip_vs_route_output_v6(net, &cp->daddr.in6, ret_saddr, + do_xfrm); + if (!dst) return NULL; - } + rt = (struct rt6_info *) dst; } return rt; @@ -248,6 +268,7 @@ int ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp) { + struct net *net = dev_net(skb->dev); struct rtable *rt; /* Route to the other host */ struct iphdr *iph = ip_hdr(skb); u8 tos = iph->tos; @@ -263,7 +284,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - if (ip_route_output_key(&init_net, &rt, &fl)) { + if (ip_route_output_key(net, &rt, &fl)) { IP_VS_DBG_RL("%s(): ip_route_output error, dest: %pI4\n", __func__, &iph->daddr); goto tx_error_icmp; @@ -313,25 +334,18 @@ int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp) { + struct net *net = dev_net(skb->dev); + struct dst_entry *dst; struct rt6_info *rt; /* Route to the other host */ struct ipv6hdr *iph = ipv6_hdr(skb); int mtu; - struct flowi fl = { - .oif = 0, - .nl_u = { - .ip6_u = { - .daddr = iph->daddr, - .saddr = { .s6_addr32 = {0, 0, 0, 0} }, } }, - }; EnterFunction(10); - rt = (struct rt6_info *)ip6_route_output(&init_net, NULL, &fl); - if (!rt) { - IP_VS_DBG_RL("%s(): ip6_route_output error, dest: %pI6\n", - __func__, &iph->daddr); + dst = __ip_vs_route_output_v6(net, &iph->daddr, NULL, 0); + if (!dst) goto tx_error_icmp; - } + rt = (struct rt6_info *) dst; /* MTU checking */ mtu = dst_mtu(&rt->dst); @@ -397,7 +411,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); } - if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos)))) + if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos)))) goto tx_error_icmp; /* MTU checking */ @@ -472,7 +486,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); } - rt = __ip_vs_get_out_rt_v6(cp); + rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0); if (!rt) goto tx_error_icmp; @@ -557,7 +571,6 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct iphdr *old_iph = ip_hdr(skb); u8 tos = old_iph->tos; __be16 df = old_iph->frag_off; - sk_buff_data_t old_transport_header = skb->transport_header; struct iphdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ int mtu; @@ -572,7 +585,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error; } - if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(tos)))) + if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(tos)))) goto tx_error_icmp; tdev = rt->dst.dev; @@ -616,7 +629,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, old_iph = ip_hdr(skb); } - skb->transport_header = old_transport_header; + skb->transport_header = skb->network_header; /* fix old IP header checksum */ ip_send_check(old_iph); @@ -670,9 +683,9 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp) { struct rt6_info *rt; /* Route to the other host */ + struct in6_addr saddr; /* Source for tunnel */ struct net_device *tdev; /* Device to other host */ struct ipv6hdr *old_iph = ipv6_hdr(skb); - sk_buff_data_t old_transport_header = skb->transport_header; struct ipv6hdr *iph; /* Our new IP header */ unsigned int max_headroom; /* The extra header space needed */ int mtu; @@ -687,17 +700,17 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error; } - rt = __ip_vs_get_out_rt_v6(cp); + rt = __ip_vs_get_out_rt_v6(skb, cp, &saddr, 1); if (!rt) goto tx_error_icmp; tdev = rt->dst.dev; mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); - /* TODO IPv6: do we need this check in IPv6? */ - if (mtu < 1280) { + if (mtu < IPV6_MIN_MTU) { dst_release(&rt->dst); - IP_VS_DBG_RL("%s(): mtu less than 1280\n", __func__); + IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, + IPV6_MIN_MTU); goto tx_error; } if (skb_dst(skb)) @@ -730,7 +743,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, old_iph = ipv6_hdr(skb); } - skb->transport_header = old_transport_header; + skb->transport_header = skb->network_header; skb_push(skb, sizeof(struct ipv6hdr)); skb_reset_network_header(skb); @@ -750,8 +763,8 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, be16_add_cpu(&iph->payload_len, sizeof(*old_iph)); iph->priority = old_iph->priority; memset(&iph->flow_lbl, 0, sizeof(iph->flow_lbl)); - iph->daddr = rt->rt6i_dst.addr; - iph->saddr = cp->vaddr.in6; /* rt->rt6i_src.addr; */ + ipv6_addr_copy(&iph->daddr, &cp->daddr.in6); + ipv6_addr_copy(&iph->saddr, &saddr); iph->hop_limit = old_iph->hop_limit; /* Another hack: avoid icmp_send in ip_fragment */ @@ -791,7 +804,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(iph->tos)))) + if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos)))) goto tx_error_icmp; /* MTU checking */ @@ -843,7 +856,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - rt = __ip_vs_get_out_rt_v6(cp); + rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0); if (!rt) goto tx_error_icmp; @@ -919,7 +932,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, * mangle and send the packet here (only for VS/NAT) */ - if (!(rt = __ip_vs_get_out_rt(cp, RT_TOS(ip_hdr(skb)->tos)))) + if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(ip_hdr(skb)->tos)))) goto tx_error_icmp; /* MTU checking */ @@ -993,7 +1006,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, * mangle and send the packet here (only for VS/NAT) */ - rt = __ip_vs_get_out_rt_v6(cp); + rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0); if (!rt) goto tx_error_icmp; -- cgit v1.2.3 From 8b27b10f5863a5b63e46304a71aa01463d1efac4 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 17 Oct 2010 16:17:20 +0300 Subject: ipvs: optimize checksums for apps Avoid full checksum calculation for apps that can provide info whether csum was broken after payload mangling. For now only ip_vs_ftp mangles payload and it updates the csum, so the full recalculation is avoided for all packets. Add CHECKSUM_UNNECESSARY for snat_handler (TCP and UDP). It is needed to support SNAT from local address for the case when csum is fully recalculated. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 12 ++++++++++-- net/netfilter/ipvs/ip_vs_ftp.c | 7 ++++++- net/netfilter/ipvs/ip_vs_proto_tcp.c | 31 +++++++++++++++++++++++++------ net/netfilter/ipvs/ip_vs_proto_udp.c | 31 +++++++++++++++++++++++++------ 4 files changed, 66 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 6e8a6192e57..adcdba9dd18 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -597,11 +597,19 @@ struct ip_vs_app { __be16 port; /* port number in net order */ atomic_t usecnt; /* usage counter */ - /* output hook: return false if can't linearize. diff set for TCP. */ + /* + * output hook: Process packet in inout direction, diff set for TCP. + * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok, + * 2=Mangled but checksum was not updated + */ int (*pkt_out)(struct ip_vs_app *, struct ip_vs_conn *, struct sk_buff *, int *diff); - /* input hook: return false if can't linearize. diff set for TCP. */ + /* + * input hook: Process packet in outin direction, diff set for TCP. + * Return: 0=Error, 1=Payload Not Mangled/Mangled but checksum is ok, + * 2=Mangled but checksum was not updated + */ int (*pkt_in)(struct ip_vs_app *, struct ip_vs_conn *, struct sk_buff *, int *diff); diff --git a/net/netfilter/ipvs/ip_vs_ftp.c b/net/netfilter/ipvs/ip_vs_ftp.c index 090889a3b3a..75455000ad1 100644 --- a/net/netfilter/ipvs/ip_vs_ftp.c +++ b/net/netfilter/ipvs/ip_vs_ftp.c @@ -242,9 +242,14 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, ret = nf_nat_mangle_tcp_packet(skb, ct, ctinfo, start-data, end-start, buf, buf_len); - if (ret) + if (ret) { ip_vs_nfct_expect_related(skb, ct, n_cp, IPPROTO_TCP, 0, 0); + if (skb->ip_summed == CHECKSUM_COMPLETE) + skb->ip_summed = CHECKSUM_UNNECESSARY; + /* csum is updated */ + ret = 1; + } } /* diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 318d011036d..64dc2954cf7 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -120,6 +120,7 @@ tcp_snat_handler(struct sk_buff *skb, struct tcphdr *tcph; unsigned int tcphoff; int oldlen; + int payload_csum = 0; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) @@ -134,13 +135,20 @@ tcp_snat_handler(struct sk_buff *skb, return 0; if (unlikely(cp->app != NULL)) { + int ret; + /* Some checks before mangling */ if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) return 0; /* Call application helper if needed */ - if (!ip_vs_app_pkt_out(cp, skb)) + if (!(ret = ip_vs_app_pkt_out(cp, skb))) return 0; + /* ret=2: csum update is needed after payload mangling */ + if (ret == 1) + oldlen = skb->len - tcphoff; + else + payload_csum = 1; } tcph = (void *)skb_network_header(skb) + tcphoff; @@ -151,12 +159,13 @@ tcp_snat_handler(struct sk_buff *skb, tcp_partial_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, htons(oldlen), htons(skb->len - tcphoff)); - } else if (!cp->app) { + } else if (!payload_csum) { /* Only port and addr are changed, do fast csum update */ tcp_fast_csum_update(cp->af, tcph, &cp->daddr, &cp->vaddr, cp->dport, cp->vport); if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->ip_summed = CHECKSUM_NONE; + skb->ip_summed = (cp->app && pp->csum_check) ? + CHECKSUM_UNNECESSARY : CHECKSUM_NONE; } else { /* full checksum calculation */ tcph->check = 0; @@ -174,6 +183,7 @@ tcp_snat_handler(struct sk_buff *skb, skb->len - tcphoff, cp->protocol, skb->csum); + skb->ip_summed = CHECKSUM_UNNECESSARY; IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", pp->name, tcph->check, @@ -190,6 +200,7 @@ tcp_dnat_handler(struct sk_buff *skb, struct tcphdr *tcph; unsigned int tcphoff; int oldlen; + int payload_csum = 0; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) @@ -204,6 +215,8 @@ tcp_dnat_handler(struct sk_buff *skb, return 0; if (unlikely(cp->app != NULL)) { + int ret; + /* Some checks before mangling */ if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) return 0; @@ -212,8 +225,13 @@ tcp_dnat_handler(struct sk_buff *skb, * Attempt ip_vs_app call. * It will fix ip_vs_conn and iph ack_seq stuff */ - if (!ip_vs_app_pkt_in(cp, skb)) + if (!(ret = ip_vs_app_pkt_in(cp, skb))) return 0; + /* ret=2: csum update is needed after payload mangling */ + if (ret == 1) + oldlen = skb->len - tcphoff; + else + payload_csum = 1; } tcph = (void *)skb_network_header(skb) + tcphoff; @@ -226,12 +244,13 @@ tcp_dnat_handler(struct sk_buff *skb, tcp_partial_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr, htons(oldlen), htons(skb->len - tcphoff)); - } else if (!cp->app) { + } else if (!payload_csum) { /* Only port and addr are changed, do fast csum update */ tcp_fast_csum_update(cp->af, tcph, &cp->vaddr, &cp->daddr, cp->vport, cp->dport); if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->ip_summed = CHECKSUM_NONE; + skb->ip_summed = (cp->app && pp->csum_check) ? + CHECKSUM_UNNECESSARY : CHECKSUM_NONE; } else { /* full checksum calculation */ tcph->check = 0; diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index f9290893bd9..9c558c40bfb 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -121,6 +121,7 @@ udp_snat_handler(struct sk_buff *skb, struct udphdr *udph; unsigned int udphoff; int oldlen; + int payload_csum = 0; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) @@ -135,6 +136,8 @@ udp_snat_handler(struct sk_buff *skb, return 0; if (unlikely(cp->app != NULL)) { + int ret; + /* Some checks before mangling */ if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) return 0; @@ -142,8 +145,13 @@ udp_snat_handler(struct sk_buff *skb, /* * Call application helper if needed */ - if (!ip_vs_app_pkt_out(cp, skb)) + if (!(ret = ip_vs_app_pkt_out(cp, skb))) return 0; + /* ret=2: csum update is needed after payload mangling */ + if (ret == 1) + oldlen = skb->len - udphoff; + else + payload_csum = 1; } udph = (void *)skb_network_header(skb) + udphoff; @@ -156,12 +164,13 @@ udp_snat_handler(struct sk_buff *skb, udp_partial_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, htons(oldlen), htons(skb->len - udphoff)); - } else if (!cp->app && (udph->check != 0)) { + } else if (!payload_csum && (udph->check != 0)) { /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(cp->af, udph, &cp->daddr, &cp->vaddr, cp->dport, cp->vport); if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->ip_summed = CHECKSUM_NONE; + skb->ip_summed = (cp->app && pp->csum_check) ? + CHECKSUM_UNNECESSARY : CHECKSUM_NONE; } else { /* full checksum calculation */ udph->check = 0; @@ -181,6 +190,7 @@ udp_snat_handler(struct sk_buff *skb, skb->csum); if (udph->check == 0) udph->check = CSUM_MANGLED_0; + skb->ip_summed = CHECKSUM_UNNECESSARY; IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", pp->name, udph->check, (char*)&(udph->check) - (char*)udph); @@ -196,6 +206,7 @@ udp_dnat_handler(struct sk_buff *skb, struct udphdr *udph; unsigned int udphoff; int oldlen; + int payload_csum = 0; #ifdef CONFIG_IP_VS_IPV6 if (cp->af == AF_INET6) @@ -210,6 +221,8 @@ udp_dnat_handler(struct sk_buff *skb, return 0; if (unlikely(cp->app != NULL)) { + int ret; + /* Some checks before mangling */ if (pp->csum_check && !pp->csum_check(cp->af, skb, pp)) return 0; @@ -218,8 +231,13 @@ udp_dnat_handler(struct sk_buff *skb, * Attempt ip_vs_app call. * It will fix ip_vs_conn */ - if (!ip_vs_app_pkt_in(cp, skb)) + if (!(ret = ip_vs_app_pkt_in(cp, skb))) return 0; + /* ret=2: csum update is needed after payload mangling */ + if (ret == 1) + oldlen = skb->len - udphoff; + else + payload_csum = 1; } udph = (void *)skb_network_header(skb) + udphoff; @@ -232,12 +250,13 @@ udp_dnat_handler(struct sk_buff *skb, udp_partial_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr, htons(oldlen), htons(skb->len - udphoff)); - } else if (!cp->app && (udph->check != 0)) { + } else if (!payload_csum && (udph->check != 0)) { /* Only port and addr are changed, do fast csum update */ udp_fast_csum_update(cp->af, udph, &cp->vaddr, &cp->daddr, cp->vport, cp->dport); if (skb->ip_summed == CHECKSUM_COMPLETE) - skb->ip_summed = CHECKSUM_NONE; + skb->ip_summed = (cp->app && pp->csum_check) ? + CHECKSUM_UNNECESSARY : CHECKSUM_NONE; } else { /* full checksum calculation */ udph->check = 0; -- cgit v1.2.3 From cf356d69db0afef692cd640917bc70f708c27f14 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 17 Oct 2010 16:21:07 +0300 Subject: ipvs: switch to notrack mode Change skb->ipvs_property semantic. This is preparation to support ip_vs_out processing in LOCAL_OUT. ipvs_property=1 will be used to avoid expensive lookups for traffic sent by transmitters. Now when conntrack support is not used we call ip_vs_notrack method to avoid problems in OUTPUT and POST_ROUTING hooks instead of exiting POST_ROUTING as before. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 20 +++++++++++++++++++- net/netfilter/ipvs/ip_vs_core.c | 39 ++++----------------------------------- net/netfilter/ipvs/ip_vs_xmit.c | 7 +++++-- 3 files changed, 28 insertions(+), 38 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index adcdba9dd18..0e4618470ce 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -25,7 +25,7 @@ #include #include /* for struct ipv6hdr */ #include /* for ipv6_addr_copy */ -#ifdef CONFIG_IP_VS_NFCT +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) #include #endif @@ -1021,6 +1021,24 @@ static inline __wsum ip_vs_check_diff2(__be16 old, __be16 new, __wsum oldsum) return csum_partial(diff, sizeof(diff), oldsum); } +/* + * Forget current conntrack (unconfirmed) and attach notrack entry + */ +static inline void ip_vs_notrack(struct sk_buff *skb) +{ +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); + + if (!ct || !nf_ct_is_untracked(ct)) { + nf_reset(skb); + skb->nfct = &nf_ct_untracked_get()->ct_general; + skb->nfctinfo = IP_CT_NEW; + nf_conntrack_get(skb->nfct); + } +#endif +} + #ifdef CONFIG_IP_VS_NFCT /* * Netfilter connection tracking diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index e5fef7aef0d..222453029b9 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -507,23 +507,6 @@ int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, return NF_DROP; } -/* - * It is hooked before NF_IP_PRI_NAT_SRC at the NF_INET_POST_ROUTING - * chain and is used to avoid double NAT and confirmation when we do - * not want to keep the conntrack structure - */ -static unsigned int ip_vs_post_routing(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - if (!skb->ipvs_property) - return NF_ACCEPT; - /* The packet was sent from IPVS, exit this chain */ - return NF_STOP; -} - __sum16 ip_vs_checksum_complete(struct sk_buff *skb, int offset) { return csum_fold(skb_checksum(skb, offset, skb->len - offset, 0)); @@ -682,8 +665,9 @@ static int handle_response_icmp(int af, struct sk_buff *skb, /* do the statistics and put it back */ ip_vs_out_stats(cp, skb); + skb->ipvs_property = 1; if (!(cp->flags & IP_VS_CONN_F_NFCT)) - skb->ipvs_property = 1; + ip_vs_notrack(skb); else ip_vs_update_conntrack(skb, cp, 0); verdict = NF_ACCEPT; @@ -929,8 +913,9 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, ip_vs_out_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); + skb->ipvs_property = 1; if (!(cp->flags & IP_VS_CONN_F_NFCT)) - skb->ipvs_property = 1; + ip_vs_notrack(skb); else ip_vs_update_conntrack(skb, cp, 0); ip_vs_conn_put(cp); @@ -1496,14 +1481,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .hooknum = NF_INET_FORWARD, .priority = 99, }, - /* Before the netfilter connection tracking, exit from POST_ROUTING */ - { - .hook = ip_vs_post_routing, - .owner = THIS_MODULE, - .pf = PF_INET, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP_PRI_NAT_SRC-1, - }, #ifdef CONFIG_IP_VS_IPV6 /* After packet filtering, forward packet through VS/DR, VS/TUN, * or VS/NAT(change destination), so that filtering rules can be @@ -1532,14 +1509,6 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .hooknum = NF_INET_FORWARD, .priority = 99, }, - /* Before the netfilter connection tracking, exit from POST_ROUTING */ - { - .hook = ip_vs_post_routing, - .owner = THIS_MODULE, - .pf = PF_INET6, - .hooknum = NF_INET_POST_ROUTING, - .priority = NF_IP6_PRI_NAT_SRC-1, - }, #endif }; diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index b0bd8afbf36..94b53b44102 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -217,6 +217,7 @@ ip_vs_dst_reset(struct ip_vs_dest *dest) ({ \ int __ret = NF_ACCEPT; \ \ + (skb)->ipvs_property = 1; \ if (unlikely((cp)->flags & IP_VS_CONN_F_NFCT)) \ __ret = ip_vs_confirm_conntrack(skb, cp); \ if (__ret == NF_ACCEPT) { \ @@ -228,8 +229,9 @@ ip_vs_dst_reset(struct ip_vs_dest *dest) #define IP_VS_XMIT_NAT(pf, skb, cp) \ do { \ + (skb)->ipvs_property = 1; \ if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ - (skb)->ipvs_property = 1; \ + ip_vs_notrack(skb); \ else \ ip_vs_update_conntrack(skb, cp, 1); \ skb_forward_csum(skb); \ @@ -239,8 +241,9 @@ do { \ #define IP_VS_XMIT(pf, skb, cp) \ do { \ + (skb)->ipvs_property = 1; \ if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ - (skb)->ipvs_property = 1; \ + ip_vs_notrack(skb); \ skb_forward_csum(skb); \ NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ skb_dst(skb)->dev, dst_output); \ -- cgit v1.2.3 From 190ecd27cd7294105e3b26ca71663c7d940acbbb Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 17 Oct 2010 16:24:37 +0300 Subject: ipvs: do not schedule conns from real servers This patch is needed to avoid scheduling of packets from local real server when we add ip_vs_in in LOCAL_OUT hook to support local client. Currently, when ip_vs_in can not find existing connection it tries to create new one by calling ip_vs_schedule. The default indication from ip_vs_schedule was if connection was scheduled to real server. If real server is not available we try to use the bypass forwarding method or to send ICMP error. But in some cases we do not want to use the bypass feature. So, add flag 'ignored' to indicate if the scheduler ignores this packet. Make sure we do not create new connections from replies. We can hit this problem for persistent services and local real server when ip_vs_in is added to LOCAL_OUT hook to handle local clients. Also, make sure ip_vs_schedule ignores SYN packets for Active FTP DATA from local real server. The FTP DATA connection should be created on SYN+ACK from client to assign correct connection daddr. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 3 ++- net/netfilter/ipvs/ip_vs_core.c | 34 ++++++++++++++++++++++++++++++++-- net/netfilter/ipvs/ip_vs_proto_sctp.c | 6 ++++-- net/netfilter/ipvs/ip_vs_proto_tcp.c | 7 +++++-- net/netfilter/ipvs/ip_vs_proto_udp.c | 6 ++++-- 5 files changed, 47 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 0e4618470ce..9d5c1b96530 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -849,7 +849,8 @@ extern int ip_vs_unbind_scheduler(struct ip_vs_service *svc); extern struct ip_vs_scheduler *ip_vs_scheduler_get(const char *sched_name); extern void ip_vs_scheduler_put(struct ip_vs_scheduler *scheduler); extern struct ip_vs_conn * -ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb); +ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, + struct ip_vs_protocol *pp, int *ignored); extern int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_protocol *pp); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 222453029b9..0090d6d25e9 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -342,7 +342,8 @@ ip_vs_sched_persist(struct ip_vs_service *svc, * Protocols supported: TCP, UDP */ struct ip_vs_conn * -ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb) +ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, + struct ip_vs_protocol *pp, int *ignored) { struct ip_vs_conn *cp = NULL; struct ip_vs_iphdr iph; @@ -350,16 +351,43 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb) __be16 _ports[2], *pptr; unsigned int flags; + *ignored = 1; ip_vs_fill_iphdr(svc->af, skb_network_header(skb), &iph); pptr = skb_header_pointer(skb, iph.len, sizeof(_ports), _ports); if (pptr == NULL) return NULL; + /* + * FTPDATA needs this check when using local real server. + * Never schedule Active FTPDATA connections from real server. + * For LVS-NAT they must be already created. For other methods + * with persistence the connection is created on SYN+ACK. + */ + if (pptr[0] == FTPDATA) { + IP_VS_DBG_PKT(12, pp, skb, 0, "Not scheduling FTPDATA"); + return NULL; + } + + /* + * Do not schedule replies from local real server. It is risky + * for fwmark services but mostly for persistent services. + */ + if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && + (svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) && + (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) { + IP_VS_DBG_PKT(12, pp, skb, 0, + "Not scheduling reply for existing connection"); + __ip_vs_conn_put(cp); + return NULL; + } + /* * Persistent service */ - if (svc->flags & IP_VS_SVC_F_PERSISTENT) + if (svc->flags & IP_VS_SVC_F_PERSISTENT) { + *ignored = 0; return ip_vs_sched_persist(svc, skb, pptr); + } /* * Non-persistent service @@ -372,6 +400,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb) return NULL; } + *ignored = 0; + dest = svc->scheduler->schedule(svc, skb); if (dest == NULL) { IP_VS_DBG(1, "Schedule: no dest found.\n"); diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 4c0855cb006..9ab5232ce01 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -31,6 +31,8 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, if ((sch->type == SCTP_CID_INIT) && (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr, sh->dest))) { + int ignored; + if (ip_vs_todrop()) { /* * It seems that we are very loaded. @@ -44,8 +46,8 @@ sctp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, * Let the virtual server select a real server for the * incoming connection, and create a connection entry. */ - *cpp = ip_vs_schedule(svc, skb); - if (!*cpp) { + *cpp = ip_vs_schedule(svc, skb, pp, &ignored); + if (!*cpp && !ignored) { *verdict = ip_vs_leave(svc, skb, pp); return 0; } diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 64dc2954cf7..85d80a66b49 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -43,9 +43,12 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, return 0; } + /* No !th->ack check to allow scheduling on SYN+ACK for Active FTP */ if (th->syn && (svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr, th->dest))) { + int ignored; + if (ip_vs_todrop()) { /* * It seems that we are very loaded. @@ -60,8 +63,8 @@ tcp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, * Let the virtual server select a real server for the * incoming connection, and create a connection entry. */ - *cpp = ip_vs_schedule(svc, skb); - if (!*cpp) { + *cpp = ip_vs_schedule(svc, skb, pp, &ignored); + if (!*cpp && !ignored) { *verdict = ip_vs_leave(svc, skb, pp); return 0; } diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 9c558c40bfb..5d21f08155e 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -46,6 +46,8 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, svc = ip_vs_service_get(af, skb->mark, iph.protocol, &iph.daddr, uh->dest); if (svc) { + int ignored; + if (ip_vs_todrop()) { /* * It seems that we are very loaded. @@ -60,8 +62,8 @@ udp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, * Let the virtual server select a real server for the * incoming connection, and create a connection entry. */ - *cpp = ip_vs_schedule(svc, skb); - if (!*cpp) { + *cpp = ip_vs_schedule(svc, skb, pp, &ignored); + if (!*cpp && !ignored) { *verdict = ip_vs_leave(svc, skb, pp); return 0; } -- cgit v1.2.3 From fc604767613b6d2036cdc35b660bc39451040a47 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 17 Oct 2010 16:38:15 +0300 Subject: ipvs: changes for local real server This patch deals with local real servers: - Add support for DNAT to local address (different real server port). It needs ip_vs_out hook in LOCAL_OUT for both families because skb->protocol is not set for locally generated packets and can not be used to set 'af'. - Skip packets in ip_vs_in marked with skb->ipvs_property because ip_vs_out processing can be executed in LOCAL_OUT but we still have the conn_out_get check in ip_vs_in. - Ignore packets with inet->nodefrag from local stack - Require skb_dst(skb) != NULL because we use it to get struct net - Add support for changing the route to local IPv4 stack after DNAT depending on the source address type. Local client sets output route and the remote client sets input route. It looks like IPv6 does not need such rerouting because the replies use addresses from initial incoming header, not from skb route. - All transmitters now have strict checks for the destination address type: redirect from non-local address to local real server requires NAT method, local address can not be used as source address when talking to remote real server. - Now LOCALNODE is not set explicitly as forwarding method in real server to allow the connections to provide correct forwarding method to the backup server. Not sure if this breaks tools that expect to see 'Local' real server type. If needed, this can be supported with new flag IP_VS_DEST_F_LOCAL. Now it should be possible connections in backup that lost their fwmark information during sync to be forwarded properly to their daddr, even if it is local address in the backup server. By this way backup could be used as real server for DR or TUN, for NAT there are some restrictions because tuple collisions in conntracks can create problems for the traffic. - Call ip_vs_dst_reset when destination is updated in case some real server IP type is changed between local and remote. [ horms@verge.net.au: removed trailing whitespace ] Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 1 + net/netfilter/ipvs/ip_vs_core.c | 123 ++++++++++-- net/netfilter/ipvs/ip_vs_ctl.c | 18 +- net/netfilter/ipvs/ip_vs_xmit.c | 433 ++++++++++++++++++++++++++++++++-------- 4 files changed, 458 insertions(+), 117 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 9d5c1b96530..2f88d594233 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -409,6 +409,7 @@ struct ip_vs_conn { /* packet transmitter for different forwarding methods. If it mangles the packet, it must return NF_DROP or better NF_STOLEN, otherwise this must be changed to a sk_buff **. + NF_ACCEPT can be returned when destination is local. */ int (*packet_xmit)(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp); diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index c4f091d5a62..a6c8aff1b47 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -984,26 +984,34 @@ drop: } /* - * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT. * Check if outgoing packet belongs to the established ip_vs_conn. */ static unsigned int -ip_vs_out(unsigned int hooknum, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) +ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) { struct ip_vs_iphdr iph; struct ip_vs_protocol *pp; struct ip_vs_conn *cp; - int af; EnterFunction(11); - af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; - + /* Already marked as IPVS request or reply? */ if (skb->ipvs_property) return NF_ACCEPT; + /* Bad... Do not break raw sockets */ + if (unlikely(skb->sk != NULL && hooknum == NF_INET_LOCAL_OUT && + af == AF_INET)) { + struct sock *sk = skb->sk; + struct inet_sock *inet = inet_sk(skb->sk); + + if (inet && sk->sk_family == PF_INET && inet->nodefrag) + return NF_ACCEPT; + } + + if (unlikely(!skb_dst(skb))) + return NF_ACCEPT; + ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); #ifdef CONFIG_IP_VS_IPV6 if (af == AF_INET6) { @@ -1106,6 +1114,69 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, return handle_response(af, skb, pp, cp, iph.len); } +/* + * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT. + * Check if packet is reply for established ip_vs_conn. + */ +static unsigned int +ip_vs_reply4(unsigned int hooknum, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ip_vs_out(hooknum, skb, AF_INET); +} + +/* + * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT. + * Check if packet is reply for established ip_vs_conn. + */ +static unsigned int +ip_vs_local_reply4(unsigned int hooknum, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + unsigned int verdict; + + /* Disable BH in LOCAL_OUT until all places are fixed */ + local_bh_disable(); + verdict = ip_vs_out(hooknum, skb, AF_INET); + local_bh_enable(); + return verdict; +} + +#ifdef CONFIG_IP_VS_IPV6 + +/* + * It is hooked at the NF_INET_FORWARD chain, used only for VS/NAT. + * Check if packet is reply for established ip_vs_conn. + */ +static unsigned int +ip_vs_reply6(unsigned int hooknum, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + return ip_vs_out(hooknum, skb, AF_INET6); +} + +/* + * It is hooked at the NF_INET_LOCAL_OUT chain, used only for VS/NAT. + * Check if packet is reply for established ip_vs_conn. + */ +static unsigned int +ip_vs_local_reply6(unsigned int hooknum, struct sk_buff *skb, + const struct net_device *in, const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + unsigned int verdict; + + /* Disable BH in LOCAL_OUT until all places are fixed */ + local_bh_disable(); + verdict = ip_vs_out(hooknum, skb, AF_INET6); + local_bh_enable(); + return verdict; +} + +#endif /* * Handle ICMP messages in the outside-to-inside direction (incoming). @@ -1342,6 +1413,10 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, struct ip_vs_conn *cp; int ret, restart, af, pkts; + /* Already marked as IPVS request or reply? */ + if (skb->ipvs_property) + return NF_ACCEPT; + af = (skb->protocol == htons(ETH_P_IP)) ? AF_INET : AF_INET6; ip_vs_fill_iphdr(af, skb_network_header(skb), &iph); @@ -1525,13 +1600,13 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .hooknum = NF_INET_LOCAL_IN, .priority = 100, }, - /* After packet filtering, change source only for VS/NAT */ + /* Before ip_vs_in, change source only for VS/NAT */ { - .hook = ip_vs_out, + .hook = ip_vs_local_reply4, .owner = THIS_MODULE, .pf = PF_INET, - .hooknum = NF_INET_FORWARD, - .priority = 100, + .hooknum = NF_INET_LOCAL_OUT, + .priority = -99, }, /* After packet filtering (but before ip_vs_out_icmp), catch icmp * destined for 0.0.0.0/0, which is for incoming IPVS connections */ @@ -1542,6 +1617,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .hooknum = NF_INET_FORWARD, .priority = 99, }, + /* After packet filtering, change source only for VS/NAT */ + { + .hook = ip_vs_reply4, + .owner = THIS_MODULE, + .pf = PF_INET, + .hooknum = NF_INET_FORWARD, + .priority = 100, + }, #ifdef CONFIG_IP_VS_IPV6 /* After packet filtering, forward packet through VS/DR, VS/TUN, * or VS/NAT(change destination), so that filtering rules can be @@ -1553,13 +1636,13 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .hooknum = NF_INET_LOCAL_IN, .priority = 100, }, - /* After packet filtering, change source only for VS/NAT */ + /* Before ip_vs_in, change source only for VS/NAT */ { - .hook = ip_vs_out, + .hook = ip_vs_local_reply6, .owner = THIS_MODULE, - .pf = PF_INET6, - .hooknum = NF_INET_FORWARD, - .priority = 100, + .pf = PF_INET, + .hooknum = NF_INET_LOCAL_OUT, + .priority = -99, }, /* After packet filtering (but before ip_vs_out_icmp), catch icmp * destined for 0.0.0.0/0, which is for incoming IPVS connections */ @@ -1570,6 +1653,14 @@ static struct nf_hook_ops ip_vs_ops[] __read_mostly = { .hooknum = NF_INET_FORWARD, .priority = 99, }, + /* After packet filtering, change source only for VS/NAT */ + { + .hook = ip_vs_reply6, + .owner = THIS_MODULE, + .pf = PF_INET6, + .hooknum = NF_INET_FORWARD, + .priority = 100, + }, #endif }; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 0b884d3e192..5f5daa30b0a 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -777,20 +777,6 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, conn_flags = udest->conn_flags & IP_VS_CONN_F_DEST_MASK; conn_flags |= IP_VS_CONN_F_INACTIVE; - /* check if local node and update the flags */ -#ifdef CONFIG_IP_VS_IPV6 - if (svc->af == AF_INET6) { - if (__ip_vs_addr_is_local_v6(&udest->addr.in6)) { - conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) - | IP_VS_CONN_F_LOCALNODE; - } - } else -#endif - if (inet_addr_type(&init_net, udest->addr.ip) == RTN_LOCAL) { - conn_flags = (conn_flags & ~IP_VS_CONN_F_FWD_MASK) - | IP_VS_CONN_F_LOCALNODE; - } - /* set the IP_VS_CONN_F_NOOUTPUT flag if not masquerading/NAT */ if ((conn_flags & IP_VS_CONN_F_FWD_MASK) != IP_VS_CONN_F_MASQ) { conn_flags |= IP_VS_CONN_F_NOOUTPUT; @@ -824,6 +810,10 @@ __ip_vs_update_dest(struct ip_vs_service *svc, struct ip_vs_dest *dest, dest->u_threshold = udest->u_threshold; dest->l_threshold = udest->l_threshold; + spin_lock(&dest->dst_lock); + ip_vs_dst_reset(dest); + spin_unlock(&dest->dst_lock); + if (add) ip_vs_new_estimator(&dest->stats); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 63cc0feaaef..8608882f89e 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -67,12 +67,19 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos) return dst; } +/* + * Get route to destination or remote server + * rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest, + * &4=Allow redirect from remote daddr to local + */ static struct rtable * -__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_conn *cp, u32 rtos) +__ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_dest *dest, + __be32 daddr, u32 rtos, int rt_mode) { - struct net *net = dev_net(skb->dev); + struct net *net = dev_net(skb_dst(skb)->dev); struct rtable *rt; /* Route to the other host */ - struct ip_vs_dest *dest = cp->dest; + struct rtable *ort; /* Original route */ + int local; if (dest) { spin_lock(&dest->dst_lock); @@ -104,23 +111,95 @@ __ip_vs_get_out_rt(struct sk_buff *skb, struct ip_vs_conn *cp, u32 rtos) .oif = 0, .nl_u = { .ip4_u = { - .daddr = cp->daddr.ip, + .daddr = daddr, .saddr = 0, .tos = rtos, } }, }; if (ip_route_output_key(net, &rt, &fl)) { IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", - &cp->daddr.ip); + &daddr); return NULL; } } + local = rt->rt_flags & RTCF_LOCAL; + if (!((local ? 1 : 2) & rt_mode)) { + IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI4\n", + (rt->rt_flags & RTCF_LOCAL) ? + "local":"non-local", &rt->rt_dst); + ip_rt_put(rt); + return NULL; + } + if (local && !(rt_mode & 4) && !((ort = skb_rtable(skb)) && + ort->rt_flags & RTCF_LOCAL)) { + IP_VS_DBG_RL("Redirect from non-local address %pI4 to local " + "requires NAT method, dest: %pI4\n", + &ip_hdr(skb)->daddr, &rt->rt_dst); + ip_rt_put(rt); + return NULL; + } + if (unlikely(!local && ipv4_is_loopback(ip_hdr(skb)->saddr))) { + IP_VS_DBG_RL("Stopping traffic from loopback address %pI4 " + "to non-local address, dest: %pI4\n", + &ip_hdr(skb)->saddr, &rt->rt_dst); + ip_rt_put(rt); + return NULL; + } + return rt; } +/* Reroute packet to local IPv4 stack after DNAT */ +static int +__ip_vs_reroute_locally(struct sk_buff *skb) +{ + struct rtable *rt = skb_rtable(skb); + struct net_device *dev = rt->dst.dev; + struct net *net = dev_net(dev); + struct iphdr *iph = ip_hdr(skb); + + if (rt->fl.iif) { + unsigned long orefdst = skb->_skb_refdst; + + if (ip_route_input(skb, iph->daddr, iph->saddr, + iph->tos, skb->dev)) + return 0; + refdst_drop(orefdst); + } else { + struct flowi fl = { + .oif = 0, + .nl_u = { + .ip4_u = { + .daddr = iph->daddr, + .saddr = iph->saddr, + .tos = RT_TOS(iph->tos), + } + }, + .mark = skb->mark, + }; + struct rtable *rt; + + if (ip_route_output_key(net, &rt, &fl)) + return 0; + if (!(rt->rt_flags & RTCF_LOCAL)) { + ip_rt_put(rt); + return 0; + } + /* Drop old route. */ + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); + } + return 1; +} + #ifdef CONFIG_IP_VS_IPV6 +static inline int __ip_vs_is_local_route6(struct rt6_info *rt) +{ + return rt->rt6i_dev && rt->rt6i_dev->flags & IFF_LOOPBACK; +} + static struct dst_entry * __ip_vs_route_output_v6(struct net *net, struct in6_addr *daddr, struct in6_addr *ret_saddr, int do_xfrm) @@ -155,14 +234,21 @@ out_err: return NULL; } +/* + * Get route to destination or remote server + * rt_mode: flags, &1=Allow local dest, &2=Allow non-local dest, + * &4=Allow redirect from remote daddr to local + */ static struct rt6_info * -__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_conn *cp, - struct in6_addr *ret_saddr, int do_xfrm) +__ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_dest *dest, + struct in6_addr *daddr, struct in6_addr *ret_saddr, + int do_xfrm, int rt_mode) { - struct net *net = dev_net(skb->dev); + struct net *net = dev_net(skb_dst(skb)->dev); struct rt6_info *rt; /* Route to the other host */ - struct ip_vs_dest *dest = cp->dest; + struct rt6_info *ort; /* Original route */ struct dst_entry *dst; + int local; if (dest) { spin_lock(&dest->dst_lock); @@ -188,13 +274,38 @@ __ip_vs_get_out_rt_v6(struct sk_buff *skb, struct ip_vs_conn *cp, ipv6_addr_copy(ret_saddr, &dest->dst_saddr); spin_unlock(&dest->dst_lock); } else { - dst = __ip_vs_route_output_v6(net, &cp->daddr.in6, ret_saddr, - do_xfrm); + dst = __ip_vs_route_output_v6(net, daddr, ret_saddr, do_xfrm); if (!dst) return NULL; rt = (struct rt6_info *) dst; } + local = __ip_vs_is_local_route6(rt); + if (!((local ? 1 : 2) & rt_mode)) { + IP_VS_DBG_RL("Stopping traffic to %s address, dest: %pI6\n", + local ? "local":"non-local", daddr); + dst_release(&rt->dst); + return NULL; + } + if (local && !(rt_mode & 4) && + !((ort = (struct rt6_info *) skb_dst(skb)) && + __ip_vs_is_local_route6(ort))) { + IP_VS_DBG_RL("Redirect from non-local address %pI6 to local " + "requires NAT method, dest: %pI6\n", + &ipv6_hdr(skb)->daddr, daddr); + dst_release(&rt->dst); + return NULL; + } + if (unlikely(!local && (!skb->dev || skb->dev->flags & IFF_LOOPBACK) && + ipv6_addr_type(&ipv6_hdr(skb)->saddr) & + IPV6_ADDR_LOOPBACK)) { + IP_VS_DBG_RL("Stopping traffic from loopback address %pI6 " + "to non-local address, dest: %pI6\n", + &ipv6_hdr(skb)->saddr, daddr); + dst_release(&rt->dst); + return NULL; + } + return rt; } #endif @@ -227,23 +338,27 @@ ip_vs_dst_reset(struct ip_vs_dest *dest) __ret; \ }) -#define IP_VS_XMIT_NAT(pf, skb, cp) \ +#define IP_VS_XMIT_NAT(pf, skb, cp, local) \ do { \ (skb)->ipvs_property = 1; \ if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ ip_vs_notrack(skb); \ else \ ip_vs_update_conntrack(skb, cp, 1); \ + if (local) \ + return NF_ACCEPT; \ skb_forward_csum(skb); \ NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ skb_dst(skb)->dev, dst_output); \ } while (0) -#define IP_VS_XMIT(pf, skb, cp) \ +#define IP_VS_XMIT(pf, skb, cp, local) \ do { \ (skb)->ipvs_property = 1; \ if (likely(!((cp)->flags & IP_VS_CONN_F_NFCT))) \ ip_vs_notrack(skb); \ + if (local) \ + return NF_ACCEPT; \ skb_forward_csum(skb); \ NF_HOOK(pf, NF_INET_LOCAL_OUT, (skb), NULL, \ skb_dst(skb)->dev, dst_output); \ @@ -258,7 +373,7 @@ ip_vs_null_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp) { /* we do not touch skb and do not need pskb ptr */ - return NF_ACCEPT; + IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); } @@ -271,27 +386,15 @@ int ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp) { - struct net *net = dev_net(skb->dev); struct rtable *rt; /* Route to the other host */ struct iphdr *iph = ip_hdr(skb); - u8 tos = iph->tos; int mtu; - struct flowi fl = { - .oif = 0, - .nl_u = { - .ip4_u = { - .daddr = iph->daddr, - .saddr = 0, - .tos = RT_TOS(tos), } }, - }; EnterFunction(10); - if (ip_route_output_key(net, &rt, &fl)) { - IP_VS_DBG_RL("%s(): ip_route_output error, dest: %pI4\n", - __func__, &iph->daddr); + if (!(rt = __ip_vs_get_out_rt(skb, NULL, iph->daddr, + RT_TOS(iph->tos), 2))) goto tx_error_icmp; - } /* MTU checking */ mtu = dst_mtu(&rt->dst); @@ -319,7 +422,7 @@ ip_vs_bypass_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(NFPROTO_IPV4, skb, cp); + IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0); LeaveFunction(10); return NF_STOLEN; @@ -337,18 +440,14 @@ int ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct ip_vs_protocol *pp) { - struct net *net = dev_net(skb->dev); - struct dst_entry *dst; struct rt6_info *rt; /* Route to the other host */ struct ipv6hdr *iph = ipv6_hdr(skb); int mtu; EnterFunction(10); - dst = __ip_vs_route_output_v6(net, &iph->daddr, NULL, 0); - if (!dst) + if (!(rt = __ip_vs_get_out_rt_v6(skb, NULL, &iph->daddr, NULL, 0, 2))) goto tx_error_icmp; - rt = (struct rt6_info *) dst; /* MTU checking */ mtu = dst_mtu(&rt->dst); @@ -376,7 +475,7 @@ ip_vs_bypass_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(NFPROTO_IPV6, skb, cp); + IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0); LeaveFunction(10); return NF_STOLEN; @@ -401,6 +500,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct rtable *rt; /* Route to the other host */ int mtu; struct iphdr *iph = ip_hdr(skb); + int local; EnterFunction(10); @@ -414,16 +514,40 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); } - if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos)))) + if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, + RT_TOS(iph->tos), 1|2|4))) goto tx_error_icmp; + local = rt->rt_flags & RTCF_LOCAL; + /* + * Avoid duplicate tuple in reply direction for NAT traffic + * to local address when connection is sync-ed + */ +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + if (cp->flags & IP_VS_CONN_F_SYNC && local) { + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); + + if (ct && !nf_ct_is_untracked(ct)) { + IP_VS_DBG_RL_PKT(10, pp, skb, 0, "ip_vs_nat_xmit(): " + "stopping DNAT to local address"); + goto tx_error_put; + } + } +#endif + + /* From world but DNAT to loopback address? */ + if (local && ipv4_is_loopback(rt->rt_dst) && skb_rtable(skb)->fl.iif) { + IP_VS_DBG_RL_PKT(1, pp, skb, 0, "ip_vs_nat_xmit(): " + "stopping DNAT to loopback address"); + goto tx_error_put; + } /* MTU checking */ mtu = dst_mtu(&rt->dst); if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { - ip_rt_put(rt); icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for"); - goto tx_error; + goto tx_error_put; } /* copy-on-write the packet before mangling it */ @@ -433,16 +557,27 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; - /* drop old route */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - /* mangle the packet */ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) - goto tx_error; + goto tx_error_put; ip_hdr(skb)->daddr = cp->daddr.ip; ip_send_check(ip_hdr(skb)); + if (!local) { + /* drop old route */ + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); + } else { + ip_rt_put(rt); + /* + * Some IPv4 replies get local address from routes, + * not from iph, so while we DNAT after routing + * we need this second input/output route. + */ + if (!__ip_vs_reroute_locally(skb)) + goto tx_error; + } + IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); /* FIXME: when application helper enlarges the packet and the length @@ -452,7 +587,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp); + IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); LeaveFunction(10); return NF_STOLEN; @@ -475,6 +610,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, { struct rt6_info *rt; /* Route to the other host */ int mtu; + int local; EnterFunction(10); @@ -489,18 +625,44 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, IP_VS_DBG(10, "filled cport=%d\n", ntohs(*p)); } - rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0); - if (!rt) + if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, + 0, 1|2|4))) goto tx_error_icmp; + local = __ip_vs_is_local_route6(rt); + /* + * Avoid duplicate tuple in reply direction for NAT traffic + * to local address when connection is sync-ed + */ +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + if (cp->flags & IP_VS_CONN_F_SYNC && local) { + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); + + if (ct && !nf_ct_is_untracked(ct)) { + IP_VS_DBG_RL_PKT(10, pp, skb, 0, + "ip_vs_nat_xmit_v6(): " + "stopping DNAT to local address"); + goto tx_error_put; + } + } +#endif + + /* From world but DNAT to loopback address? */ + if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && + ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) { + IP_VS_DBG_RL_PKT(1, pp, skb, 0, + "ip_vs_nat_xmit_v6(): " + "stopping DNAT to loopback address"); + goto tx_error_put; + } /* MTU checking */ mtu = dst_mtu(&rt->dst); if (skb->len > mtu) { - dst_release(&rt->dst); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit_v6(): frag needed for"); - goto tx_error; + goto tx_error_put; } /* copy-on-write the packet before mangling it */ @@ -510,14 +672,19 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; - /* drop old route */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - /* mangle the packet */ if (pp->dnat_handler && !pp->dnat_handler(skb, pp, cp)) goto tx_error; - ipv6_hdr(skb)->daddr = cp->daddr.in6; + ipv6_addr_copy(&ipv6_hdr(skb)->daddr, &cp->daddr.in6); + + if (!local || !skb->dev) { + /* drop the old route when skb is not shared */ + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); + } else { + /* destined to loopback, do we need to change route? */ + dst_release(&rt->dst); + } IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); @@ -528,7 +695,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp); + IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local); LeaveFunction(10); return NF_STOLEN; @@ -588,16 +755,20 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error; } - if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(tos)))) + if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, + RT_TOS(tos), 1|2))) goto tx_error_icmp; + if (rt->rt_flags & RTCF_LOCAL) { + ip_rt_put(rt); + IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); + } tdev = rt->dst.dev; mtu = dst_mtu(&rt->dst) - sizeof(struct iphdr); if (mtu < 68) { - ip_rt_put(rt); IP_VS_DBG_RL("%s(): mtu less than 68\n", __func__); - goto tx_error; + goto tx_error_put; } if (skb_dst(skb)) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); @@ -607,9 +778,8 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if ((old_iph->frag_off & htons(IP_DF)) && mtu < ntohs(old_iph->tot_len)) { icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); - ip_rt_put(rt); IP_VS_DBG_RL("%s(): frag needed\n", __func__); - goto tx_error; + goto tx_error_put; } /* @@ -678,6 +848,9 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, kfree_skb(skb); LeaveFunction(10); return NF_STOLEN; +tx_error_put: + ip_rt_put(rt); + goto tx_error; } #ifdef CONFIG_IP_VS_IPV6 @@ -703,27 +876,29 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error; } - rt = __ip_vs_get_out_rt_v6(skb, cp, &saddr, 1); - if (!rt) + if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, + &saddr, 1, 1|2))) goto tx_error_icmp; + if (__ip_vs_is_local_route6(rt)) { + dst_release(&rt->dst); + IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1); + } tdev = rt->dst.dev; mtu = dst_mtu(&rt->dst) - sizeof(struct ipv6hdr); if (mtu < IPV6_MIN_MTU) { - dst_release(&rt->dst); IP_VS_DBG_RL("%s(): mtu less than %d\n", __func__, IPV6_MIN_MTU); - goto tx_error; + goto tx_error_put; } if (skb_dst(skb)) skb_dst(skb)->ops->update_pmtu(skb_dst(skb), mtu); if (mtu < ntohs(old_iph->payload_len) + sizeof(struct ipv6hdr)) { icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - dst_release(&rt->dst); IP_VS_DBG_RL("%s(): frag needed\n", __func__); - goto tx_error; + goto tx_error_put; } /* @@ -789,6 +964,9 @@ tx_error: kfree_skb(skb); LeaveFunction(10); return NF_STOLEN; +tx_error_put: + dst_release(&rt->dst); + goto tx_error; } #endif @@ -807,8 +985,13 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(iph->tos)))) + if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, + RT_TOS(iph->tos), 1|2))) goto tx_error_icmp; + if (rt->rt_flags & RTCF_LOCAL) { + ip_rt_put(rt); + IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 1); + } /* MTU checking */ mtu = dst_mtu(&rt->dst); @@ -836,7 +1019,7 @@ ip_vs_dr_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(NFPROTO_IPV4, skb, cp); + IP_VS_XMIT(NFPROTO_IPV4, skb, cp, 0); LeaveFunction(10); return NF_STOLEN; @@ -859,9 +1042,13 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, EnterFunction(10); - rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0); - if (!rt) + if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, + 0, 1|2))) goto tx_error_icmp; + if (__ip_vs_is_local_route6(rt)) { + dst_release(&rt->dst); + IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 1); + } /* MTU checking */ mtu = dst_mtu(&rt->dst); @@ -889,7 +1076,7 @@ ip_vs_dr_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(NFPROTO_IPV6, skb, cp); + IP_VS_XMIT(NFPROTO_IPV6, skb, cp, 0); LeaveFunction(10); return NF_STOLEN; @@ -915,6 +1102,7 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct rtable *rt; /* Route to the other host */ int mtu; int rc; + int local; EnterFunction(10); @@ -935,16 +1123,43 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, * mangle and send the packet here (only for VS/NAT) */ - if (!(rt = __ip_vs_get_out_rt(skb, cp, RT_TOS(ip_hdr(skb)->tos)))) + if (!(rt = __ip_vs_get_out_rt(skb, cp->dest, cp->daddr.ip, + RT_TOS(ip_hdr(skb)->tos), 1|2|4))) goto tx_error_icmp; + local = rt->rt_flags & RTCF_LOCAL; + + /* + * Avoid duplicate tuple in reply direction for NAT traffic + * to local address when connection is sync-ed + */ +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + if (cp->flags & IP_VS_CONN_F_SYNC && local) { + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); + + if (ct && !nf_ct_is_untracked(ct)) { + IP_VS_DBG(10, "%s(): " + "stopping DNAT to local address %pI4\n", + __func__, &cp->daddr.ip); + goto tx_error_put; + } + } +#endif + + /* From world but DNAT to loopback address? */ + if (local && ipv4_is_loopback(rt->rt_dst) && skb_rtable(skb)->fl.iif) { + IP_VS_DBG(1, "%s(): " + "stopping DNAT to loopback %pI4\n", + __func__, &cp->daddr.ip); + goto tx_error_put; + } /* MTU checking */ mtu = dst_mtu(&rt->dst); if ((skb->len > mtu) && (ip_hdr(skb)->frag_off & htons(IP_DF))) { - ip_rt_put(rt); icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, htonl(mtu)); IP_VS_DBG_RL("%s(): frag needed\n", __func__); - goto tx_error; + goto tx_error_put; } /* copy-on-write the packet before mangling it */ @@ -954,16 +1169,27 @@ ip_vs_icmp_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; - /* drop the old route when skb is not shared */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - ip_vs_nat_icmp(skb, pp, cp, 0); + if (!local) { + /* drop the old route when skb is not shared */ + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); + } else { + ip_rt_put(rt); + /* + * Some IPv4 replies get local address from routes, + * not from iph, so while we DNAT after routing + * we need this second input/output route. + */ + if (!__ip_vs_reroute_locally(skb)) + goto tx_error; + } + /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(NFPROTO_IPV4, skb, cp); + IP_VS_XMIT_NAT(NFPROTO_IPV4, skb, cp, local); rc = NF_STOLEN; goto out; @@ -989,6 +1215,7 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct rt6_info *rt; /* Route to the other host */ int mtu; int rc; + int local; EnterFunction(10); @@ -1009,17 +1236,44 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, * mangle and send the packet here (only for VS/NAT) */ - rt = __ip_vs_get_out_rt_v6(skb, cp, NULL, 0); - if (!rt) + if (!(rt = __ip_vs_get_out_rt_v6(skb, cp->dest, &cp->daddr.in6, NULL, + 0, 1|2|4))) goto tx_error_icmp; + local = __ip_vs_is_local_route6(rt); + /* + * Avoid duplicate tuple in reply direction for NAT traffic + * to local address when connection is sync-ed + */ +#if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) + if (cp->flags & IP_VS_CONN_F_SYNC && local) { + enum ip_conntrack_info ctinfo; + struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); + + if (ct && !nf_ct_is_untracked(ct)) { + IP_VS_DBG(10, "%s(): " + "stopping DNAT to local address %pI6\n", + __func__, &cp->daddr.in6); + goto tx_error_put; + } + } +#endif + + /* From world but DNAT to loopback address? */ + if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && + ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) { + IP_VS_DBG(1, "%s(): " + "stopping DNAT to loopback %pI6\n", + __func__, &cp->daddr.in6); + goto tx_error_put; + } + /* MTU checking */ mtu = dst_mtu(&rt->dst); if (skb->len > mtu) { - dst_release(&rt->dst); icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); IP_VS_DBG_RL("%s(): frag needed\n", __func__); - goto tx_error; + goto tx_error_put; } /* copy-on-write the packet before mangling it */ @@ -1029,16 +1283,21 @@ ip_vs_icmp_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, if (skb_cow(skb, rt->dst.dev->hard_header_len)) goto tx_error_put; - /* drop the old route when skb is not shared */ - skb_dst_drop(skb); - skb_dst_set(skb, &rt->dst); - ip_vs_nat_icmp_v6(skb, pp, cp, 0); + if (!local || !skb->dev) { + /* drop the old route when skb is not shared */ + skb_dst_drop(skb); + skb_dst_set(skb, &rt->dst); + } else { + /* destined to loopback, do we need to change route? */ + dst_release(&rt->dst); + } + /* Another hack: avoid icmp_send in ip_fragment */ skb->local_df = 1; - IP_VS_XMIT(NFPROTO_IPV6, skb, cp); + IP_VS_XMIT_NAT(NFPROTO_IPV6, skb, cp, local); rc = NF_STOLEN; goto out; -- cgit v1.2.3 From 0d79641a96d612aaa6d57a4d4f521d7ed9c9ccdd Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Sun, 17 Oct 2010 16:46:17 +0300 Subject: ipvs: provide address family for debugging As skb->protocol is not valid in LOCAL_OUT add parameter for address family in packet debugging functions. Even if ports are not present in AH and ESP change them to use ip_vs_tcpudp_debug_packet to show at least valid addresses as before. This patch removes the last user of skb->protocol in IPVS. Signed-off-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 17 ++++++----- net/netfilter/ipvs/ip_vs_core.c | 41 +++++++++++++++----------- net/netfilter/ipvs/ip_vs_proto.c | 8 ++--- net/netfilter/ipvs/ip_vs_proto_ah_esp.c | 52 ++------------------------------- net/netfilter/ipvs/ip_vs_proto_sctp.c | 2 +- net/netfilter/ipvs/ip_vs_proto_tcp.c | 4 +-- net/netfilter/ipvs/ip_vs_proto_udp.c | 4 +-- net/netfilter/ipvs/ip_vs_xmit.c | 18 +++++++----- 8 files changed, 54 insertions(+), 92 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 2f88d594233..b7bbd6c28cf 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -136,24 +136,24 @@ static inline const char *ip_vs_dbg_addr(int af, char *buf, size_t buf_len, if (net_ratelimit()) \ printk(KERN_DEBUG pr_fmt(msg), ##__VA_ARGS__); \ } while (0) -#define IP_VS_DBG_PKT(level, pp, skb, ofs, msg) \ +#define IP_VS_DBG_PKT(level, af, pp, skb, ofs, msg) \ do { \ if (level <= ip_vs_get_debug_level()) \ - pp->debug_packet(pp, skb, ofs, msg); \ + pp->debug_packet(af, pp, skb, ofs, msg); \ } while (0) -#define IP_VS_DBG_RL_PKT(level, pp, skb, ofs, msg) \ +#define IP_VS_DBG_RL_PKT(level, af, pp, skb, ofs, msg) \ do { \ if (level <= ip_vs_get_debug_level() && \ net_ratelimit()) \ - pp->debug_packet(pp, skb, ofs, msg); \ + pp->debug_packet(af, pp, skb, ofs, msg); \ } while (0) #else /* NO DEBUGGING at ALL */ #define IP_VS_DBG_BUF(level, msg...) do {} while (0) #define IP_VS_ERR_BUF(msg...) do {} while (0) #define IP_VS_DBG(level, msg...) do {} while (0) #define IP_VS_DBG_RL(msg...) do {} while (0) -#define IP_VS_DBG_PKT(level, pp, skb, ofs, msg) do {} while (0) -#define IP_VS_DBG_RL_PKT(level, pp, skb, ofs, msg) do {} while (0) +#define IP_VS_DBG_PKT(level, af, pp, skb, ofs, msg) do {} while (0) +#define IP_VS_DBG_RL_PKT(level, af, pp, skb, ofs, msg) do {} while (0) #endif #define IP_VS_BUG() BUG() @@ -345,7 +345,7 @@ struct ip_vs_protocol { int (*app_conn_bind)(struct ip_vs_conn *cp); - void (*debug_packet)(struct ip_vs_protocol *pp, + void (*debug_packet)(int af, struct ip_vs_protocol *pp, const struct sk_buff *skb, int offset, const char *msg); @@ -828,7 +828,8 @@ extern int ip_vs_set_state_timeout(int *table, int num, const char *const *names, const char *name, int to); extern void -ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, +ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, + const struct sk_buff *skb, int offset, const char *msg); extern struct ip_vs_protocol ip_vs_protocol_tcp; diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 5fbcf67af8e..b4e51e9c5a0 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -365,7 +365,8 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, * with persistence the connection is created on SYN+ACK. */ if (pptr[0] == FTPDATA) { - IP_VS_DBG_PKT(12, pp, skb, 0, "Not scheduling FTPDATA"); + IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, + "Not scheduling FTPDATA"); return NULL; } @@ -376,7 +377,7 @@ ip_vs_schedule(struct ip_vs_service *svc, struct sk_buff *skb, if ((!skb->dev || skb->dev->flags & IFF_LOOPBACK) && (svc->flags & IP_VS_SVC_F_PERSISTENT || svc->fwmark) && (cp = pp->conn_in_get(svc->af, skb, pp, &iph, iph.len, 1))) { - IP_VS_DBG_PKT(12, pp, skb, 0, + IP_VS_DBG_PKT(12, svc->af, pp, skb, 0, "Not scheduling reply for existing connection"); __ip_vs_conn_put(cp); return NULL; @@ -617,10 +618,10 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, skb->ip_summed = CHECKSUM_UNNECESSARY; if (inout) - IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, + IP_VS_DBG_PKT(11, AF_INET, pp, skb, (void *)ciph - (void *)iph, "Forwarding altered outgoing ICMP"); else - IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, + IP_VS_DBG_PKT(11, AF_INET, pp, skb, (void *)ciph - (void *)iph, "Forwarding altered incoming ICMP"); } @@ -662,11 +663,13 @@ void ip_vs_nat_icmp_v6(struct sk_buff *skb, struct ip_vs_protocol *pp, skb->ip_summed = CHECKSUM_PARTIAL; if (inout) - IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, - "Forwarding altered outgoing ICMPv6"); + IP_VS_DBG_PKT(11, AF_INET6, pp, skb, + (void *)ciph - (void *)iph, + "Forwarding altered outgoing ICMPv6"); else - IP_VS_DBG_PKT(11, pp, skb, (void *)ciph - (void *)iph, - "Forwarding altered incoming ICMPv6"); + IP_VS_DBG_PKT(11, AF_INET6, pp, skb, + (void *)ciph - (void *)iph, + "Forwarding altered incoming ICMPv6"); } #endif @@ -798,7 +801,8 @@ static int ip_vs_out_icmp(struct sk_buff *skb, int *related, pp->dont_defrag)) return NF_ACCEPT; - IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMP for"); + IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset, + "Checking outgoing ICMP for"); offset += cih->ihl * 4; @@ -874,7 +878,8 @@ static int ip_vs_out_icmp_v6(struct sk_buff *skb, int *related, if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag)) return NF_ACCEPT; - IP_VS_DBG_PKT(11, pp, skb, offset, "Checking outgoing ICMPv6 for"); + IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset, + "Checking outgoing ICMPv6 for"); offset += sizeof(struct ipv6hdr); @@ -922,7 +927,7 @@ static unsigned int handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, struct ip_vs_conn *cp, int ihl) { - IP_VS_DBG_PKT(11, pp, skb, 0, "Outgoing packet"); + IP_VS_DBG_PKT(11, af, pp, skb, 0, "Outgoing packet"); if (!skb_make_writable(skb, ihl)) goto drop; @@ -967,7 +972,7 @@ handle_response(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, ip_route_me_harder(skb, RTN_LOCAL) != 0) goto drop; - IP_VS_DBG_PKT(10, pp, skb, 0, "After SNAT"); + IP_VS_DBG_PKT(10, af, pp, skb, 0, "After SNAT"); ip_vs_out_stats(cp, skb); ip_vs_set_state(cp, IP_VS_DIR_OUTPUT, skb, pp); @@ -1117,7 +1122,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff *skb, int af) } } } - IP_VS_DBG_PKT(12, pp, skb, 0, + IP_VS_DBG_PKT(12, af, pp, skb, 0, "ip_vs_out: packet continues traversal as normal"); return NF_ACCEPT; } @@ -1253,7 +1258,8 @@ ip_vs_in_icmp(struct sk_buff *skb, int *related, unsigned int hooknum) pp->dont_defrag)) return NF_ACCEPT; - IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMP for"); + IP_VS_DBG_PKT(11, AF_INET, pp, skb, offset, + "Checking incoming ICMP for"); offset += cih->ihl * 4; @@ -1364,7 +1370,8 @@ ip_vs_in_icmp_v6(struct sk_buff *skb, int *related, unsigned int hooknum) if (unlikely(cih->nexthdr == IPPROTO_FRAGMENT && pp->dont_defrag)) return NF_ACCEPT; - IP_VS_DBG_PKT(11, pp, skb, offset, "Checking incoming ICMPv6 for"); + IP_VS_DBG_PKT(11, AF_INET6, pp, skb, offset, + "Checking incoming ICMPv6 for"); offset += sizeof(struct ipv6hdr); @@ -1492,12 +1499,12 @@ ip_vs_in(unsigned int hooknum, struct sk_buff *skb, int af) if (unlikely(!cp)) { /* sorry, all this trouble for a no-hit :) */ - IP_VS_DBG_PKT(12, pp, skb, 0, + IP_VS_DBG_PKT(12, af, pp, skb, 0, "ip_vs_in: packet continues traversal as normal"); return NF_ACCEPT; } - IP_VS_DBG_PKT(11, pp, skb, 0, "Incoming packet"); + IP_VS_DBG_PKT(11, af, pp, skb, 0, "Incoming packet"); /* Check the server status */ if (cp->dest && !(cp->dest->flags & IP_VS_DEST_F_AVAILABLE)) { diff --git a/net/netfilter/ipvs/ip_vs_proto.c b/net/netfilter/ipvs/ip_vs_proto.c index 027f654799f..c5399839087 100644 --- a/net/netfilter/ipvs/ip_vs_proto.c +++ b/net/netfilter/ipvs/ip_vs_proto.c @@ -172,8 +172,8 @@ ip_vs_tcpudp_debug_packet_v4(struct ip_vs_protocol *pp, else if (ih->frag_off & htons(IP_OFFSET)) sprintf(buf, "%pI4->%pI4 frag", &ih->saddr, &ih->daddr); else { - __be16 _ports[2], *pptr -; + __be16 _ports[2], *pptr; + pptr = skb_header_pointer(skb, offset + ih->ihl*4, sizeof(_ports), _ports); if (pptr == NULL) @@ -223,13 +223,13 @@ ip_vs_tcpudp_debug_packet_v6(struct ip_vs_protocol *pp, void -ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, +ip_vs_tcpudp_debug_packet(int af, struct ip_vs_protocol *pp, const struct sk_buff *skb, int offset, const char *msg) { #ifdef CONFIG_IP_VS_IPV6 - if (skb->protocol == htons(ETH_P_IPV6)) + if (af == AF_INET6) ip_vs_tcpudp_debug_packet_v6(pp, skb, offset, msg); else #endif diff --git a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c index 8956ef33ea6..3a0461117d3 100644 --- a/net/netfilter/ipvs/ip_vs_proto_ah_esp.c +++ b/net/netfilter/ipvs/ip_vs_proto_ah_esp.c @@ -117,54 +117,6 @@ ah_esp_conn_schedule(int af, struct sk_buff *skb, struct ip_vs_protocol *pp, return 0; } - -static void -ah_esp_debug_packet_v4(struct ip_vs_protocol *pp, const struct sk_buff *skb, - int offset, const char *msg) -{ - char buf[256]; - struct iphdr _iph, *ih; - - ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); - if (ih == NULL) - sprintf(buf, "TRUNCATED"); - else - sprintf(buf, "%pI4->%pI4", &ih->saddr, &ih->daddr); - - pr_debug("%s: %s %s\n", msg, pp->name, buf); -} - -#ifdef CONFIG_IP_VS_IPV6 -static void -ah_esp_debug_packet_v6(struct ip_vs_protocol *pp, const struct sk_buff *skb, - int offset, const char *msg) -{ - char buf[256]; - struct ipv6hdr _iph, *ih; - - ih = skb_header_pointer(skb, offset, sizeof(_iph), &_iph); - if (ih == NULL) - sprintf(buf, "TRUNCATED"); - else - sprintf(buf, "%pI6->%pI6", &ih->saddr, &ih->daddr); - - pr_debug("%s: %s %s\n", msg, pp->name, buf); -} -#endif - -static void -ah_esp_debug_packet(struct ip_vs_protocol *pp, const struct sk_buff *skb, - int offset, const char *msg) -{ -#ifdef CONFIG_IP_VS_IPV6 - if (skb->protocol == htons(ETH_P_IPV6)) - ah_esp_debug_packet_v6(pp, skb, offset, msg); - else -#endif - ah_esp_debug_packet_v4(pp, skb, offset, msg); -} - - static void ah_esp_init(struct ip_vs_protocol *pp) { /* nothing to do now */ @@ -195,7 +147,7 @@ struct ip_vs_protocol ip_vs_protocol_ah = { .register_app = NULL, .unregister_app = NULL, .app_conn_bind = NULL, - .debug_packet = ah_esp_debug_packet, + .debug_packet = ip_vs_tcpudp_debug_packet, .timeout_change = NULL, /* ISAKMP */ .set_state_timeout = NULL, }; @@ -219,7 +171,7 @@ struct ip_vs_protocol ip_vs_protocol_esp = { .register_app = NULL, .unregister_app = NULL, .app_conn_bind = NULL, - .debug_packet = ah_esp_debug_packet, + .debug_packet = ip_vs_tcpudp_debug_packet, .timeout_change = NULL, /* ISAKMP */ }; #endif diff --git a/net/netfilter/ipvs/ip_vs_proto_sctp.c b/net/netfilter/ipvs/ip_vs_proto_sctp.c index 9ab5232ce01..d254345bfda 100644 --- a/net/netfilter/ipvs/ip_vs_proto_sctp.c +++ b/net/netfilter/ipvs/ip_vs_proto_sctp.c @@ -176,7 +176,7 @@ sctp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) if (val != cmp) { /* CRC failure, dump it. */ - IP_VS_DBG_RL_PKT(0, pp, skb, 0, + IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, "Failed checksum for"); return 0; } diff --git a/net/netfilter/ipvs/ip_vs_proto_tcp.c b/net/netfilter/ipvs/ip_vs_proto_tcp.c index 85d80a66b49..f6c5200e214 100644 --- a/net/netfilter/ipvs/ip_vs_proto_tcp.c +++ b/net/netfilter/ipvs/ip_vs_proto_tcp.c @@ -300,7 +300,7 @@ tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) skb->len - tcphoff, ipv6_hdr(skb)->nexthdr, skb->csum)) { - IP_VS_DBG_RL_PKT(0, pp, skb, 0, + IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, "Failed checksum for"); return 0; } @@ -311,7 +311,7 @@ tcp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) skb->len - tcphoff, ip_hdr(skb)->protocol, skb->csum)) { - IP_VS_DBG_RL_PKT(0, pp, skb, 0, + IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, "Failed checksum for"); return 0; } diff --git a/net/netfilter/ipvs/ip_vs_proto_udp.c b/net/netfilter/ipvs/ip_vs_proto_udp.c index 5d21f08155e..9d106a06bb0 100644 --- a/net/netfilter/ipvs/ip_vs_proto_udp.c +++ b/net/netfilter/ipvs/ip_vs_proto_udp.c @@ -314,7 +314,7 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) skb->len - udphoff, ipv6_hdr(skb)->nexthdr, skb->csum)) { - IP_VS_DBG_RL_PKT(0, pp, skb, 0, + IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, "Failed checksum for"); return 0; } @@ -325,7 +325,7 @@ udp_csum_check(int af, struct sk_buff *skb, struct ip_vs_protocol *pp) skb->len - udphoff, ip_hdr(skb)->protocol, skb->csum)) { - IP_VS_DBG_RL_PKT(0, pp, skb, 0, + IP_VS_DBG_RL_PKT(0, af, pp, skb, 0, "Failed checksum for"); return 0; } diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 97b5361c036..de04ea39cde 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -543,7 +543,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct)) { - IP_VS_DBG_RL_PKT(10, pp, skb, 0, "ip_vs_nat_xmit(): " + IP_VS_DBG_RL_PKT(10, AF_INET, pp, skb, 0, + "ip_vs_nat_xmit(): " "stopping DNAT to local address"); goto tx_error_put; } @@ -552,7 +553,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* From world but DNAT to loopback address? */ if (local && ipv4_is_loopback(rt->rt_dst) && skb_rtable(skb)->fl.iif) { - IP_VS_DBG_RL_PKT(1, pp, skb, 0, "ip_vs_nat_xmit(): " + IP_VS_DBG_RL_PKT(1, AF_INET, pp, skb, 0, "ip_vs_nat_xmit(): " "stopping DNAT to loopback address"); goto tx_error_put; } @@ -561,7 +562,8 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, mtu = dst_mtu(&rt->dst); if ((skb->len > mtu) && (iph->frag_off & htons(IP_DF))) { icmp_send(skb, ICMP_DEST_UNREACH,ICMP_FRAG_NEEDED, htonl(mtu)); - IP_VS_DBG_RL_PKT(0, pp, skb, 0, "ip_vs_nat_xmit(): frag needed for"); + IP_VS_DBG_RL_PKT(0, AF_INET, pp, skb, 0, + "ip_vs_nat_xmit(): frag needed for"); goto tx_error_put; } @@ -593,7 +595,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, goto tx_error; } - IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); + IP_VS_DBG_PKT(10, AF_INET, pp, skb, 0, "After DNAT"); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still @@ -654,7 +656,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, struct nf_conn *ct = ct = nf_ct_get(skb, &ctinfo); if (ct && !nf_ct_is_untracked(ct)) { - IP_VS_DBG_RL_PKT(10, pp, skb, 0, + IP_VS_DBG_RL_PKT(10, AF_INET6, pp, skb, 0, "ip_vs_nat_xmit_v6(): " "stopping DNAT to local address"); goto tx_error_put; @@ -665,7 +667,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, /* From world but DNAT to loopback address? */ if (local && skb->dev && !(skb->dev->flags & IFF_LOOPBACK) && ipv6_addr_type(&rt->rt6i_dst.addr) & IPV6_ADDR_LOOPBACK) { - IP_VS_DBG_RL_PKT(1, pp, skb, 0, + IP_VS_DBG_RL_PKT(1, AF_INET6, pp, skb, 0, "ip_vs_nat_xmit_v6(): " "stopping DNAT to loopback address"); goto tx_error_put; @@ -680,7 +682,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, skb->dev = net->loopback_dev; } icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - IP_VS_DBG_RL_PKT(0, pp, skb, 0, + IP_VS_DBG_RL_PKT(0, AF_INET6, pp, skb, 0, "ip_vs_nat_xmit_v6(): frag needed for"); goto tx_error_put; } @@ -706,7 +708,7 @@ ip_vs_nat_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp, dst_release(&rt->dst); } - IP_VS_DBG_PKT(10, pp, skb, 0, "After DNAT"); + IP_VS_DBG_PKT(10, AF_INET6, pp, skb, 0, "After DNAT"); /* FIXME: when application helper enlarges the packet and the length is larger than the MTU of outgoing device, there will be still -- cgit v1.2.3 From 6f747aca5e61778190d1e27bdc469f49149f0230 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 15 Oct 2010 05:15:59 +0000 Subject: xfrm6: make xfrm6_tunnel_free_spi local Function only defined and used in one file. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/xfrm.h | 1 - net/ipv6/xfrm6_tunnel.c | 4 +--- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 4f53532d4c2..36fdcb3fab9 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1419,7 +1419,6 @@ extern int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, extern int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family); extern int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family); extern __be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr); -extern void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr); extern __be32 xfrm6_tunnel_spi_lookup(struct net *net, xfrm_address_t *saddr); extern int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb); diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index ac7584b946a..2969cad408d 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -199,7 +199,7 @@ static void x6spi_destroy_rcu(struct rcu_head *head) container_of(head, struct xfrm6_tunnel_spi, rcu_head)); } -void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr) +static void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr) { struct xfrm6_tunnel_net *xfrm6_tn = xfrm6_tunnel_pernet(net); struct xfrm6_tunnel_spi *x6spi; @@ -223,8 +223,6 @@ void xfrm6_tunnel_free_spi(struct net *net, xfrm_address_t *saddr) spin_unlock_bh(&xfrm6_tunnel_spi_lock); } -EXPORT_SYMBOL(xfrm6_tunnel_free_spi); - static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) { skb_push(skb, -skb_network_offset(skb)); -- cgit v1.2.3 From 8d8a0b1cc2a8f9794a3f1f747089b6a93774408d Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 15 Oct 2010 05:12:01 +0000 Subject: rtnetlink: remove rtnl_kill_links The function rtnl_kill_links is defined but never used. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/rtnetlink.h | 1 - net/core/rtnetlink.c | 8 -------- 2 files changed, 9 deletions(-) (limited to 'include/net') diff --git a/include/net/rtnetlink.h b/include/net/rtnetlink.h index af60fd05084..e013c68bfb0 100644 --- a/include/net/rtnetlink.h +++ b/include/net/rtnetlink.h @@ -79,7 +79,6 @@ struct rtnl_link_ops { extern int __rtnl_link_register(struct rtnl_link_ops *ops); extern void __rtnl_link_unregister(struct rtnl_link_ops *ops); -extern void rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops); extern int rtnl_link_register(struct rtnl_link_ops *ops); extern void rtnl_link_unregister(struct rtnl_link_ops *ops); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index b2a718dfd72..8121268ddbd 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -299,14 +299,6 @@ static void __rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops) unregister_netdevice_many(&list_kill); } -void rtnl_kill_links(struct net *net, struct rtnl_link_ops *ops) -{ - rtnl_lock(); - __rtnl_kill_links(net, ops); - rtnl_unlock(); -} -EXPORT_SYMBOL_GPL(rtnl_kill_links); - /** * __rtnl_link_unregister - Unregister rtnl_link_ops from rtnetlink. * @ops: struct rtnl_link_ops * to unregister -- cgit v1.2.3 From 1c4c40c42da468ef02dc04940930c1926c964558 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Fri, 15 Oct 2010 05:14:19 +0000 Subject: xfrm: make xfrm_bundle_ok local Only used in one place. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 -- net/xfrm/xfrm_policy.c | 7 ++++--- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 36fdcb3fab9..f28d7c9b9f8 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1465,8 +1465,6 @@ struct xfrm_state *xfrm_find_acq(struct net *net, struct xfrm_mark *mark, xfrm_address_t *saddr, int create, unsigned short family); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); -extern int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst, - struct flowi *fl, int family, int strict); #ifdef CONFIG_XFRM_MIGRATE extern int km_migrate(struct xfrm_selector *sel, u8 dir, u8 type, diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index cbab6e1a8c9..044e7789851 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -50,6 +50,9 @@ static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); static void xfrm_policy_put_afinfo(struct xfrm_policy_afinfo *afinfo); static void xfrm_init_pmtu(struct dst_entry *dst); static int stale_bundle(struct dst_entry *dst); +static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *xdst, + struct flowi *fl, int family, int strict); + static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int dir); @@ -2276,7 +2279,7 @@ static void xfrm_init_pmtu(struct dst_entry *dst) * still valid. */ -int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, +static int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, struct flowi *fl, int family, int strict) { struct dst_entry *dst = &first->u.dst; @@ -2358,8 +2361,6 @@ int xfrm_bundle_ok(struct xfrm_policy *pol, struct xfrm_dst *first, return 1; } -EXPORT_SYMBOL(xfrm_bundle_ok); - int xfrm_policy_register_afinfo(struct xfrm_policy_afinfo *afinfo) { struct net *net; -- cgit v1.2.3 From 3511c9132f8b1e1b5634e41a3331c44b0c13be70 Mon Sep 17 00:00:00 2001 From: Changli Gao Date: Sat, 16 Oct 2010 13:04:08 +0000 Subject: net_sched: remove the unused parameter of qdisc_create_dflt() The first parameter dev isn't in use in qdisc_create_dflt(). Signed-off-by: Changli Gao Acked-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/sch_generic.h | 3 +-- net/sched/sch_atm.c | 5 ++--- net/sched/sch_cbq.c | 12 ++++++------ net/sched/sch_drr.c | 4 ++-- net/sched/sch_dsmark.c | 6 ++---- net/sched/sch_fifo.c | 3 +-- net/sched/sch_generic.c | 10 ++++------ net/sched/sch_hfsc.c | 8 +++----- net/sched/sch_htb.c | 8 +++----- net/sched/sch_mq.c | 2 +- net/sched/sch_multiq.c | 3 +-- net/sched/sch_netem.c | 3 +-- net/sched/sch_prio.c | 2 +- 13 files changed, 28 insertions(+), 41 deletions(-) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index eda8808fdac..ea1f8a83160 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -328,8 +328,7 @@ extern void qdisc_destroy(struct Qdisc *qdisc); extern void qdisc_tree_decrease_qlen(struct Qdisc *qdisc, unsigned int n); extern struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue, struct Qdisc_ops *ops); -extern struct Qdisc *qdisc_create_dflt(struct net_device *dev, - struct netdev_queue *dev_queue, +extern struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, struct Qdisc_ops *ops, u32 parentid); extern void qdisc_calculate_pkt_len(struct sk_buff *skb, struct qdisc_size_table *stab); diff --git a/net/sched/sch_atm.c b/net/sched/sch_atm.c index 6318e1136b8..282540778aa 100644 --- a/net/sched/sch_atm.c +++ b/net/sched/sch_atm.c @@ -275,8 +275,7 @@ static int atm_tc_change(struct Qdisc *sch, u32 classid, u32 parent, goto err_out; } flow->filter_list = NULL; - flow->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, - &pfifo_qdisc_ops, classid); + flow->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid); if (!flow->q) flow->q = &noop_qdisc; pr_debug("atm_tc_change: qdisc %p\n", flow->q); @@ -543,7 +542,7 @@ static int atm_tc_init(struct Qdisc *sch, struct nlattr *opt) INIT_LIST_HEAD(&p->flows); INIT_LIST_HEAD(&p->link.list); list_add(&p->link.list, &p->flows); - p->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + p->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle); if (!p->link.q) p->link.q = &noop_qdisc; diff --git a/net/sched/sch_cbq.c b/net/sched/sch_cbq.c index 28c01ef5abc..eb763159086 100644 --- a/net/sched/sch_cbq.c +++ b/net/sched/sch_cbq.c @@ -1379,9 +1379,9 @@ static int cbq_init(struct Qdisc *sch, struct nlattr *opt) q->link.sibling = &q->link; q->link.common.classid = sch->handle; q->link.qdisc = sch; - if (!(q->link.q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, - &pfifo_qdisc_ops, - sch->handle))) + q->link.q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, + sch->handle); + if (!q->link.q) q->link.q = &noop_qdisc; q->link.priority = TC_CBQ_MAXPRIO-1; @@ -1623,7 +1623,7 @@ static int cbq_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, struct cbq_class *cl = (struct cbq_class*)arg; if (new == NULL) { - new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, cl->common.classid); if (new == NULL) return -ENOBUFS; @@ -1874,8 +1874,8 @@ cbq_change_class(struct Qdisc *sch, u32 classid, u32 parentid, struct nlattr **t cl->R_tab = rtab; rtab = NULL; cl->refcnt = 1; - if (!(cl->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, - &pfifo_qdisc_ops, classid))) + cl->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid); + if (!cl->q) cl->q = &noop_qdisc; cl->common.classid = classid; cl->tparent = parent; diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index b74046a9539..aa8b5313f8c 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -110,7 +110,7 @@ static int drr_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->refcnt = 1; cl->common.classid = classid; cl->quantum = quantum; - cl->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid); if (cl->qdisc == NULL) cl->qdisc = &noop_qdisc; @@ -218,7 +218,7 @@ static int drr_graft_class(struct Qdisc *sch, unsigned long arg, struct drr_class *cl = (struct drr_class *)arg; if (new == NULL) { - new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, cl->common.classid); if (new == NULL) new = &noop_qdisc; diff --git a/net/sched/sch_dsmark.c b/net/sched/sch_dsmark.c index 63d41f86679..1d295d62bb5 100644 --- a/net/sched/sch_dsmark.c +++ b/net/sched/sch_dsmark.c @@ -61,8 +61,7 @@ static int dsmark_graft(struct Qdisc *sch, unsigned long arg, sch, p, new, old); if (new == NULL) { - new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, - &pfifo_qdisc_ops, + new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle); if (new == NULL) new = &noop_qdisc; @@ -384,8 +383,7 @@ static int dsmark_init(struct Qdisc *sch, struct nlattr *opt) p->default_index = default_index; p->set_tc_index = nla_get_flag(tb[TCA_DSMARK_SET_TC_INDEX]); - p->q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, - &pfifo_qdisc_ops, sch->handle); + p->q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle); if (p->q == NULL) p->q = &noop_qdisc; diff --git a/net/sched/sch_fifo.c b/net/sched/sch_fifo.c index 5948bafa8ce..4dfecb0cba3 100644 --- a/net/sched/sch_fifo.c +++ b/net/sched/sch_fifo.c @@ -172,8 +172,7 @@ struct Qdisc *fifo_create_dflt(struct Qdisc *sch, struct Qdisc_ops *ops, struct Qdisc *q; int err = -ENOMEM; - q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, - ops, TC_H_MAKE(sch->handle, 1)); + q = qdisc_create_dflt(sch->dev_queue, ops, TC_H_MAKE(sch->handle, 1)); if (q) { err = fifo_set_limit(q, limit); if (err < 0) { diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 0abcc492fbf..5dbb3cd96e5 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -576,10 +576,8 @@ errout: return ERR_PTR(err); } -struct Qdisc * qdisc_create_dflt(struct net_device *dev, - struct netdev_queue *dev_queue, - struct Qdisc_ops *ops, - unsigned int parentid) +struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue, + struct Qdisc_ops *ops, unsigned int parentid) { struct Qdisc *sch; @@ -684,7 +682,7 @@ static void attach_one_default_qdisc(struct net_device *dev, struct Qdisc *qdisc; if (dev->tx_queue_len) { - qdisc = qdisc_create_dflt(dev, dev_queue, + qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops, TC_H_ROOT); if (!qdisc) { printk(KERN_INFO "%s: activation failed\n", dev->name); @@ -711,7 +709,7 @@ static void attach_default_qdiscs(struct net_device *dev) dev->qdisc = txq->qdisc_sleeping; atomic_inc(&dev->qdisc->refcnt); } else { - qdisc = qdisc_create_dflt(dev, txq, &mq_qdisc_ops, TC_H_ROOT); + qdisc = qdisc_create_dflt(txq, &mq_qdisc_ops, TC_H_ROOT); if (qdisc) { qdisc->ops->attach(qdisc); dev->qdisc = qdisc; diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index 47496098d35..069c62b7bb3 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1088,7 +1088,7 @@ hfsc_change_class(struct Qdisc *sch, u32 classid, u32 parentid, cl->refcnt = 1; cl->sched = q; cl->cl_parent = parent; - cl->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + cl->qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid); if (cl->qdisc == NULL) cl->qdisc = &noop_qdisc; @@ -1209,8 +1209,7 @@ hfsc_graft_class(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (cl->level > 0) return -EINVAL; if (new == NULL) { - new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, - &pfifo_qdisc_ops, + new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, cl->cl_common.classid); if (new == NULL) new = &noop_qdisc; @@ -1452,8 +1451,7 @@ hfsc_init_qdisc(struct Qdisc *sch, struct nlattr *opt) q->root.cl_common.classid = sch->handle; q->root.refcnt = 1; q->root.sched = q; - q->root.qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, - &pfifo_qdisc_ops, + q->root.qdisc = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, sch->handle); if (q->root.qdisc == NULL) q->root.qdisc = &noop_qdisc; diff --git a/net/sched/sch_htb.c b/net/sched/sch_htb.c index 87d94494186..01b519d6c52 100644 --- a/net/sched/sch_htb.c +++ b/net/sched/sch_htb.c @@ -1121,8 +1121,7 @@ static int htb_graft(struct Qdisc *sch, unsigned long arg, struct Qdisc *new, if (cl->level) return -EINVAL; if (new == NULL && - (new = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, - &pfifo_qdisc_ops, + (new = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, cl->common.classid)) == NULL) return -ENOBUFS; @@ -1247,8 +1246,7 @@ static int htb_delete(struct Qdisc *sch, unsigned long arg) return -EBUSY; if (!cl->level && htb_parent_last_child(cl)) { - new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, - &pfifo_qdisc_ops, + new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, cl->parent->common.classid); last_child = 1; } @@ -1377,7 +1375,7 @@ static int htb_change_class(struct Qdisc *sch, u32 classid, /* create leaf qdisc early because it uses kmalloc(GFP_KERNEL) so that can't be used inside of sch_tree_lock -- thanks to Karlis Peisenieks */ - new_q = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + new_q = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, classid); sch_tree_lock(sch); if (parent && !parent->level) { diff --git a/net/sched/sch_mq.c b/net/sched/sch_mq.c index fe91e50f9d9..ecc302f4d2a 100644 --- a/net/sched/sch_mq.c +++ b/net/sched/sch_mq.c @@ -56,7 +56,7 @@ static int mq_init(struct Qdisc *sch, struct nlattr *opt) for (ntx = 0; ntx < dev->num_tx_queues; ntx++) { dev_queue = netdev_get_tx_queue(dev, ntx); - qdisc = qdisc_create_dflt(dev, dev_queue, &pfifo_fast_ops, + qdisc = qdisc_create_dflt(dev_queue, &pfifo_fast_ops, TC_H_MAKE(TC_H_MAJ(sch->handle), TC_H_MIN(ntx + 1))); if (qdisc == NULL) diff --git a/net/sched/sch_multiq.c b/net/sched/sch_multiq.c index 6ae251279fc..32690deab5d 100644 --- a/net/sched/sch_multiq.c +++ b/net/sched/sch_multiq.c @@ -227,8 +227,7 @@ static int multiq_tune(struct Qdisc *sch, struct nlattr *opt) for (i = 0; i < q->bands; i++) { if (q->queues[i] == &noop_qdisc) { struct Qdisc *child, *old; - child = qdisc_create_dflt(qdisc_dev(sch), - sch->dev_queue, + child = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, TC_H_MAKE(sch->handle, i + 1)); diff --git a/net/sched/sch_netem.c b/net/sched/sch_netem.c index 4714ff162bb..e5593c083a7 100644 --- a/net/sched/sch_netem.c +++ b/net/sched/sch_netem.c @@ -538,8 +538,7 @@ static int netem_init(struct Qdisc *sch, struct nlattr *opt) qdisc_watchdog_init(&q->watchdog, sch); - q->qdisc = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, - &tfifo_qdisc_ops, + q->qdisc = qdisc_create_dflt(sch->dev_queue, &tfifo_qdisc_ops, TC_H_MAKE(sch->handle, 1)); if (!q->qdisc) { pr_debug("netem: qdisc create failed\n"); diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index 0748fb1e3a4..b1c95bce33c 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -200,7 +200,7 @@ static int prio_tune(struct Qdisc *sch, struct nlattr *opt) for (i=0; ibands; i++) { if (q->queues[i] == &noop_qdisc) { struct Qdisc *child, *old; - child = qdisc_create_dflt(qdisc_dev(sch), sch->dev_queue, + child = qdisc_create_dflt(sch->dev_queue, &pfifo_qdisc_ops, TC_H_MAKE(sch->handle, i + 1)); if (child) { -- cgit v1.2.3 From 106e4c26b1529e559d1aae777f11b4f8f7bafc26 Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Thu, 21 Oct 2010 12:45:14 +0200 Subject: tproxy: kick out TIME_WAIT sockets in case a new connection comes in with the same tuple Without tproxy redirections an incoming SYN kicks out conflicting TIME_WAIT sockets, in order to handle clients that reuse ports within the TIME_WAIT period. The same mechanism didn't work in case TProxy is involved in finding the proper socket, as the time_wait processing code looked up the listening socket assuming that the listener addr/port matches those of the established connection. This is not the case with TProxy as the listener addr/port is possibly changed with the tproxy rule. Signed-off-by: Balazs Scheidler Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_tproxy_core.h | 6 ++- net/netfilter/nf_tproxy_core.c | 29 +++++++++++---- net/netfilter/xt_TPROXY.c | 68 +++++++++++++++++++++++++++++++--- net/netfilter/xt_socket.c | 2 +- 4 files changed, 90 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tproxy_core.h b/include/net/netfilter/nf_tproxy_core.h index 208b46f4d6d..b3a8942a4e6 100644 --- a/include/net/netfilter/nf_tproxy_core.h +++ b/include/net/netfilter/nf_tproxy_core.h @@ -8,12 +8,16 @@ #include #include +#define NFT_LOOKUP_ANY 0 +#define NFT_LOOKUP_LISTENER 1 +#define NFT_LOOKUP_ESTABLISHED 2 + /* look up and get a reference to a matching socket */ extern struct sock * nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, const __be32 saddr, const __be32 daddr, const __be16 sport, const __be16 dport, - const struct net_device *in, bool listening); + const struct net_device *in, int lookup_type); static inline void nf_tproxy_put_sock(struct sock *sk) diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c index 5490fc37c92..8589e5e0811 100644 --- a/net/netfilter/nf_tproxy_core.c +++ b/net/netfilter/nf_tproxy_core.c @@ -22,21 +22,34 @@ struct sock * nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, const __be32 saddr, const __be32 daddr, const __be16 sport, const __be16 dport, - const struct net_device *in, bool listening_only) + const struct net_device *in, int lookup_type) { struct sock *sk; /* look up socket */ switch (protocol) { case IPPROTO_TCP: - if (listening_only) - sk = __inet_lookup_listener(net, &tcp_hashinfo, - daddr, ntohs(dport), - in->ifindex); - else + switch (lookup_type) { + case NFT_LOOKUP_ANY: sk = __inet_lookup(net, &tcp_hashinfo, saddr, sport, daddr, dport, in->ifindex); + break; + case NFT_LOOKUP_LISTENER: + sk = inet_lookup_listener(net, &tcp_hashinfo, + daddr, dport, + in->ifindex); + break; + case NFT_LOOKUP_ESTABLISHED: + sk = inet_lookup_established(net, &tcp_hashinfo, + saddr, sport, daddr, dport, + in->ifindex); + break; + default: + WARN_ON(1); + sk = NULL; + break; + } break; case IPPROTO_UDP: sk = udp4_lib_lookup(net, saddr, sport, daddr, dport, @@ -47,8 +60,8 @@ nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, sk = NULL; } - pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, listener only: %d, sock %p\n", - protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), listening_only, sk); + pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n", + protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk); return sk; } diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 21bb2aff6b8..e0b6900a92c 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -24,6 +24,57 @@ #include #include +/** + * tproxy_handle_time_wait() - handle TCP TIME_WAIT reopen redirections + * @skb: The skb being processed. + * @par: Iptables target parameters. + * @sk: The TIME_WAIT TCP socket found by the lookup. + * + * We have to handle SYN packets arriving to TIME_WAIT sockets + * differently: instead of reopening the connection we should rather + * redirect the new connection to the proxy if there's a listener + * socket present. + * + * tproxy_handle_time_wait() consumes the socket reference passed in. + * + * Returns the listener socket if there's one, the TIME_WAIT socket if + * no such listener is found, or NULL if the TCP header is incomplete. + */ +static struct sock * +tproxy_handle_time_wait(struct sk_buff *skb, const struct xt_action_param *par, struct sock *sk) +{ + const struct iphdr *iph = ip_hdr(skb); + const struct xt_tproxy_target_info *tgi = par->targinfo; + struct tcphdr _hdr, *hp; + + hp = skb_header_pointer(skb, ip_hdrlen(skb), sizeof(_hdr), &_hdr); + if (hp == NULL) { + inet_twsk_put(inet_twsk(sk)); + return NULL; + } + + if (hp->syn && !hp->rst && !hp->ack && !hp->fin) { + /* SYN to a TIME_WAIT socket, we'd rather redirect it + * to a listener socket if there's one */ + struct sock *sk2; + + sk2 = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, + iph->saddr, tgi->laddr ? tgi->laddr : iph->daddr, + hp->source, tgi->lport ? tgi->lport : hp->dest, + par->in, NFT_LOOKUP_LISTENER); + if (sk2) { + /* yeah, there's one, let's kill the TIME_WAIT + * socket and redirect to the listener + */ + inet_twsk_deschedule(inet_twsk(sk), &tcp_death_row); + inet_twsk_put(inet_twsk(sk)); + sk = sk2; + } + } + + return sk; +} + static unsigned int tproxy_tg(struct sk_buff *skb, const struct xt_action_param *par) { @@ -37,11 +88,18 @@ tproxy_tg(struct sk_buff *skb, const struct xt_action_param *par) return NF_DROP; sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, - iph->saddr, - tgi->laddr ? tgi->laddr : iph->daddr, - hp->source, - tgi->lport ? tgi->lport : hp->dest, - par->in, true); + iph->saddr, iph->daddr, + hp->source, hp->dest, + par->in, NFT_LOOKUP_ESTABLISHED); + + /* UDP has no TCP_TIME_WAIT state, so we never enter here */ + if (sk && sk->sk_state == TCP_TIME_WAIT) + sk = tproxy_handle_time_wait(skb, par, sk); + else if (!sk) + sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), iph->protocol, + iph->saddr, tgi->laddr ? tgi->laddr : iph->daddr, + hp->source, tgi->lport ? tgi->lport : hp->dest, + par->in, NFT_LOOKUP_LISTENER); /* NOTE: assign_sock consumes our sk reference */ if (sk && nf_tproxy_assign_sock(skb, sk)) { diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 1ca89908cba..266faa0a2fb 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -142,7 +142,7 @@ socket_match(const struct sk_buff *skb, struct xt_action_param *par, #endif sk = nf_tproxy_get_sock_v4(dev_net(skb->dev), protocol, - saddr, daddr, sport, dport, par->in, false); + saddr, daddr, sport, dport, par->in, NFT_LOOKUP_ANY); if (sk != NULL) { bool wildcard; bool transparent = true; -- cgit v1.2.3 From 6006db84a91838813cdad8a6622a4e39efe9ea47 Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Thu, 21 Oct 2010 12:47:34 +0200 Subject: tproxy: add lookup type checks for UDP in nf_tproxy_get_sock_v4() Also, inline this function as the lookup_type is always a literal and inlining removes branches performed at runtime. Signed-off-by: Balazs Scheidler Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_tproxy_core.h | 116 ++++++++++++++++++++++++++++++++- net/netfilter/nf_tproxy_core.c | 48 -------------- 2 files changed, 114 insertions(+), 50 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tproxy_core.h b/include/net/netfilter/nf_tproxy_core.h index b3a8942a4e6..1027d7f901a 100644 --- a/include/net/netfilter/nf_tproxy_core.h +++ b/include/net/netfilter/nf_tproxy_core.h @@ -13,11 +13,123 @@ #define NFT_LOOKUP_ESTABLISHED 2 /* look up and get a reference to a matching socket */ -extern struct sock * + + +/* This function is used by the 'TPROXY' target and the 'socket' + * match. The following lookups are supported: + * + * Explicit TProxy target rule + * =========================== + * + * This is used when the user wants to intercept a connection matching + * an explicit iptables rule. In this case the sockets are assumed + * matching in preference order: + * + * - match: if there's a fully established connection matching the + * _packet_ tuple, it is returned, assuming the redirection + * already took place and we process a packet belonging to an + * established connection + * + * - match: if there's a listening socket matching the redirection + * (e.g. on-port & on-ip of the connection), it is returned, + * regardless if it was bound to 0.0.0.0 or an explicit + * address. The reasoning is that if there's an explicit rule, it + * does not really matter if the listener is bound to an interface + * or to 0. The user already stated that he wants redirection + * (since he added the rule). + * + * "socket" match based redirection (no specific rule) + * =================================================== + * + * There are connections with dynamic endpoints (e.g. FTP data + * connection) that the user is unable to add explicit rules + * for. These are taken care of by a generic "socket" rule. It is + * assumed that the proxy application is trusted to open such + * connections without explicit iptables rule (except of course the + * generic 'socket' rule). In this case the following sockets are + * matched in preference order: + * + * - match: if there's a fully established connection matching the + * _packet_ tuple + * + * - match: if there's a non-zero bound listener (possibly with a + * non-local address) We don't accept zero-bound listeners, since + * then local services could intercept traffic going through the + * box. + * + * Please note that there's an overlap between what a TPROXY target + * and a socket match will match. Normally if you have both rules the + * "socket" match will be the first one, effectively all packets + * belonging to established connections going through that one. + */ +static inline struct sock * nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, const __be32 saddr, const __be32 daddr, const __be16 sport, const __be16 dport, - const struct net_device *in, int lookup_type); + const struct net_device *in, int lookup_type) +{ + struct sock *sk; + + /* look up socket */ + switch (protocol) { + case IPPROTO_TCP: + switch (lookup_type) { + case NFT_LOOKUP_ANY: + sk = __inet_lookup(net, &tcp_hashinfo, + saddr, sport, daddr, dport, + in->ifindex); + break; + case NFT_LOOKUP_LISTENER: + sk = inet_lookup_listener(net, &tcp_hashinfo, + daddr, dport, + in->ifindex); + + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too */ + + break; + case NFT_LOOKUP_ESTABLISHED: + sk = inet_lookup_established(net, &tcp_hashinfo, + saddr, sport, daddr, dport, + in->ifindex); + break; + default: + WARN_ON(1); + sk = NULL; + break; + } + break; + case IPPROTO_UDP: + sk = udp4_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + if (sk && lookup_type != NFT_LOOKUP_ANY) { + int connected = (sk->sk_state == TCP_ESTABLISHED); + int wildcard = (inet_sk(sk)->inet_rcv_saddr == 0); + + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too */ + if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) || + (lookup_type == NFT_LOOKUP_LISTENER && connected)) { + sock_put(sk); + sk = NULL; + } + } + break; + default: + WARN_ON(1); + sk = NULL; + } + + pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n", + protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk); + + return sk; +} + static inline void nf_tproxy_put_sock(struct sock *sk) diff --git a/net/netfilter/nf_tproxy_core.c b/net/netfilter/nf_tproxy_core.c index 8589e5e0811..db655638d76 100644 --- a/net/netfilter/nf_tproxy_core.c +++ b/net/netfilter/nf_tproxy_core.c @@ -18,54 +18,6 @@ #include #include -struct sock * -nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, - const __be32 saddr, const __be32 daddr, - const __be16 sport, const __be16 dport, - const struct net_device *in, int lookup_type) -{ - struct sock *sk; - - /* look up socket */ - switch (protocol) { - case IPPROTO_TCP: - switch (lookup_type) { - case NFT_LOOKUP_ANY: - sk = __inet_lookup(net, &tcp_hashinfo, - saddr, sport, daddr, dport, - in->ifindex); - break; - case NFT_LOOKUP_LISTENER: - sk = inet_lookup_listener(net, &tcp_hashinfo, - daddr, dport, - in->ifindex); - break; - case NFT_LOOKUP_ESTABLISHED: - sk = inet_lookup_established(net, &tcp_hashinfo, - saddr, sport, daddr, dport, - in->ifindex); - break; - default: - WARN_ON(1); - sk = NULL; - break; - } - break; - case IPPROTO_UDP: - sk = udp4_lib_lookup(net, saddr, sport, daddr, dport, - in->ifindex); - break; - default: - WARN_ON(1); - sk = NULL; - } - - pr_debug("tproxy socket lookup: proto %u %08x:%u -> %08x:%u, lookup type: %d, sock %p\n", - protocol, ntohl(saddr), ntohs(sport), ntohl(daddr), ntohs(dport), lookup_type, sk); - - return sk; -} -EXPORT_SYMBOL_GPL(nf_tproxy_get_sock_v4); static void nf_tproxy_destructor(struct sk_buff *skb) -- cgit v1.2.3 From 093d282321daeb19c107e5f1f16d7f68484f3ade Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Thu, 21 Oct 2010 13:06:43 +0200 Subject: tproxy: fix hash locking issue when using port redirection in __inet_inherit_port() When __inet_inherit_port() is called on a tproxy connection the wrong locks are held for the inet_bind_bucket it is added to. __inet_inherit_port() made an implicit assumption that the listener's port number (and thus its bind bucket). Unfortunately, if you're using the TPROXY target to redirect skbs to a transparent proxy that assumption is not true anymore and things break. This patch adds code to __inet_inherit_port() so that it can handle this case by looking up or creating a new bind bucket for the child socket and updates callers of __inet_inherit_port() to gracefully handle __inet_inherit_port() failing. Reported by and original patch from Stephen Buck . See http://marc.info/?t=128169268200001&r=1&w=2 for the original discussion. Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/net/inet_hashtables.h | 2 +- net/dccp/ipv4.c | 10 +++++++--- net/dccp/ipv6.c | 10 +++++++--- net/ipv4/inet_hashtables.c | 28 ++++++++++++++++++++++++++-- net/ipv4/tcp_ipv4.c | 10 +++++++--- net/ipv6/tcp_ipv6.c | 12 ++++++++---- 6 files changed, 56 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 74358d1b3f4..e9c2ed8af86 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -245,7 +245,7 @@ static inline int inet_sk_listen_hashfn(const struct sock *sk) } /* Caller must disable local BH processing. */ -extern void __inet_inherit_port(struct sock *sk, struct sock *child); +extern int __inet_inherit_port(struct sock *sk, struct sock *child); extern void inet_put_port(struct sock *sk); diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index d4a166f0f39..3f69ea11482 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -392,7 +392,7 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, newsk = dccp_create_openreq_child(sk, req, skb); if (newsk == NULL) - goto exit; + goto exit_nonewsk; sk_setup_caps(newsk, dst); @@ -409,16 +409,20 @@ struct sock *dccp_v4_request_recv_sock(struct sock *sk, struct sk_buff *skb, dccp_sync_mss(newsk, dst_mtu(dst)); + if (__inet_inherit_port(sk, newsk) < 0) { + sock_put(newsk); + goto exit; + } __inet_hash_nolisten(newsk, NULL); - __inet_inherit_port(sk, newsk); return newsk; exit_overflow: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); +exit_nonewsk: + dst_release(dst); exit: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); - dst_release(dst); return NULL; } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 6e3f32575df..dca711df9b6 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -564,7 +564,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, newsk = dccp_create_openreq_child(sk, req, skb); if (newsk == NULL) - goto out; + goto out_nonewsk; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks @@ -632,18 +632,22 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; newinet->inet_rcv_saddr = LOOPBACK4_IPV6; + if (__inet_inherit_port(sk, newsk) < 0) { + sock_put(newsk); + goto out; + } __inet6_hash(newsk, NULL); - __inet_inherit_port(sk, newsk); return newsk; out_overflow: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); +out_nonewsk: + dst_release(dst); out: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); if (opt != NULL && opt != np->opt) sock_kfree_s(sk, opt, opt->tot_len); - dst_release(dst); return NULL; } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index fb7ad5a21ff..1b344f30b46 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -101,19 +101,43 @@ void inet_put_port(struct sock *sk) } EXPORT_SYMBOL(inet_put_port); -void __inet_inherit_port(struct sock *sk, struct sock *child) +int __inet_inherit_port(struct sock *sk, struct sock *child) { struct inet_hashinfo *table = sk->sk_prot->h.hashinfo; - const int bhash = inet_bhashfn(sock_net(sk), inet_sk(child)->inet_num, + unsigned short port = inet_sk(child)->inet_num; + const int bhash = inet_bhashfn(sock_net(sk), port, table->bhash_size); struct inet_bind_hashbucket *head = &table->bhash[bhash]; struct inet_bind_bucket *tb; spin_lock(&head->lock); tb = inet_csk(sk)->icsk_bind_hash; + if (tb->port != port) { + /* NOTE: using tproxy and redirecting skbs to a proxy + * on a different listener port breaks the assumption + * that the listener socket's icsk_bind_hash is the same + * as that of the child socket. We have to look up or + * create a new bind bucket for the child here. */ + struct hlist_node *node; + inet_bind_bucket_for_each(tb, node, &head->chain) { + if (net_eq(ib_net(tb), sock_net(sk)) && + tb->port == port) + break; + } + if (!node) { + tb = inet_bind_bucket_create(table->bind_bucket_cachep, + sock_net(sk), head, port); + if (!tb) { + spin_unlock(&head->lock); + return -ENOMEM; + } + } + } sk_add_bind_node(child, &tb->owners); inet_csk(child)->icsk_bind_hash = tb; spin_unlock(&head->lock); + + return 0; } EXPORT_SYMBOL_GPL(__inet_inherit_port); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index a0232f3a358..8f8527d4168 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1422,7 +1422,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk = tcp_create_openreq_child(sk, req, skb); if (!newsk) - goto exit; + goto exit_nonewsk; newsk->sk_gso_type = SKB_GSO_TCPV4; sk_setup_caps(newsk, dst); @@ -1469,16 +1469,20 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, } #endif + if (__inet_inherit_port(sk, newsk) < 0) { + sock_put(newsk); + goto exit; + } __inet_hash_nolisten(newsk, NULL); - __inet_inherit_port(sk, newsk); return newsk; exit_overflow: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); +exit_nonewsk: + dst_release(dst); exit: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); - dst_release(dst); return NULL; } EXPORT_SYMBOL(tcp_v4_syn_recv_sock); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index fe6d40418c0..ba5258ef1c5 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1409,7 +1409,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk = tcp_create_openreq_child(sk, req, skb); if (newsk == NULL) - goto out; + goto out_nonewsk; /* * No need to charge this sock to the relevant IPv6 refcnt debug socks @@ -1497,18 +1497,22 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, } #endif + if (__inet_inherit_port(sk, newsk) < 0) { + sock_put(newsk); + goto out; + } __inet6_hash(newsk, NULL); - __inet_inherit_port(sk, newsk); return newsk; out_overflow: NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENOVERFLOWS); -out: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); +out_nonewsk: if (opt && opt != np->opt) sock_kfree_s(sk, opt, opt->tot_len); dst_release(dst); +out: + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); return NULL; } -- cgit v1.2.3 From 32a875adcdcf5f470bf967250cfd01722e23844f Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 19 Oct 2010 06:48:16 +0000 Subject: 9p: client code cleanup Make p9_client_version static since only used in one file. Remove p9_client_auth because it is defined but never used. Compile tested only. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/9p/client.h | 3 --- net/9p/client.c | 51 +------------------------------------------------ 2 files changed, 1 insertion(+), 53 deletions(-) (limited to 'include/net') diff --git a/include/net/9p/client.h b/include/net/9p/client.h index d1aa2cfb30f..7f63d5ab7b4 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -212,15 +212,12 @@ struct p9_dirent { int p9_client_statfs(struct p9_fid *fid, struct p9_rstatfs *sb); int p9_client_rename(struct p9_fid *fid, struct p9_fid *newdirfid, char *name); -int p9_client_version(struct p9_client *); struct p9_client *p9_client_create(const char *dev_name, char *options); void p9_client_destroy(struct p9_client *clnt); void p9_client_disconnect(struct p9_client *clnt); void p9_client_begin_disconnect(struct p9_client *clnt); struct p9_fid *p9_client_attach(struct p9_client *clnt, struct p9_fid *afid, char *uname, u32 n_uname, char *aname); -struct p9_fid *p9_client_auth(struct p9_client *clnt, char *uname, - u32 n_uname, char *aname); struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, int clone); int p9_client_open(struct p9_fid *fid, int mode); diff --git a/net/9p/client.c b/net/9p/client.c index b5e1aa8d718..83bf0541d66 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -671,7 +671,7 @@ static void p9_fid_destroy(struct p9_fid *fid) kfree(fid); } -int p9_client_version(struct p9_client *c) +static int p9_client_version(struct p9_client *c) { int err = 0; struct p9_req_t *req; @@ -730,7 +730,6 @@ error: return err; } -EXPORT_SYMBOL(p9_client_version); struct p9_client *p9_client_create(const char *dev_name, char *options) { @@ -887,54 +886,6 @@ error: } EXPORT_SYMBOL(p9_client_attach); -struct p9_fid * -p9_client_auth(struct p9_client *clnt, char *uname, u32 n_uname, char *aname) -{ - int err; - struct p9_req_t *req; - struct p9_qid qid; - struct p9_fid *afid; - - P9_DPRINTK(P9_DEBUG_9P, ">>> TAUTH uname %s aname %s\n", uname, aname); - err = 0; - - afid = p9_fid_create(clnt); - if (IS_ERR(afid)) { - err = PTR_ERR(afid); - afid = NULL; - goto error; - } - - req = p9_client_rpc(clnt, P9_TAUTH, "dss?d", - afid ? afid->fid : P9_NOFID, uname, aname, n_uname); - if (IS_ERR(req)) { - err = PTR_ERR(req); - goto error; - } - - err = p9pdu_readf(req->rc, clnt->proto_version, "Q", &qid); - if (err) { - p9pdu_dump(1, req->rc); - p9_free_req(clnt, req); - goto error; - } - - P9_DPRINTK(P9_DEBUG_9P, "<<< RAUTH qid %x.%llx.%x\n", - qid.type, - (unsigned long long)qid.path, - qid.version); - - memmove(&afid->qid, &qid, sizeof(struct p9_qid)); - p9_free_req(clnt, req); - return afid; - -error: - if (afid) - p9_fid_destroy(afid); - return ERR_PTR(err); -} -EXPORT_SYMBOL(p9_client_auth); - struct p9_fid *p9_client_walk(struct p9_fid *oldfid, int nwname, char **wnames, int clone) { -- cgit v1.2.3 From e97c3e278e951501c2f385de70c3ceacdea78c4a Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Thu, 21 Oct 2010 16:03:43 +0200 Subject: tproxy: split off ipv6 defragmentation to a separate module Like with IPv4, TProxy needs IPv6 defragmentation but does not require connection tracking. Since defragmentation was coupled with conntrack, I split off the two, creating an nf_defrag_ipv6 module, similar to the already existing nf_defrag_ipv4. Signed-off-by: Balazs Scheidler Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/net/netfilter/ipv6/nf_defrag_ipv6.h | 6 ++ net/ipv6/netfilter/Makefile | 5 +- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 78 +-------------- net/ipv6/netfilter/nf_conntrack_reasm.c | 14 ++- net/ipv6/netfilter/nf_defrag_ipv6_hooks.c | 131 +++++++++++++++++++++++++ 5 files changed, 156 insertions(+), 78 deletions(-) create mode 100644 include/net/netfilter/ipv6/nf_defrag_ipv6.h create mode 100644 net/ipv6/netfilter/nf_defrag_ipv6_hooks.c (limited to 'include/net') diff --git a/include/net/netfilter/ipv6/nf_defrag_ipv6.h b/include/net/netfilter/ipv6/nf_defrag_ipv6.h new file mode 100644 index 00000000000..94dd54d76b4 --- /dev/null +++ b/include/net/netfilter/ipv6/nf_defrag_ipv6.h @@ -0,0 +1,6 @@ +#ifndef _NF_DEFRAG_IPV6_H +#define _NF_DEFRAG_IPV6_H + +extern void nf_defrag_ipv6_enable(void); + +#endif /* _NF_DEFRAG_IPV6_H */ diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index aafbba30c89..3f8e4a3d83c 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -11,10 +11,11 @@ obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o # objects for l3 independent conntrack -nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o nf_conntrack_reasm.o +nf_conntrack_ipv6-objs := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o +nf_defrag_ipv6-objs := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o # l3 independent conntrack -obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o +obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o # matches obj-$(CONFIG_IP6_NF_MATCH_AH) += ip6t_ah.o diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index ff43461704b..c8af58b2256 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -16,7 +16,6 @@ #include #include #include -#include #include #include @@ -29,6 +28,7 @@ #include #include #include +#include #include static bool ipv6_pkt_to_tuple(const struct sk_buff *skb, unsigned int nhoff, @@ -189,53 +189,6 @@ out: return nf_conntrack_confirm(skb); } -static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, - struct sk_buff *skb) -{ - u16 zone = NF_CT_DEFAULT_ZONE; - - if (skb->nfct) - zone = nf_ct_zone((struct nf_conn *)skb->nfct); - -#ifdef CONFIG_BRIDGE_NETFILTER - if (skb->nf_bridge && - skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) - return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone; -#endif - if (hooknum == NF_INET_PRE_ROUTING) - return IP6_DEFRAG_CONNTRACK_IN + zone; - else - return IP6_DEFRAG_CONNTRACK_OUT + zone; - -} - -static unsigned int ipv6_defrag(unsigned int hooknum, - struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) -{ - struct sk_buff *reasm; - - /* Previously seen (loopback)? */ - if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) - return NF_ACCEPT; - - reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb)); - /* queued */ - if (reasm == NULL) - return NF_STOLEN; - - /* error occured or not fragmented */ - if (reasm == skb) - return NF_ACCEPT; - - nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in, - (struct net_device *)out, okfn); - - return NF_STOLEN; -} - static unsigned int __ipv6_conntrack_in(struct net *net, unsigned int hooknum, struct sk_buff *skb, @@ -287,13 +240,6 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum, } static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { - { - .hook = ipv6_defrag, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_PRE_ROUTING, - .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, - }, { .hook = ipv6_conntrack_in, .owner = THIS_MODULE, @@ -308,13 +254,6 @@ static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { .hooknum = NF_INET_LOCAL_OUT, .priority = NF_IP6_PRI_CONNTRACK, }, - { - .hook = ipv6_defrag, - .owner = THIS_MODULE, - .pf = NFPROTO_IPV6, - .hooknum = NF_INET_LOCAL_OUT, - .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, - }, { .hook = ipv6_confirm, .owner = THIS_MODULE, @@ -386,10 +325,6 @@ struct nf_conntrack_l3proto nf_conntrack_l3proto_ipv6 __read_mostly = { .nlattr_tuple_size = ipv6_nlattr_tuple_size, .nlattr_to_tuple = ipv6_nlattr_to_tuple, .nla_policy = ipv6_nla_policy, -#endif -#ifdef CONFIG_SYSCTL - .ctl_table_path = nf_net_netfilter_sysctl_path, - .ctl_table = nf_ct_ipv6_sysctl_table, #endif .me = THIS_MODULE, }; @@ -403,16 +338,12 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) int ret = 0; need_conntrack(); + nf_defrag_ipv6_enable(); - ret = nf_ct_frag6_init(); - if (ret < 0) { - pr_err("nf_conntrack_ipv6: can't initialize frag6.\n"); - return ret; - } ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_tcp6); if (ret < 0) { pr_err("nf_conntrack_ipv6: can't register tcp.\n"); - goto cleanup_frag6; + return ret; } ret = nf_conntrack_l4proto_register(&nf_conntrack_l4proto_udp6); @@ -450,8 +381,6 @@ static int __init nf_conntrack_l3proto_ipv6_init(void) nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6); cleanup_tcp: nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6); - cleanup_frag6: - nf_ct_frag6_cleanup(); return ret; } @@ -463,7 +392,6 @@ static void __exit nf_conntrack_l3proto_ipv6_fini(void) nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_icmpv6); nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_udp6); nf_conntrack_l4proto_unregister(&nf_conntrack_l4proto_tcp6); - nf_ct_frag6_cleanup(); } module_init(nf_conntrack_l3proto_ipv6_init); diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 138a8b36270..489d71b844a 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -73,7 +73,7 @@ static struct inet_frags nf_frags; static struct netns_frags nf_init_frags; #ifdef CONFIG_SYSCTL -struct ctl_table nf_ct_ipv6_sysctl_table[] = { +struct ctl_table nf_ct_frag6_sysctl_table[] = { { .procname = "nf_conntrack_frag6_timeout", .data = &nf_init_frags.timeout, @@ -97,6 +97,8 @@ struct ctl_table nf_ct_ipv6_sysctl_table[] = { }, { } }; + +static struct ctl_table_header *nf_ct_frag6_sysctl_header; #endif static unsigned int nf_hashfn(struct inet_frag_queue *q) @@ -623,11 +625,21 @@ int nf_ct_frag6_init(void) inet_frags_init_net(&nf_init_frags); inet_frags_init(&nf_frags); + nf_ct_frag6_sysctl_header = register_sysctl_paths(nf_net_netfilter_sysctl_path, + nf_ct_frag6_sysctl_table); + if (!nf_ct_frag6_sysctl_header) { + inet_frags_fini(&nf_frags); + return -ENOMEM; + } + return 0; } void nf_ct_frag6_cleanup(void) { + unregister_sysctl_table(nf_ct_frag6_sysctl_header); + nf_ct_frag6_sysctl_header = NULL; + inet_frags_fini(&nf_frags); nf_init_frags.low_thresh = 0; diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c new file mode 100644 index 00000000000..99abfb53bab --- /dev/null +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -0,0 +1,131 @@ +/* (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2004 Netfilter Core Team + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, + struct sk_buff *skb) +{ + u16 zone = NF_CT_DEFAULT_ZONE; + + if (skb->nfct) + zone = nf_ct_zone((struct nf_conn *)skb->nfct); + +#ifdef CONFIG_BRIDGE_NETFILTER + if (skb->nf_bridge && + skb->nf_bridge->mask & BRNF_NF_BRIDGE_PREROUTING) + return IP6_DEFRAG_CONNTRACK_BRIDGE_IN + zone; +#endif + if (hooknum == NF_INET_PRE_ROUTING) + return IP6_DEFRAG_CONNTRACK_IN + zone; + else + return IP6_DEFRAG_CONNTRACK_OUT + zone; + +} + +static unsigned int ipv6_defrag(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct sk_buff *reasm; + + /* Previously seen (loopback)? */ + if (skb->nfct && !nf_ct_is_template((struct nf_conn *)skb->nfct)) + return NF_ACCEPT; + + reasm = nf_ct_frag6_gather(skb, nf_ct6_defrag_user(hooknum, skb)); + /* queued */ + if (reasm == NULL) + return NF_STOLEN; + + /* error occured or not fragmented */ + if (reasm == skb) + return NF_ACCEPT; + + nf_ct_frag6_output(hooknum, reasm, (struct net_device *)in, + (struct net_device *)out, okfn); + + return NF_STOLEN; +} + +static struct nf_hook_ops ipv6_defrag_ops[] = { + { + .hook = ipv6_defrag, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, + }, + { + .hook = ipv6_defrag, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP6_PRI_CONNTRACK_DEFRAG, + }, +}; + +static int __init nf_defrag_init(void) +{ + int ret = 0; + + ret = nf_ct_frag6_init(); + if (ret < 0) { + pr_err("nf_defrag_ipv6: can't initialize frag6.\n"); + return ret; + } + ret = nf_register_hooks(ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops)); + if (ret < 0) { + pr_err("nf_defrag_ipv6: can't register hooks\n"); + goto cleanup_frag6; + } + return ret; + +cleanup_frag6: + nf_ct_frag6_cleanup(); + return ret; + +} + +static void __exit nf_defrag_fini(void) +{ + nf_unregister_hooks(ipv6_defrag_ops, ARRAY_SIZE(ipv6_defrag_ops)); + nf_ct_frag6_cleanup(); +} + +void nf_defrag_ipv6_enable(void) +{ +} +EXPORT_SYMBOL_GPL(nf_defrag_ipv6_enable); + +module_init(nf_defrag_init); +module_exit(nf_defrag_fini); + +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From aa976fc011efa1f0e3290c6c9addf7c20757f885 Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Thu, 21 Oct 2010 16:05:41 +0200 Subject: tproxy: added udp6_lib_lookup function Just like with IPv4, we need access to the UDP hash table to look up local sockets, but instead of exporting the global udp_table, export a lookup function. Signed-off-by: Balazs Scheidler Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/net/udp.h | 3 +++ net/ipv6/udp.c | 8 ++++++++ 2 files changed, 11 insertions(+) (limited to 'include/net') diff --git a/include/net/udp.h b/include/net/udp.h index a184d3496b1..200b82848c9 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -183,6 +183,9 @@ extern int udp_lib_setsockopt(struct sock *sk, int level, int optname, extern struct sock *udp4_lib_lookup(struct net *net, __be32 saddr, __be16 sport, __be32 daddr, __be16 dport, int dif); +extern struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, __be16 dport, + int dif); /* * SNMP statistics for UDP and UDP-Lite diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 33e368318d4..c84dad43211 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -320,6 +320,14 @@ static struct sock *__udp6_lib_lookup_skb(struct sk_buff *skb, udptable); } +struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be16 sport, + const struct in6_addr *daddr, __be16 dport, int dif) +{ + return __udp6_lib_lookup(net, saddr, sport, daddr, dport, dif, &udp_table); +} +EXPORT_SYMBOL_GPL(udp6_lib_lookup); + + /* * This should be easy, if there is something there we * return it, otherwise we block. -- cgit v1.2.3 From 3b9afb29917f4ab08decf358ecfd354a72a91ac0 Mon Sep 17 00:00:00 2001 From: Balazs Scheidler Date: Thu, 21 Oct 2010 16:12:14 +0200 Subject: tproxy: added IPv6 socket lookup function to nf_tproxy_core Signed-off-by: Balazs Scheidler Signed-off-by: KOVACS Krisztian Signed-off-by: Patrick McHardy --- include/net/netfilter/nf_tproxy_core.h | 72 +++++++++++++++++++++++++++++++++- 1 file changed, 71 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_tproxy_core.h b/include/net/netfilter/nf_tproxy_core.h index 1027d7f901a..cd85b3bc832 100644 --- a/include/net/netfilter/nf_tproxy_core.h +++ b/include/net/netfilter/nf_tproxy_core.h @@ -5,7 +5,8 @@ #include #include #include -#include +#include +#include #include #define NFT_LOOKUP_ANY 0 @@ -130,6 +131,75 @@ nf_tproxy_get_sock_v4(struct net *net, const u8 protocol, return sk; } +#if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) +static inline struct sock * +nf_tproxy_get_sock_v6(struct net *net, const u8 protocol, + const struct in6_addr *saddr, const struct in6_addr *daddr, + const __be16 sport, const __be16 dport, + const struct net_device *in, int lookup_type) +{ + struct sock *sk; + + /* look up socket */ + switch (protocol) { + case IPPROTO_TCP: + switch (lookup_type) { + case NFT_LOOKUP_ANY: + sk = inet6_lookup(net, &tcp_hashinfo, + saddr, sport, daddr, dport, + in->ifindex); + break; + case NFT_LOOKUP_LISTENER: + sk = inet6_lookup_listener(net, &tcp_hashinfo, + daddr, ntohs(dport), + in->ifindex); + + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too */ + + break; + case NFT_LOOKUP_ESTABLISHED: + sk = __inet6_lookup_established(net, &tcp_hashinfo, + saddr, sport, daddr, ntohs(dport), + in->ifindex); + break; + default: + WARN_ON(1); + sk = NULL; + break; + } + break; + case IPPROTO_UDP: + sk = udp6_lib_lookup(net, saddr, sport, daddr, dport, + in->ifindex); + if (sk && lookup_type != NFT_LOOKUP_ANY) { + int connected = (sk->sk_state == TCP_ESTABLISHED); + int wildcard = ipv6_addr_any(&inet6_sk(sk)->rcv_saddr); + + /* NOTE: we return listeners even if bound to + * 0.0.0.0, those are filtered out in + * xt_socket, since xt_TPROXY needs 0 bound + * listeners too */ + if ((lookup_type == NFT_LOOKUP_ESTABLISHED && (!connected || wildcard)) || + (lookup_type == NFT_LOOKUP_LISTENER && connected)) { + sock_put(sk); + sk = NULL; + } + } + break; + default: + WARN_ON(1); + sk = NULL; + } + + pr_debug("tproxy socket lookup: proto %u %pI6:%u -> %pI6:%u, lookup type: %d, sock %p\n", + protocol, saddr, ntohs(sport), daddr, ntohs(dport), lookup_type, sk); + + return sk; +} +#endif static inline void nf_tproxy_put_sock(struct sock *sk) -- cgit v1.2.3