From d8e2be90d301a0381e9b2528fe2835cf2992bca3 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Tue, 18 Jul 2006 21:30:34 +0100 Subject: [PATCH] ieee80211: small ERP handling additions This adds a flag to the ieee80211_network structure which indicates whether the stored erp_value is valid (a check against 0 is not enough, since an ERP of 0 is valid and very meaningful). I also added the ERP IE bit-definitions to ieee80211.h. This is needed by some upcoming softmac patches. Signed-off-by: Daniel Drake Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/ieee80211.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/net') diff --git a/include/net/ieee80211.h b/include/net/ieee80211.h index ecc42864b001..aa007f49bf29 100644 --- a/include/net/ieee80211.h +++ b/include/net/ieee80211.h @@ -240,6 +240,11 @@ struct ieee80211_snap_hdr { #define WLAN_CAPABILITY_SHORT_SLOT_TIME (1<<10) #define WLAN_CAPABILITY_DSSS_OFDM (1<<13) +/* 802.11g ERP information element */ +#define WLAN_ERP_NON_ERP_PRESENT (1<<0) +#define WLAN_ERP_USE_PROTECTION (1<<1) +#define WLAN_ERP_BARKER_PREAMBLE (1<<2) + /* Status codes */ enum ieee80211_statuscode { WLAN_STATUS_SUCCESS = 0, @@ -747,6 +752,8 @@ struct ieee80211_txb { #define NETWORK_HAS_IBSS_DFS (1<<8) #define NETWORK_HAS_TPC_REPORT (1<<9) +#define NETWORK_HAS_ERP_VALUE (1<<10) + #define QOS_QUEUE_NUM 4 #define QOS_OUI_LEN 3 #define QOS_OUI_TYPE 2 -- cgit v1.3-8-gc7d7 From 5acd0c4153be25269d7cb9a4b09fd6db571c5cc1 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Tue, 18 Jul 2006 21:33:27 +0100 Subject: [PATCH] softmac: ERP handling and driver-level notifications This patch implements ERP handling in softmac so that the drivers can support protection and preambles properly. I added a new struct, ieee80211softmac_bss_info, which is used for BSS-dependent variables like these. A new hook has been added (bssinfo_change), which allows the drivers to be notified when anything in bssinfo changes. I modified the txrates_change API to match the bssinfo_change API. The existing one is a little messy and the usefulness of providing the old rates is questionable (and can be implemented at driver level if really necessary). No drivers are using this API (yet), so this should be safe. Signed-off-by: Daniel Drake Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/ieee80211softmac.h | 52 +++++++++++++++-- net/ieee80211/softmac/ieee80211softmac_assoc.c | 21 +++++-- net/ieee80211/softmac/ieee80211softmac_io.c | 14 +++++ net/ieee80211/softmac/ieee80211softmac_module.c | 78 ++++++++++++++++++------- net/ieee80211/softmac/ieee80211softmac_priv.h | 8 ++- 5 files changed, 142 insertions(+), 31 deletions(-) (limited to 'include/net') diff --git a/include/net/ieee80211softmac.h b/include/net/ieee80211softmac.h index 00ad810eb883..c27d03433c21 100644 --- a/include/net/ieee80211softmac.h +++ b/include/net/ieee80211softmac.h @@ -86,9 +86,6 @@ struct ieee80211softmac_assoc_info { /* BSSID we're trying to associate to */ char bssid[ETH_ALEN]; - - /* Rates supported by the network */ - struct ieee80211softmac_ratesinfo supported_rates; /* some flags. * static_essid is valid if the essid is constant, @@ -103,6 +100,7 @@ struct ieee80211softmac_assoc_info { * bssfixed is used for SIOCSIWAP. */ u8 static_essid:1, + short_preamble_available:1, associating:1, assoc_wait:1, bssvalid:1, @@ -115,6 +113,19 @@ struct ieee80211softmac_assoc_info { struct work_struct timeout; }; +struct ieee80211softmac_bss_info { + /* Rates supported by the network */ + struct ieee80211softmac_ratesinfo supported_rates; + + /* This indicates whether frames can currently be transmitted with + * short preamble (only use this variable during TX at CCK rates) */ + u8 short_preamble:1; + + /* This indicates whether protection (e.g. self-CTS) should be used + * when transmitting with OFDM modulation */ + u8 use_protection:1; +}; + enum { IEEE80211SOFTMAC_AUTH_OPEN_REQUEST = 1, IEEE80211SOFTMAC_AUTH_OPEN_RESPONSE = 2, @@ -157,6 +168,10 @@ struct ieee80211softmac_txrates { #define IEEE80211SOFTMAC_TXRATECHG_MCAST (1 << 2) /* mcast_rate */ #define IEEE80211SOFTMAC_TXRATECHG_MGT_MCAST (1 << 3) /* mgt_mcast_rate */ +#define IEEE80211SOFTMAC_BSSINFOCHG_RATES (1 << 0) /* supported_rates */ +#define IEEE80211SOFTMAC_BSSINFOCHG_SHORT_PREAMBLE (1 << 1) /* short_preamble */ +#define IEEE80211SOFTMAC_BSSINFOCHG_PROTECTION (1 << 2) /* use_protection */ + struct ieee80211softmac_device { /* 802.11 structure for data stuff */ struct ieee80211_device *ieee; @@ -200,10 +215,16 @@ struct ieee80211softmac_device { * The driver just needs to read them. */ struct ieee80211softmac_txrates txrates; - /* If the driver needs to do stuff on TX rate changes, assign this callback. */ + + /* If the driver needs to do stuff on TX rate changes, assign this + * callback. See IEEE80211SOFTMAC_TXRATECHG for change flags. */ void (*txrates_change)(struct net_device *dev, - u32 changes, /* see IEEE80211SOFTMAC_TXRATECHG flags */ - const struct ieee80211softmac_txrates *rates_before_change); + u32 changes); + + /* If the driver needs to do stuff when BSS properties change, assign + * this callback. see IEEE80211SOFTMAC_BSSINFOCHG for change flags. */ + void (*bssinfo_change)(struct net_device *dev, + u32 changes); /* private stuff follows */ /* this lock protects this structure */ @@ -216,6 +237,7 @@ struct ieee80211softmac_device { struct ieee80211softmac_scaninfo *scaninfo; struct ieee80211softmac_assoc_info associnfo; + struct ieee80211softmac_bss_info bssinfo; struct list_head auth_queue; struct list_head events; @@ -279,6 +301,24 @@ static inline u8 ieee80211softmac_suggest_txrate(struct ieee80211softmac_device return txrates->mcast_rate; } +/* Helper function which advises you when it is safe to transmit with short + * preamble. + * You should only call this function when transmitting at CCK rates. */ +static inline int ieee80211softmac_short_preamble_ok(struct ieee80211softmac_device *mac, + int is_multicast, + int is_mgt) +{ + return (is_multicast && is_mgt) ? 0 : mac->bssinfo.short_preamble; +} + +/* Helper function which advises you whether protection (e.g. self-CTS) is + * needed. 1 = protection needed, 0 = no protection needed + * Only use this function when transmitting with OFDM modulation. */ +static inline int ieee80211softmac_protection_needed(struct ieee80211softmac_device *mac) +{ + return mac->bssinfo.use_protection; +} + /* Start the SoftMAC. Call this after you initialized the device * and it is ready to run. */ diff --git a/net/ieee80211/softmac/ieee80211softmac_assoc.c b/net/ieee80211/softmac/ieee80211softmac_assoc.c index 44215ce64d4e..589f6d2c548a 100644 --- a/net/ieee80211/softmac/ieee80211softmac_assoc.c +++ b/net/ieee80211/softmac/ieee80211softmac_assoc.c @@ -96,7 +96,7 @@ ieee80211softmac_disassoc(struct ieee80211softmac_device *mac) mac->associated = 0; mac->associnfo.bssvalid = 0; mac->associnfo.associating = 0; - ieee80211softmac_init_txrates(mac); + ieee80211softmac_init_bss(mac); ieee80211softmac_call_events_locked(mac, IEEE80211SOFTMAC_EVENT_DISASSOCIATED, NULL); spin_unlock_irqrestore(&mac->lock, flags); } @@ -334,11 +334,19 @@ ieee80211softmac_associated(struct ieee80211softmac_device *mac, struct ieee80211_assoc_response * resp, struct ieee80211softmac_network *net) { + u16 cap = le16_to_cpu(resp->capability); + u8 erp_value = net->erp_value; + mac->associnfo.associating = 0; - mac->associnfo.supported_rates = net->supported_rates; + mac->bssinfo.supported_rates = net->supported_rates; ieee80211softmac_recalc_txrates(mac); mac->associated = 1; + + mac->associnfo.short_preamble_available = + (cap & WLAN_CAPABILITY_SHORT_PREAMBLE) != 0; + ieee80211softmac_process_erp(mac, erp_value); + if (mac->set_bssid_filter) mac->set_bssid_filter(mac->dev, net->bssid); memcpy(mac->ieee->bssid, net->bssid, ETH_ALEN); @@ -351,9 +359,9 @@ ieee80211softmac_associated(struct ieee80211softmac_device *mac, int ieee80211softmac_handle_assoc_response(struct net_device * dev, struct ieee80211_assoc_response * resp, - struct ieee80211_network * _ieee80211_network_do_not_use) + struct ieee80211_network * _ieee80211_network) { - /* NOTE: the network parameter has to be ignored by + /* NOTE: the network parameter has to be mostly ignored by * this code because it is the ieee80211's pointer * to the struct, not ours (we made a copy) */ @@ -385,6 +393,11 @@ ieee80211softmac_handle_assoc_response(struct net_device * dev, /* now that we know it was for us, we can cancel the timeout */ cancel_delayed_work(&mac->associnfo.timeout); + /* if the association response included an ERP IE, update our saved + * copy */ + if (_ieee80211_network->flags & NETWORK_HAS_ERP_VALUE) + network->erp_value = _ieee80211_network->erp_value; + switch (status) { case 0: dprintk(KERN_INFO PFX "associated!\n"); diff --git a/net/ieee80211/softmac/ieee80211softmac_io.c b/net/ieee80211/softmac/ieee80211softmac_io.c index 6ae5a1dc7956..82bfddbf33a2 100644 --- a/net/ieee80211/softmac/ieee80211softmac_io.c +++ b/net/ieee80211/softmac/ieee80211softmac_io.c @@ -467,3 +467,17 @@ ieee80211softmac_send_mgt_frame(struct ieee80211softmac_device *mac, kfree(pkt); return 0; } + +/* Beacon handling */ +int ieee80211softmac_handle_beacon(struct net_device *dev, + struct ieee80211_beacon *beacon, + struct ieee80211_network *network) +{ + struct ieee80211softmac_device *mac = ieee80211_priv(dev); + + if (mac->associated && memcmp(network->bssid, mac->associnfo.bssid, ETH_ALEN) == 0) + ieee80211softmac_process_erp(mac, network->erp_value); + + return 0; +} + diff --git a/net/ieee80211/softmac/ieee80211softmac_module.c b/net/ieee80211/softmac/ieee80211softmac_module.c index 4b2e57d12418..c275646b2269 100644 --- a/net/ieee80211/softmac/ieee80211softmac_module.c +++ b/net/ieee80211/softmac/ieee80211softmac_module.c @@ -44,6 +44,7 @@ struct net_device *alloc_ieee80211softmac(int sizeof_priv) softmac->ieee->handle_assoc_response = ieee80211softmac_handle_assoc_response; softmac->ieee->handle_reassoc_request = ieee80211softmac_handle_reassoc_req; softmac->ieee->handle_disassoc = ieee80211softmac_handle_disassoc; + softmac->ieee->handle_beacon = ieee80211softmac_handle_beacon; softmac->scaninfo = NULL; softmac->associnfo.scan_retry = IEEE80211SOFTMAC_ASSOC_SCAN_RETRY_LIMIT; @@ -209,35 +210,59 @@ static u8 highest_supported_rate(struct ieee80211softmac_device *mac, return user_rate; } +void ieee80211softmac_process_erp(struct ieee80211softmac_device *mac, + u8 erp_value) +{ + int use_protection; + int short_preamble; + u32 changes = 0; + + /* Barker preamble mode */ + short_preamble = ((erp_value & WLAN_ERP_BARKER_PREAMBLE) == 0 + && mac->associnfo.short_preamble_available) ? 1 : 0; + + /* Protection needed? */ + use_protection = (erp_value & WLAN_ERP_USE_PROTECTION) != 0; + + if (mac->bssinfo.short_preamble != short_preamble) { + changes |= IEEE80211SOFTMAC_BSSINFOCHG_SHORT_PREAMBLE; + mac->bssinfo.short_preamble = short_preamble; + } + + if (mac->bssinfo.use_protection != use_protection) { + changes |= IEEE80211SOFTMAC_BSSINFOCHG_PROTECTION; + mac->bssinfo.use_protection = use_protection; + } + + if (mac->bssinfo_change && changes) + mac->bssinfo_change(mac->dev, changes); +} + void ieee80211softmac_recalc_txrates(struct ieee80211softmac_device *mac) { struct ieee80211softmac_txrates *txrates = &mac->txrates; - struct ieee80211softmac_txrates oldrates; u32 change = 0; - if (mac->txrates_change) - oldrates = mac->txrates; - change |= IEEE80211SOFTMAC_TXRATECHG_DEFAULT; - txrates->default_rate = highest_supported_rate(mac, &mac->associnfo.supported_rates, 0); + txrates->default_rate = highest_supported_rate(mac, &mac->bssinfo.supported_rates, 0); change |= IEEE80211SOFTMAC_TXRATECHG_DEFAULT_FBACK; txrates->default_fallback = lower_rate(mac, txrates->default_rate); change |= IEEE80211SOFTMAC_TXRATECHG_MCAST; - txrates->mcast_rate = highest_supported_rate(mac, &mac->associnfo.supported_rates, 1); + txrates->mcast_rate = highest_supported_rate(mac, &mac->bssinfo.supported_rates, 1); if (mac->txrates_change) - mac->txrates_change(mac->dev, change, &oldrates); + mac->txrates_change(mac->dev, change); } -void ieee80211softmac_init_txrates(struct ieee80211softmac_device *mac) +void ieee80211softmac_init_bss(struct ieee80211softmac_device *mac) { struct ieee80211_device *ieee = mac->ieee; u32 change = 0; struct ieee80211softmac_txrates *txrates = &mac->txrates; - struct ieee80211softmac_txrates oldrates; + struct ieee80211softmac_bss_info *bssinfo = &mac->bssinfo; /* TODO: We need some kind of state machine to lower the default rates * if we loose too many packets. @@ -245,8 +270,6 @@ void ieee80211softmac_init_txrates(struct ieee80211softmac_device *mac) /* Change the default txrate to the highest possible value. * The txrate machine will lower it, if it is too high. */ - if (mac->txrates_change) - oldrates = mac->txrates; /* FIXME: We don't correctly handle backing down to lower rates, so 801.11g devices start off at 11M for now. People can manually change it if they really need to, but 11M is @@ -272,7 +295,23 @@ void ieee80211softmac_init_txrates(struct ieee80211softmac_device *mac) change |= IEEE80211SOFTMAC_TXRATECHG_MGT_MCAST; if (mac->txrates_change) - mac->txrates_change(mac->dev, change, &oldrates); + mac->txrates_change(mac->dev, change); + + change = 0; + + bssinfo->supported_rates.count = 0; + memset(bssinfo->supported_rates.rates, 0, + sizeof(bssinfo->supported_rates.rates)); + change |= IEEE80211SOFTMAC_BSSINFOCHG_RATES; + + bssinfo->short_preamble = 0; + change |= IEEE80211SOFTMAC_BSSINFOCHG_SHORT_PREAMBLE; + + bssinfo->use_protection = 0; + change |= IEEE80211SOFTMAC_BSSINFOCHG_PROTECTION; + + if (mac->bssinfo_change) + mac->bssinfo_change(mac->dev, change); mac->running = 1; } @@ -282,7 +321,7 @@ void ieee80211softmac_start(struct net_device *dev) struct ieee80211softmac_device *mac = ieee80211_priv(dev); ieee80211softmac_start_check_rates(mac); - ieee80211softmac_init_txrates(mac); + ieee80211softmac_init_bss(mac); } EXPORT_SYMBOL_GPL(ieee80211softmac_start); @@ -335,7 +374,6 @@ u8 ieee80211softmac_lower_rate_delta(struct ieee80211softmac_device *mac, u8 rat static void ieee80211softmac_add_txrates_badness(struct ieee80211softmac_device *mac, int amount) { - struct ieee80211softmac_txrates oldrates; u8 default_rate = mac->txrates.default_rate; u8 default_fallback = mac->txrates.default_fallback; u32 changes = 0; @@ -348,8 +386,6 @@ printk("badness %d\n", mac->txrate_badness); mac->txrate_badness += amount; if (mac->txrate_badness <= -1000) { /* Very small badness. Try a faster bitrate. */ - if (mac->txrates_change) - memcpy(&oldrates, &mac->txrates, sizeof(oldrates)); default_rate = raise_rate(mac, default_rate); changes |= IEEE80211SOFTMAC_TXRATECHG_DEFAULT; default_fallback = get_fallback_rate(mac, default_rate); @@ -358,8 +394,6 @@ printk("badness %d\n", mac->txrate_badness); printk("Bitrate raised to %u\n", default_rate); } else if (mac->txrate_badness >= 10000) { /* Very high badness. Try a slower bitrate. */ - if (mac->txrates_change) - memcpy(&oldrates, &mac->txrates, sizeof(oldrates)); default_rate = lower_rate(mac, default_rate); changes |= IEEE80211SOFTMAC_TXRATECHG_DEFAULT; default_fallback = get_fallback_rate(mac, default_rate); @@ -372,7 +406,7 @@ printk("Bitrate lowered to %u\n", default_rate); mac->txrates.default_fallback = default_fallback; if (changes && mac->txrates_change) - mac->txrates_change(mac->dev, changes, &oldrates); + mac->txrates_change(mac->dev, changes); } void ieee80211softmac_fragment_lost(struct net_device *dev, @@ -416,7 +450,11 @@ ieee80211softmac_create_network(struct ieee80211softmac_device *mac, memcpy(&softnet->supported_rates.rates[softnet->supported_rates.count], net->rates_ex, net->rates_ex_len); softnet->supported_rates.count += net->rates_ex_len; sort(softnet->supported_rates.rates, softnet->supported_rates.count, sizeof(softnet->supported_rates.rates[0]), rate_cmp, NULL); - + + /* we save the ERP value because it is needed at association time, and + * many AP's do not include an ERP IE in the association response. */ + softnet->erp_value = net->erp_value; + softnet->capabilities = net->capability; return softnet; } diff --git a/net/ieee80211/softmac/ieee80211softmac_priv.h b/net/ieee80211/softmac/ieee80211softmac_priv.h index fa1f8e3acfc0..0642e090b8a7 100644 --- a/net/ieee80211/softmac/ieee80211softmac_priv.h +++ b/net/ieee80211/softmac/ieee80211softmac_priv.h @@ -116,9 +116,11 @@ ieee80211softmac_get_network_by_essid(struct ieee80211softmac_device *mac, struct ieee80211softmac_essid *essid); /* Rates related */ +void ieee80211softmac_process_erp(struct ieee80211softmac_device *mac, + u8 erp_value); int ieee80211softmac_ratesinfo_rate_supported(struct ieee80211softmac_ratesinfo *ri, u8 rate); u8 ieee80211softmac_lower_rate_delta(struct ieee80211softmac_device *mac, u8 rate, int delta); -void ieee80211softmac_init_txrates(struct ieee80211softmac_device *mac); +void ieee80211softmac_init_bss(struct ieee80211softmac_device *mac); void ieee80211softmac_recalc_txrates(struct ieee80211softmac_device *mac); static inline u8 lower_rate(struct ieee80211softmac_device *mac, u8 rate) { return ieee80211softmac_lower_rate_delta(mac, rate, 1); @@ -133,6 +135,9 @@ static inline u8 get_fallback_rate(struct ieee80211softmac_device *mac, u8 rate) /*** prototypes from _io.c */ int ieee80211softmac_send_mgt_frame(struct ieee80211softmac_device *mac, void* ptrarg, u32 type, u32 arg); +int ieee80211softmac_handle_beacon(struct net_device *dev, + struct ieee80211_beacon *beacon, + struct ieee80211_network *network); /*** prototypes from _auth.c */ /* do these have to go into the public header? */ @@ -189,6 +194,7 @@ struct ieee80211softmac_network { authenticated:1, auth_desynced_once:1; + u8 erp_value; /* Saved ERP value */ u16 capabilities; /* Capabilities bitfield */ u8 challenge_len; /* Auth Challenge length */ char *challenge; /* Challenge Text */ -- cgit v1.3-8-gc7d7 From d7712ac254a4ae2e9c927e29e37b8c7ac334e6ad Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Tue, 18 Jul 2006 21:34:56 +0100 Subject: [PATCH] softmac: export highest_supported_rate function zd1211 needs this functionality, no point duplicating it. Signed-off-by: Daniel Drake Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/ieee80211softmac.h | 8 ++++++++ net/ieee80211/softmac/ieee80211softmac_module.c | 16 +++++----------- 2 files changed, 13 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/ieee80211softmac.h b/include/net/ieee80211softmac.h index c27d03433c21..425b3a57ac74 100644 --- a/include/net/ieee80211softmac.h +++ b/include/net/ieee80211softmac.h @@ -279,6 +279,14 @@ extern void ieee80211softmac_fragment_lost(struct net_device *dev, * Note that the rates need to be sorted. */ extern void ieee80211softmac_set_rates(struct net_device *dev, u8 count, u8 *rates); +/* Finds the highest rate which is: + * 1. Present in ri (optionally a basic rate) + * 2. Supported by the device + * 3. Less than or equal to the user-defined rate + */ +extern u8 ieee80211softmac_highest_supported_rate(struct ieee80211softmac_device *mac, + struct ieee80211softmac_ratesinfo *ri, int basic_only); + /* Helper function which advises you the rate at which a frame should be * transmitted at. */ static inline u8 ieee80211softmac_suggest_txrate(struct ieee80211softmac_device *mac, diff --git a/net/ieee80211/softmac/ieee80211softmac_module.c b/net/ieee80211/softmac/ieee80211softmac_module.c index c275646b2269..addea1cf73ae 100644 --- a/net/ieee80211/softmac/ieee80211softmac_module.c +++ b/net/ieee80211/softmac/ieee80211softmac_module.c @@ -179,21 +179,14 @@ int ieee80211softmac_ratesinfo_rate_supported(struct ieee80211softmac_ratesinfo return 0; } -/* Finds the highest rate which is: - * 1. Present in ri (optionally a basic rate) - * 2. Supported by the device - * 3. Less than or equal to the user-defined rate - */ -static u8 highest_supported_rate(struct ieee80211softmac_device *mac, +u8 ieee80211softmac_highest_supported_rate(struct ieee80211softmac_device *mac, struct ieee80211softmac_ratesinfo *ri, int basic_only) { u8 user_rate = mac->txrates.user_rate; int i; - if (ri->count == 0) { - dprintk(KERN_ERR PFX "empty ratesinfo?\n"); + if (ri->count == 0) return IEEE80211_CCK_RATE_1MB; - } for (i = ri->count - 1; i >= 0; i--) { u8 rate = ri->rates[i]; @@ -209,6 +202,7 @@ static u8 highest_supported_rate(struct ieee80211softmac_device *mac, /* If we haven't found a suitable rate by now, just trust the user */ return user_rate; } +EXPORT_SYMBOL_GPL(ieee80211softmac_highest_supported_rate); void ieee80211softmac_process_erp(struct ieee80211softmac_device *mac, u8 erp_value) @@ -244,13 +238,13 @@ void ieee80211softmac_recalc_txrates(struct ieee80211softmac_device *mac) u32 change = 0; change |= IEEE80211SOFTMAC_TXRATECHG_DEFAULT; - txrates->default_rate = highest_supported_rate(mac, &mac->bssinfo.supported_rates, 0); + txrates->default_rate = ieee80211softmac_highest_supported_rate(mac, &mac->bssinfo.supported_rates, 0); change |= IEEE80211SOFTMAC_TXRATECHG_DEFAULT_FBACK; txrates->default_fallback = lower_rate(mac, txrates->default_rate); change |= IEEE80211SOFTMAC_TXRATECHG_MCAST; - txrates->mcast_rate = highest_supported_rate(mac, &mac->bssinfo.supported_rates, 1); + txrates->mcast_rate = ieee80211softmac_highest_supported_rate(mac, &mac->bssinfo.supported_rates, 1); if (mac->txrates_change) mac->txrates_change(mac->dev, change); -- cgit v1.3-8-gc7d7 From f2060f039e8a8bc83b10e6d0f8fb440425560569 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Tue, 18 Jul 2006 21:38:05 +0100 Subject: [PATCH] ieee80211: Make ieee80211_rx_any usable ieee80211_rx_any is new to 2.6.18-rc1, even though it appears this function was never completed: http://lists.sipsolutions.net/pipermail/softmac-dev/2006-February/000103.html This patch changes ieee80211_rx_any to always claim the skb, which avoids further driver complexity and the possibility of leaking management frames. It also exports the function so that people can actually use it. Signed-off-by: Daniel Drake Acked-by: Johannes Berg Signed-off-by: John W. Linville --- include/net/ieee80211.h | 2 ++ net/ieee80211/ieee80211_rx.c | 38 ++++++++++++++++++++++++++++---------- 2 files changed, 30 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/net/ieee80211.h b/include/net/ieee80211.h index aa007f49bf29..b174ebb277a9 100644 --- a/include/net/ieee80211.h +++ b/include/net/ieee80211.h @@ -1259,6 +1259,8 @@ extern int ieee80211_tx_frame(struct ieee80211_device *ieee, int total_len, int encrypt_mpdu); /* ieee80211_rx.c */ +extern void ieee80211_rx_any(struct ieee80211_device *ieee, + struct sk_buff *skb, struct ieee80211_rx_stats *stats); extern int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, struct ieee80211_rx_stats *rx_stats); /* make sure to set stats->len */ diff --git a/net/ieee80211/ieee80211_rx.c b/net/ieee80211/ieee80211_rx.c index f73fc164b9ef..d60358d702d7 100644 --- a/net/ieee80211/ieee80211_rx.c +++ b/net/ieee80211/ieee80211_rx.c @@ -779,33 +779,44 @@ int ieee80211_rx(struct ieee80211_device *ieee, struct sk_buff *skb, return 0; } -/* Filter out unrelated packets, call ieee80211_rx[_mgt] */ -int ieee80211_rx_any(struct ieee80211_device *ieee, +/* Filter out unrelated packets, call ieee80211_rx[_mgt] + * This function takes over the skb, it should not be used again after calling + * this function. */ +void ieee80211_rx_any(struct ieee80211_device *ieee, struct sk_buff *skb, struct ieee80211_rx_stats *stats) { struct ieee80211_hdr_4addr *hdr; int is_packet_for_us; u16 fc; - if (ieee->iw_mode == IW_MODE_MONITOR) - return ieee80211_rx(ieee, skb, stats) ? 0 : -EINVAL; + if (ieee->iw_mode == IW_MODE_MONITOR) { + if (!ieee80211_rx(ieee, skb, stats)) + dev_kfree_skb_irq(skb); + return; + } + + if (skb->len < sizeof(struct ieee80211_hdr)) + goto drop_free; hdr = (struct ieee80211_hdr_4addr *)skb->data; fc = le16_to_cpu(hdr->frame_ctl); if ((fc & IEEE80211_FCTL_VERS) != 0) - return -EINVAL; + goto drop_free; switch (fc & IEEE80211_FCTL_FTYPE) { case IEEE80211_FTYPE_MGMT: + if (skb->len < sizeof(struct ieee80211_hdr_3addr)) + goto drop_free; ieee80211_rx_mgt(ieee, hdr, stats); - return 0; + dev_kfree_skb_irq(skb); + return; case IEEE80211_FTYPE_DATA: break; case IEEE80211_FTYPE_CTL: - return 0; + return; default: - return -EINVAL; + return; } is_packet_for_us = 0; @@ -849,8 +860,14 @@ int ieee80211_rx_any(struct ieee80211_device *ieee, } if (is_packet_for_us) - return (ieee80211_rx(ieee, skb, stats) ? 0 : -EINVAL); - return 0; + if (!ieee80211_rx(ieee, skb, stats)) + dev_kfree_skb_irq(skb); + return; + +drop_free: + dev_kfree_skb_irq(skb); + ieee->stats.rx_dropped++; + return; } #define MGMT_FRAME_FIXED_PART_LENGTH 0x24 @@ -1730,5 +1747,6 @@ void ieee80211_rx_mgt(struct ieee80211_device *ieee, } } +EXPORT_SYMBOL_GPL(ieee80211_rx_any); EXPORT_SYMBOL(ieee80211_rx_mgt); EXPORT_SYMBOL(ieee80211_rx); -- cgit v1.3-8-gc7d7 From 9409f38a0c8773c04bff8dda8c552d7ea013d956 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 6 Aug 2006 19:49:12 +1000 Subject: [IPSEC]: Move linux/crypto.h inclusion out of net/xfrm.h The header file linux/crypto.h is only needed by a few files so including it in net/xfrm.h (which is included by half of the networking stack) is a waste. This patch moves it out of net/xfrm.h and into the specific header files that actually need it. Signed-off-by: Herbert Xu --- include/net/ah.h | 1 + include/net/esp.h | 1 + include/net/ipcomp.h | 4 ++++ include/net/xfrm.h | 2 +- net/xfrm/xfrm_user.c | 1 + 5 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ah.h b/include/net/ah.h index ceff00afae09..8e27c9ba8b84 100644 --- a/include/net/ah.h +++ b/include/net/ah.h @@ -1,6 +1,7 @@ #ifndef _NET_AH_H #define _NET_AH_H +#include #include /* This is the maximum truncated ICV length that we know of. */ diff --git a/include/net/esp.h b/include/net/esp.h index 90cd94fad7d9..6eb837973c84 100644 --- a/include/net/esp.h +++ b/include/net/esp.h @@ -1,6 +1,7 @@ #ifndef _NET_ESP_H #define _NET_ESP_H +#include #include #include diff --git a/include/net/ipcomp.h b/include/net/ipcomp.h index e651a57ecdd5..b94e3047b4d9 100644 --- a/include/net/ipcomp.h +++ b/include/net/ipcomp.h @@ -1,8 +1,12 @@ #ifndef _NET_IPCOMP_H #define _NET_IPCOMP_H +#include + #define IPCOMP_SCRATCH_SIZE 65400 +struct crypto_tfm; + struct ipcomp_data { u16 threshold; struct crypto_tfm **tfms; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 9c5ee9f20b65..10396b4bde14 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -8,7 +8,6 @@ #include #include #include -#include #include #include #include @@ -985,6 +984,7 @@ extern struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name, int probe); extern struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe); struct crypto_tfm; +struct scatterlist; typedef void (icv_update_fn_t)(struct crypto_tfm *, struct scatterlist *, unsigned int); extern void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm, diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 3e6a722d072e..7d18ca03c80d 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -10,6 +10,7 @@ * */ +#include #include #include #include -- cgit v1.3-8-gc7d7 From 04ff12609445c7b462d7fc7f2d30dad442c922f3 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 13 Aug 2006 08:50:00 +1000 Subject: [IPSEC]: Add compatibility algorithm name support This patch adds a compatibility name field for each IPsec algorithm. This is needed when parameterised algorithms are used. For example, "md5" will become "hmac(md5)", and "aes" will become "cbc(aes)". Signed-off-by: Herbert Xu --- include/net/xfrm.h | 1 + net/xfrm/xfrm_algo.c | 3 ++- net/xfrm/xfrm_user.c | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 10396b4bde14..e9114e41affc 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -854,6 +854,7 @@ struct xfrm_algo_comp_info { struct xfrm_algo_desc { char *name; + char *compat; u8 available:1; union { struct xfrm_algo_auth_info auth; diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 04e1aea58bc9..b68974b38741 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -359,7 +359,8 @@ static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list, return NULL; for (i = 0; i < entries; i++) { - if (strcmp(name, list[i].name)) + if (strcmp(name, list[i].name) && + (!list[i].compat || strcmp(name, list[i].compat))) continue; if (list[i].available) diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 7d18ca03c80d..fa79ddc4239e 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -213,6 +213,7 @@ static int attach_one_algo(struct xfrm_algo **algpp, u8 *props, return -ENOMEM; memcpy(p, ualg, len); + strcpy(p->alg_name, algo->name); *algpp = p; return 0; } -- cgit v1.3-8-gc7d7 From 6b7326c8497f954c2cfcb4c49fe42be5b80887bc Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 30 Jul 2006 15:41:01 +1000 Subject: [IPSEC] ESP: Use block ciphers where applicable This patch converts IPSec/ESP to use the new block cipher type where applicable. Similar to the HMAC conversion, existing algorithm names have been kept for compatibility. Signed-off-by: Herbert Xu --- include/net/esp.h | 2 +- net/ipv4/Kconfig | 1 + net/ipv4/esp4.c | 49 +++++++++++++++++++++++++++++-------------------- net/ipv6/Kconfig | 1 + net/ipv6/esp6.c | 48 ++++++++++++++++++++++++++++-------------------- net/xfrm/xfrm_algo.c | 24 ++++++++++++++++-------- 6 files changed, 76 insertions(+), 49 deletions(-) (limited to 'include/net') diff --git a/include/net/esp.h b/include/net/esp.h index 6eb837973c84..af2ff18700c7 100644 --- a/include/net/esp.h +++ b/include/net/esp.h @@ -22,7 +22,7 @@ struct esp_data * >= crypto_tfm_alg_ivsize(tfm). */ int ivlen; int padlen; /* 0..255 */ - struct crypto_tfm *tfm; /* crypto handle */ + struct crypto_blkcipher *tfm; /* crypto handle */ } conf; /* Integrity. It is active when icv_full_len != 0 */ diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 8514106761b0..3b5d504a74be 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -386,6 +386,7 @@ config INET_ESP select CRYPTO select CRYPTO_HMAC select CRYPTO_MD5 + select CRYPTO_CBC select CRYPTO_SHA1 select CRYPTO_DES ---help--- diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index fc2f8ce441de..7c63ae494742 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -16,7 +17,8 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) int err; struct iphdr *top_iph; struct ip_esp_hdr *esph; - struct crypto_tfm *tfm; + struct crypto_blkcipher *tfm; + struct blkcipher_desc desc; struct esp_data *esp; struct sk_buff *trailer; int blksize; @@ -36,7 +38,9 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) esp = x->data; alen = esp->auth.icv_trunc_len; tfm = esp->conf.tfm; - blksize = ALIGN(crypto_tfm_alg_blocksize(tfm), 4); + desc.tfm = tfm; + desc.flags = 0; + blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4); clen = ALIGN(clen + 2, blksize); if (esp->conf.padlen) clen = ALIGN(clen, esp->conf.padlen); @@ -92,7 +96,7 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) xfrm_aevent_doreplay(x); if (esp->conf.ivlen) - crypto_cipher_set_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm)); + crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen); do { struct scatterlist *sg = &esp->sgbuf[0]; @@ -103,14 +107,17 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) goto error; } skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen); - crypto_cipher_encrypt(tfm, sg, sg, clen); + err = crypto_blkcipher_encrypt(&desc, sg, sg, clen); if (unlikely(sg != &esp->sgbuf[0])) kfree(sg); } while (0); + if (unlikely(err)) + goto error; + if (esp->conf.ivlen) { - memcpy(esph->enc_data, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm)); - crypto_cipher_get_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm)); + memcpy(esph->enc_data, esp->conf.ivec, esp->conf.ivlen); + crypto_blkcipher_get_iv(tfm, esp->conf.ivec, esp->conf.ivlen); } if (esp->auth.icv_full_len) { @@ -121,8 +128,6 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) ip_send_check(top_iph); - err = 0; - error: return err; } @@ -137,8 +142,10 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) struct iphdr *iph; struct ip_esp_hdr *esph; struct esp_data *esp = x->data; + struct crypto_blkcipher *tfm = esp->conf.tfm; + struct blkcipher_desc desc = { .tfm = tfm }; struct sk_buff *trailer; - int blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4); + int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4); int alen = esp->auth.icv_trunc_len; int elen = skb->len - sizeof(struct ip_esp_hdr) - esp->conf.ivlen - alen; int nfrags; @@ -146,6 +153,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) u8 nexthdr[2]; struct scatterlist *sg; int padlen; + int err; if (!pskb_may_pull(skb, sizeof(struct ip_esp_hdr))) goto out; @@ -178,7 +186,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) /* Get ivec. This can be wrong, check against another impls. */ if (esp->conf.ivlen) - crypto_cipher_set_iv(esp->conf.tfm, esph->enc_data, crypto_tfm_alg_ivsize(esp->conf.tfm)); + crypto_blkcipher_set_iv(tfm, esph->enc_data, esp->conf.ivlen); sg = &esp->sgbuf[0]; @@ -188,9 +196,11 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) goto out; } skb_to_sgvec(skb, sg, sizeof(struct ip_esp_hdr) + esp->conf.ivlen, elen); - crypto_cipher_decrypt(esp->conf.tfm, sg, sg, elen); + err = crypto_blkcipher_decrypt(&desc, sg, sg, elen); if (unlikely(sg != &esp->sgbuf[0])) kfree(sg); + if (unlikely(err)) + return err; if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2)) BUG(); @@ -254,7 +264,7 @@ out: static u32 esp4_get_max_size(struct xfrm_state *x, int mtu) { struct esp_data *esp = x->data; - u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4); + u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); if (x->props.mode) { mtu = ALIGN(mtu + 2, blksize); @@ -293,7 +303,7 @@ static void esp_destroy(struct xfrm_state *x) if (!esp) return; - crypto_free_tfm(esp->conf.tfm); + crypto_free_blkcipher(esp->conf.tfm); esp->conf.tfm = NULL; kfree(esp->conf.ivec); esp->conf.ivec = NULL; @@ -307,6 +317,7 @@ static void esp_destroy(struct xfrm_state *x) static int esp_init_state(struct xfrm_state *x) { struct esp_data *esp = NULL; + struct crypto_blkcipher *tfm; /* null auth and encryption can have zero length keys */ if (x->aalg) { @@ -351,13 +362,11 @@ static int esp_init_state(struct xfrm_state *x) } esp->conf.key = x->ealg->alg_key; esp->conf.key_len = (x->ealg->alg_key_len+7)/8; - if (x->props.ealgo == SADB_EALG_NULL) - esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_ECB); - else - esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_CBC); - if (esp->conf.tfm == NULL) + tfm = crypto_alloc_blkcipher(x->ealg->alg_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) goto error; - esp->conf.ivlen = crypto_tfm_alg_ivsize(esp->conf.tfm); + esp->conf.tfm = tfm; + esp->conf.ivlen = crypto_blkcipher_ivsize(tfm); esp->conf.padlen = 0; if (esp->conf.ivlen) { esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL); @@ -365,7 +374,7 @@ static int esp_init_state(struct xfrm_state *x) goto error; get_random_bytes(esp->conf.ivec, esp->conf.ivlen); } - if (crypto_cipher_setkey(esp->conf.tfm, esp->conf.key, esp->conf.key_len)) + if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) goto error; x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; if (x->props.mode) diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index e923d4dea418..0ba06c0c5d39 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -77,6 +77,7 @@ config INET6_ESP select CRYPTO select CRYPTO_HMAC select CRYPTO_MD5 + select CRYPTO_CBC select CRYPTO_SHA1 select CRYPTO_DES ---help--- diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index a278d5e862fe..46a7e687948e 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -24,6 +24,7 @@ * This file is derived from net/ipv4/esp.c */ +#include #include #include #include @@ -44,7 +45,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) int hdr_len; struct ipv6hdr *top_iph; struct ipv6_esp_hdr *esph; - struct crypto_tfm *tfm; + struct crypto_blkcipher *tfm; + struct blkcipher_desc desc; struct esp_data *esp; struct sk_buff *trailer; int blksize; @@ -67,7 +69,9 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) alen = esp->auth.icv_trunc_len; tfm = esp->conf.tfm; - blksize = ALIGN(crypto_tfm_alg_blocksize(tfm), 4); + desc.tfm = tfm; + desc.flags = 0; + blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4); clen = ALIGN(clen + 2, blksize); if (esp->conf.padlen) clen = ALIGN(clen, esp->conf.padlen); @@ -96,7 +100,7 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) xfrm_aevent_doreplay(x); if (esp->conf.ivlen) - crypto_cipher_set_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm)); + crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen); do { struct scatterlist *sg = &esp->sgbuf[0]; @@ -107,14 +111,17 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) goto error; } skb_to_sgvec(skb, sg, esph->enc_data+esp->conf.ivlen-skb->data, clen); - crypto_cipher_encrypt(tfm, sg, sg, clen); + err = crypto_blkcipher_encrypt(&desc, sg, sg, clen); if (unlikely(sg != &esp->sgbuf[0])) kfree(sg); } while (0); + if (unlikely(err)) + goto error; + if (esp->conf.ivlen) { - memcpy(esph->enc_data, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm)); - crypto_cipher_get_iv(tfm, esp->conf.ivec, crypto_tfm_alg_ivsize(tfm)); + memcpy(esph->enc_data, esp->conf.ivec, esp->conf.ivlen); + crypto_blkcipher_get_iv(tfm, esp->conf.ivec, esp->conf.ivlen); } if (esp->auth.icv_full_len) { @@ -123,8 +130,6 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) pskb_put(skb, trailer, alen); } - err = 0; - error: return err; } @@ -134,8 +139,10 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) struct ipv6hdr *iph; struct ipv6_esp_hdr *esph; struct esp_data *esp = x->data; + struct crypto_blkcipher *tfm = esp->conf.tfm; + struct blkcipher_desc desc = { .tfm = tfm }; struct sk_buff *trailer; - int blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4); + int blksize = ALIGN(crypto_blkcipher_blocksize(tfm), 4); int alen = esp->auth.icv_trunc_len; int elen = skb->len - sizeof(struct ipv6_esp_hdr) - esp->conf.ivlen - alen; @@ -182,7 +189,7 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) /* Get ivec. This can be wrong, check against another impls. */ if (esp->conf.ivlen) - crypto_cipher_set_iv(esp->conf.tfm, esph->enc_data, crypto_tfm_alg_ivsize(esp->conf.tfm)); + crypto_blkcipher_set_iv(tfm, esph->enc_data, esp->conf.ivlen); { u8 nexthdr[2]; @@ -197,9 +204,11 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) } } skb_to_sgvec(skb, sg, sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen, elen); - crypto_cipher_decrypt(esp->conf.tfm, sg, sg, elen); + ret = crypto_blkcipher_decrypt(&desc, sg, sg, elen); if (unlikely(sg != &esp->sgbuf[0])) kfree(sg); + if (unlikely(ret)) + goto out; if (skb_copy_bits(skb, skb->len-alen-2, nexthdr, 2)) BUG(); @@ -225,7 +234,7 @@ out: static u32 esp6_get_max_size(struct xfrm_state *x, int mtu) { struct esp_data *esp = x->data; - u32 blksize = ALIGN(crypto_tfm_alg_blocksize(esp->conf.tfm), 4); + u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); if (x->props.mode) { mtu = ALIGN(mtu + 2, blksize); @@ -266,7 +275,7 @@ static void esp6_destroy(struct xfrm_state *x) if (!esp) return; - crypto_free_tfm(esp->conf.tfm); + crypto_free_blkcipher(esp->conf.tfm); esp->conf.tfm = NULL; kfree(esp->conf.ivec); esp->conf.ivec = NULL; @@ -280,6 +289,7 @@ static void esp6_destroy(struct xfrm_state *x) static int esp6_init_state(struct xfrm_state *x) { struct esp_data *esp = NULL; + struct crypto_blkcipher *tfm; /* null auth and encryption can have zero length keys */ if (x->aalg) { @@ -327,13 +337,11 @@ static int esp6_init_state(struct xfrm_state *x) } esp->conf.key = x->ealg->alg_key; esp->conf.key_len = (x->ealg->alg_key_len+7)/8; - if (x->props.ealgo == SADB_EALG_NULL) - esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_ECB); - else - esp->conf.tfm = crypto_alloc_tfm(x->ealg->alg_name, CRYPTO_TFM_MODE_CBC); - if (esp->conf.tfm == NULL) + tfm = crypto_alloc_blkcipher(x->ealg->alg_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) goto error; - esp->conf.ivlen = crypto_tfm_alg_ivsize(esp->conf.tfm); + esp->conf.tfm = tfm; + esp->conf.ivlen = crypto_blkcipher_ivsize(tfm); esp->conf.padlen = 0; if (esp->conf.ivlen) { esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL); @@ -341,7 +349,7 @@ static int esp6_init_state(struct xfrm_state *x) goto error; get_random_bytes(esp->conf.ivec, esp->conf.ivlen); } - if (crypto_cipher_setkey(esp->conf.tfm, esp->conf.key, esp->conf.key_len)) + if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) goto error; x->props.header_len = sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen; if (x->props.mode) diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index b68974b38741..9b03d8497fba 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -118,7 +118,8 @@ static struct xfrm_algo_desc aalg_list[] = { static struct xfrm_algo_desc ealg_list[] = { { - .name = "cipher_null", + .name = "ecb(cipher_null)", + .compat = "cipher_null", .uinfo = { .encr = { @@ -135,7 +136,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "des", + .name = "cbc(des)", + .compat = "des", .uinfo = { .encr = { @@ -152,7 +154,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "des3_ede", + .name = "cbc(des3_ede)", + .compat = "des3_ede", .uinfo = { .encr = { @@ -169,7 +172,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "cast128", + .name = "cbc(cast128)", + .compat = "cast128", .uinfo = { .encr = { @@ -186,7 +190,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "blowfish", + .name = "cbc(blowfish)", + .compat = "blowfish", .uinfo = { .encr = { @@ -203,7 +208,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "aes", + .name = "cbc(aes)", + .compat = "aes", .uinfo = { .encr = { @@ -220,7 +226,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "serpent", + .name = "cbc(serpent)", + .compat = "serpent", .uinfo = { .encr = { @@ -237,7 +244,8 @@ static struct xfrm_algo_desc ealg_list[] = { } }, { - .name = "twofish", + .name = "cbc(twofish)", + .compat = "twofish", .uinfo = { .encr = { -- cgit v1.3-8-gc7d7 From 07d4ee583e21830ec5604d31f65cdc60a6eca19e Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 20 Aug 2006 14:24:50 +1000 Subject: [IPSEC]: Use HMAC template and hash interface This patch converts IPsec to use the new HMAC template. The names of existing simple digest algorithms may still be used to refer to their HMAC composites. The same structure can be used by other MACs such as AES-XCBC-MAC. This patch also switches from the digest interface to hash. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/ah.h | 29 ++++++++++++++++++----------- include/net/esp.h | 28 ++++++++++++++++------------ include/net/xfrm.h | 9 +++++---- net/ipv4/ah4.c | 36 ++++++++++++++++++++++++------------ net/ipv4/esp4.c | 36 +++++++++++++++++++++--------------- net/ipv6/ah6.c | 35 +++++++++++++++++++++++------------ net/ipv6/esp6.c | 42 ++++++++++++++++++++++++------------------ net/xfrm/xfrm_algo.c | 40 +++++++++++++++++++++++++++------------- 8 files changed, 158 insertions(+), 97 deletions(-) (limited to 'include/net') diff --git a/include/net/ah.h b/include/net/ah.h index 8e27c9ba8b84..8f257c159902 100644 --- a/include/net/ah.h +++ b/include/net/ah.h @@ -15,22 +15,29 @@ struct ah_data int icv_full_len; int icv_trunc_len; - void (*icv)(struct ah_data*, - struct sk_buff *skb, u8 *icv); - - struct crypto_tfm *tfm; + struct crypto_hash *tfm; }; -static inline void -ah_hmac_digest(struct ah_data *ahp, struct sk_buff *skb, u8 *auth_data) +static inline int ah_mac_digest(struct ah_data *ahp, struct sk_buff *skb, + u8 *auth_data) { - struct crypto_tfm *tfm = ahp->tfm; + struct hash_desc desc; + int err; + + desc.tfm = ahp->tfm; + desc.flags = 0; memset(auth_data, 0, ahp->icv_trunc_len); - crypto_hmac_init(tfm, ahp->key, &ahp->key_len); - skb_icv_walk(skb, tfm, 0, skb->len, crypto_hmac_update); - crypto_hmac_final(tfm, ahp->key, &ahp->key_len, ahp->work_icv); - memcpy(auth_data, ahp->work_icv, ahp->icv_trunc_len); + err = crypto_hash_init(&desc); + if (unlikely(err)) + goto out; + err = skb_icv_walk(skb, &desc, 0, skb->len, crypto_hash_update); + if (unlikely(err)) + goto out; + err = crypto_hash_final(&desc, ahp->work_icv); + +out: + return err; } #endif diff --git a/include/net/esp.h b/include/net/esp.h index af2ff18700c7..064366d66eea 100644 --- a/include/net/esp.h +++ b/include/net/esp.h @@ -35,7 +35,7 @@ struct esp_data void (*icv)(struct esp_data*, struct sk_buff *skb, int offset, int len, u8 *icv); - struct crypto_tfm *tfm; + struct crypto_hash *tfm; } auth; }; @@ -43,18 +43,22 @@ extern int skb_to_sgvec(struct sk_buff *skb, struct scatterlist *sg, int offset, extern int skb_cow_data(struct sk_buff *skb, int tailbits, struct sk_buff **trailer); extern void *pskb_put(struct sk_buff *skb, struct sk_buff *tail, int len); -static inline void -esp_hmac_digest(struct esp_data *esp, struct sk_buff *skb, int offset, - int len, u8 *auth_data) +static inline int esp_mac_digest(struct esp_data *esp, struct sk_buff *skb, + int offset, int len) { - struct crypto_tfm *tfm = esp->auth.tfm; - char *icv = esp->auth.work_icv; - - memset(auth_data, 0, esp->auth.icv_trunc_len); - crypto_hmac_init(tfm, esp->auth.key, &esp->auth.key_len); - skb_icv_walk(skb, tfm, offset, len, crypto_hmac_update); - crypto_hmac_final(tfm, esp->auth.key, &esp->auth.key_len, icv); - memcpy(auth_data, icv, esp->auth.icv_trunc_len); + struct hash_desc desc; + int err; + + desc.tfm = esp->auth.tfm; + desc.flags = 0; + + err = crypto_hash_init(&desc); + if (unlikely(err)) + return err; + err = skb_icv_walk(skb, &desc, offset, len, crypto_hash_update); + if (unlikely(err)) + return err; + return crypto_hash_final(&desc, esp->auth.work_icv); } #endif diff --git a/include/net/xfrm.h b/include/net/xfrm.h index e9114e41affc..3ecd9fa1ed4b 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -984,12 +984,13 @@ extern struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name, int probe); extern struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name, int probe); extern struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe); -struct crypto_tfm; +struct hash_desc; struct scatterlist; -typedef void (icv_update_fn_t)(struct crypto_tfm *, struct scatterlist *, unsigned int); +typedef int (icv_update_fn_t)(struct hash_desc *, struct scatterlist *, + unsigned int); -extern void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm, - int offset, int len, icv_update_fn_t icv_update); +extern int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *tfm, + int offset, int len, icv_update_fn_t icv_update); static inline int xfrm_addr_cmp(xfrm_address_t *a, xfrm_address_t *b, int family) diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 1366bc6ce6a5..2b98943e6b02 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -1,3 +1,4 @@ +#include #include #include #include @@ -97,7 +98,10 @@ static int ah_output(struct xfrm_state *x, struct sk_buff *skb) ah->spi = x->id.spi; ah->seq_no = htonl(++x->replay.oseq); xfrm_aevent_doreplay(x); - ahp->icv(ahp, skb, ah->auth_data); + err = ah_mac_digest(ahp, skb, ah->auth_data); + if (err) + goto error; + memcpy(ah->auth_data, ahp->work_icv, ahp->icv_trunc_len); top_iph->tos = iph->tos; top_iph->ttl = iph->ttl; @@ -119,6 +123,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) { int ah_hlen; int ihl; + int err = -EINVAL; struct iphdr *iph; struct ip_auth_hdr *ah; struct ah_data *ahp; @@ -166,8 +171,11 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len); skb_push(skb, ihl); - ahp->icv(ahp, skb, ah->auth_data); - if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) { + err = ah_mac_digest(ahp, skb, ah->auth_data); + if (err) + goto out; + err = -EINVAL; + if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) { x->stats.integrity_failed++; goto out; } @@ -179,7 +187,7 @@ static int ah_input(struct xfrm_state *x, struct sk_buff *skb) return 0; out: - return -EINVAL; + return err; } static void ah4_err(struct sk_buff *skb, u32 info) @@ -204,6 +212,7 @@ static int ah_init_state(struct xfrm_state *x) { struct ah_data *ahp = NULL; struct xfrm_algo_desc *aalg_desc; + struct crypto_hash *tfm; if (!x->aalg) goto error; @@ -221,24 +230,27 @@ static int ah_init_state(struct xfrm_state *x) ahp->key = x->aalg->alg_key; ahp->key_len = (x->aalg->alg_key_len+7)/8; - ahp->tfm = crypto_alloc_tfm(x->aalg->alg_name, 0); - if (!ahp->tfm) + tfm = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + goto error; + + ahp->tfm = tfm; + if (crypto_hash_setkey(tfm, ahp->key, ahp->key_len)) goto error; - ahp->icv = ah_hmac_digest; /* * Lookup the algorithm description maintained by xfrm_algo, * verify crypto transform properties, and store information * we need for AH processing. This lookup cannot fail here - * after a successful crypto_alloc_tfm(). + * after a successful crypto_alloc_hash(). */ aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); BUG_ON(!aalg_desc); if (aalg_desc->uinfo.auth.icv_fullbits/8 != - crypto_tfm_alg_digestsize(ahp->tfm)) { + crypto_hash_digestsize(tfm)) { printk(KERN_INFO "AH: %s digestsize %u != %hu\n", - x->aalg->alg_name, crypto_tfm_alg_digestsize(ahp->tfm), + x->aalg->alg_name, crypto_hash_digestsize(tfm), aalg_desc->uinfo.auth.icv_fullbits/8); goto error; } @@ -262,7 +274,7 @@ static int ah_init_state(struct xfrm_state *x) error: if (ahp) { kfree(ahp->work_icv); - crypto_free_tfm(ahp->tfm); + crypto_free_hash(ahp->tfm); kfree(ahp); } return -EINVAL; @@ -277,7 +289,7 @@ static void ah_destroy(struct xfrm_state *x) kfree(ahp->work_icv); ahp->work_icv = NULL; - crypto_free_tfm(ahp->tfm); + crypto_free_hash(ahp->tfm); ahp->tfm = NULL; kfree(ahp); } diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index 7c63ae494742..b428489f6ccd 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -121,9 +121,9 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) } if (esp->auth.icv_full_len) { - esp->auth.icv(esp, skb, (u8*)esph-skb->data, - sizeof(struct ip_esp_hdr) + esp->conf.ivlen+clen, trailer->tail); - pskb_put(skb, trailer, alen); + err = esp_mac_digest(esp, skb, (u8 *)esph - skb->data, + sizeof(*esph) + esp->conf.ivlen + clen); + memcpy(pskb_put(skb, trailer, alen), esp->auth.work_icv, alen); } ip_send_check(top_iph); @@ -163,15 +163,16 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) /* If integrity check is required, do this. */ if (esp->auth.icv_full_len) { - u8 sum[esp->auth.icv_full_len]; - u8 sum1[alen]; - - esp->auth.icv(esp, skb, 0, skb->len-alen, sum); + u8 sum[alen]; - if (skb_copy_bits(skb, skb->len-alen, sum1, alen)) + err = esp_mac_digest(esp, skb, 0, skb->len - alen); + if (err) + goto out; + + if (skb_copy_bits(skb, skb->len - alen, sum, alen)) BUG(); - if (unlikely(memcmp(sum, sum1, alen))) { + if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) { x->stats.integrity_failed++; goto out; } @@ -307,7 +308,7 @@ static void esp_destroy(struct xfrm_state *x) esp->conf.tfm = NULL; kfree(esp->conf.ivec); esp->conf.ivec = NULL; - crypto_free_tfm(esp->auth.tfm); + crypto_free_hash(esp->auth.tfm); esp->auth.tfm = NULL; kfree(esp->auth.work_icv); esp->auth.work_icv = NULL; @@ -333,22 +334,27 @@ static int esp_init_state(struct xfrm_state *x) if (x->aalg) { struct xfrm_algo_desc *aalg_desc; + struct crypto_hash *hash; esp->auth.key = x->aalg->alg_key; esp->auth.key_len = (x->aalg->alg_key_len+7)/8; - esp->auth.tfm = crypto_alloc_tfm(x->aalg->alg_name, 0); - if (esp->auth.tfm == NULL) + hash = crypto_alloc_hash(x->aalg->alg_name, 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(hash)) + goto error; + + esp->auth.tfm = hash; + if (crypto_hash_setkey(hash, esp->auth.key, esp->auth.key_len)) goto error; - esp->auth.icv = esp_hmac_digest; aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); BUG_ON(!aalg_desc); if (aalg_desc->uinfo.auth.icv_fullbits/8 != - crypto_tfm_alg_digestsize(esp->auth.tfm)) { + crypto_hash_digestsize(hash)) { NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n", x->aalg->alg_name, - crypto_tfm_alg_digestsize(esp->auth.tfm), + crypto_hash_digestsize(hash), aalg_desc->uinfo.auth.icv_fullbits/8); goto error; } diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 9d4831bd4335..00ffa7bc6c9f 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -213,7 +213,10 @@ static int ah6_output(struct xfrm_state *x, struct sk_buff *skb) ah->spi = x->id.spi; ah->seq_no = htonl(++x->replay.oseq); xfrm_aevent_doreplay(x); - ahp->icv(ahp, skb, ah->auth_data); + err = ah_mac_digest(ahp, skb, ah->auth_data); + if (err) + goto error_free_iph; + memcpy(ah->auth_data, ahp->work_icv, ahp->icv_trunc_len); err = 0; @@ -251,6 +254,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) u16 hdr_len; u16 ah_hlen; int nexthdr; + int err = -EINVAL; if (!pskb_may_pull(skb, sizeof(struct ip_auth_hdr))) goto out; @@ -292,8 +296,11 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) memcpy(auth_data, ah->auth_data, ahp->icv_trunc_len); memset(ah->auth_data, 0, ahp->icv_trunc_len); skb_push(skb, hdr_len); - ahp->icv(ahp, skb, ah->auth_data); - if (memcmp(ah->auth_data, auth_data, ahp->icv_trunc_len)) { + err = ah_mac_digest(ahp, skb, ah->auth_data); + if (err) + goto free_out; + err = -EINVAL; + if (memcmp(ahp->work_icv, auth_data, ahp->icv_trunc_len)) { LIMIT_NETDEBUG(KERN_WARNING "ipsec ah authentication error\n"); x->stats.integrity_failed++; goto free_out; @@ -310,7 +317,7 @@ static int ah6_input(struct xfrm_state *x, struct sk_buff *skb) free_out: kfree(tmp_hdr); out: - return -EINVAL; + return err; } static void ah6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, @@ -338,6 +345,7 @@ static int ah6_init_state(struct xfrm_state *x) { struct ah_data *ahp = NULL; struct xfrm_algo_desc *aalg_desc; + struct crypto_hash *tfm; if (!x->aalg) goto error; @@ -355,24 +363,27 @@ static int ah6_init_state(struct xfrm_state *x) ahp->key = x->aalg->alg_key; ahp->key_len = (x->aalg->alg_key_len+7)/8; - ahp->tfm = crypto_alloc_tfm(x->aalg->alg_name, 0); - if (!ahp->tfm) + tfm = crypto_alloc_hash(x->aalg->alg_name, 0, CRYPTO_ALG_ASYNC); + if (IS_ERR(tfm)) + goto error; + + ahp->tfm = tfm; + if (crypto_hash_setkey(tfm, ahp->key, ahp->key_len)) goto error; - ahp->icv = ah_hmac_digest; /* * Lookup the algorithm description maintained by xfrm_algo, * verify crypto transform properties, and store information * we need for AH processing. This lookup cannot fail here - * after a successful crypto_alloc_tfm(). + * after a successful crypto_alloc_hash(). */ aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); BUG_ON(!aalg_desc); if (aalg_desc->uinfo.auth.icv_fullbits/8 != - crypto_tfm_alg_digestsize(ahp->tfm)) { + crypto_hash_digestsize(tfm)) { printk(KERN_INFO "AH: %s digestsize %u != %hu\n", - x->aalg->alg_name, crypto_tfm_alg_digestsize(ahp->tfm), + x->aalg->alg_name, crypto_hash_digestsize(tfm), aalg_desc->uinfo.auth.icv_fullbits/8); goto error; } @@ -396,7 +407,7 @@ static int ah6_init_state(struct xfrm_state *x) error: if (ahp) { kfree(ahp->work_icv); - crypto_free_tfm(ahp->tfm); + crypto_free_hash(ahp->tfm); kfree(ahp); } return -EINVAL; @@ -411,7 +422,7 @@ static void ah6_destroy(struct xfrm_state *x) kfree(ahp->work_icv); ahp->work_icv = NULL; - crypto_free_tfm(ahp->tfm); + crypto_free_hash(ahp->tfm); ahp->tfm = NULL; kfree(ahp); } diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 46a7e687948e..2ebfd281e721 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -125,9 +125,9 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) } if (esp->auth.icv_full_len) { - esp->auth.icv(esp, skb, (u8*)esph-skb->data, - sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen+clen, trailer->tail); - pskb_put(skb, trailer, alen); + err = esp_mac_digest(esp, skb, (u8 *)esph - skb->data, + sizeof(*esph) + esp->conf.ivlen + clen); + memcpy(pskb_put(skb, trailer, alen), esp->auth.work_icv, alen); } error: @@ -162,15 +162,16 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) /* If integrity check is required, do this. */ if (esp->auth.icv_full_len) { - u8 sum[esp->auth.icv_full_len]; - u8 sum1[alen]; + u8 sum[alen]; - esp->auth.icv(esp, skb, 0, skb->len-alen, sum); + ret = esp_mac_digest(esp, skb, 0, skb->len - alen); + if (ret) + goto out; - if (skb_copy_bits(skb, skb->len-alen, sum1, alen)) + if (skb_copy_bits(skb, skb->len - alen, sum, alen)) BUG(); - if (unlikely(memcmp(sum, sum1, alen))) { + if (unlikely(memcmp(esp->auth.work_icv, sum, alen))) { x->stats.integrity_failed++; ret = -EINVAL; goto out; @@ -279,7 +280,7 @@ static void esp6_destroy(struct xfrm_state *x) esp->conf.tfm = NULL; kfree(esp->conf.ivec); esp->conf.ivec = NULL; - crypto_free_tfm(esp->auth.tfm); + crypto_free_hash(esp->auth.tfm); esp->auth.tfm = NULL; kfree(esp->auth.work_icv); esp->auth.work_icv = NULL; @@ -308,24 +309,29 @@ static int esp6_init_state(struct xfrm_state *x) if (x->aalg) { struct xfrm_algo_desc *aalg_desc; + struct crypto_hash *hash; esp->auth.key = x->aalg->alg_key; esp->auth.key_len = (x->aalg->alg_key_len+7)/8; - esp->auth.tfm = crypto_alloc_tfm(x->aalg->alg_name, 0); - if (esp->auth.tfm == NULL) + hash = crypto_alloc_hash(x->aalg->alg_name, 0, + CRYPTO_ALG_ASYNC); + if (IS_ERR(hash)) + goto error; + + esp->auth.tfm = hash; + if (crypto_hash_setkey(hash, esp->auth.key, esp->auth.key_len)) goto error; - esp->auth.icv = esp_hmac_digest; aalg_desc = xfrm_aalg_get_byname(x->aalg->alg_name, 0); BUG_ON(!aalg_desc); if (aalg_desc->uinfo.auth.icv_fullbits/8 != - crypto_tfm_alg_digestsize(esp->auth.tfm)) { - printk(KERN_INFO "ESP: %s digestsize %u != %hu\n", - x->aalg->alg_name, - crypto_tfm_alg_digestsize(esp->auth.tfm), - aalg_desc->uinfo.auth.icv_fullbits/8); - goto error; + crypto_hash_digestsize(hash)) { + NETDEBUG(KERN_INFO "ESP: %s digestsize %u != %hu\n", + x->aalg->alg_name, + crypto_hash_digestsize(hash), + aalg_desc->uinfo.auth.icv_fullbits/8); + goto error; } esp->auth.icv_full_len = aalg_desc->uinfo.auth.icv_fullbits/8; diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 9b03d8497fba..87918f281bb4 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -30,7 +30,8 @@ */ static struct xfrm_algo_desc aalg_list[] = { { - .name = "digest_null", + .name = "hmac(digest_null)", + .compat = "digest_null", .uinfo = { .auth = { @@ -47,7 +48,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, { - .name = "md5", + .name = "hmac(md5)", + .compat = "md5", .uinfo = { .auth = { @@ -64,7 +66,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, { - .name = "sha1", + .name = "hmac(sha1)", + .compat = "sha1", .uinfo = { .auth = { @@ -81,7 +84,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, { - .name = "sha256", + .name = "hmac(sha256)", + .compat = "sha256", .uinfo = { .auth = { @@ -98,7 +102,8 @@ static struct xfrm_algo_desc aalg_list[] = { } }, { - .name = "ripemd160", + .name = "hmac(ripemd160)", + .compat = "ripemd160", .uinfo = { .auth = { @@ -480,11 +485,12 @@ EXPORT_SYMBOL_GPL(xfrm_count_enc_supported); /* Move to common area: it is shared with AH. */ -void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm, - int offset, int len, icv_update_fn_t icv_update) +int skb_icv_walk(const struct sk_buff *skb, struct hash_desc *desc, + int offset, int len, icv_update_fn_t icv_update) { int start = skb_headlen(skb); int i, copy = start - offset; + int err; struct scatterlist sg; /* Checksum header. */ @@ -496,10 +502,12 @@ void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm, sg.offset = (unsigned long)(skb->data + offset) % PAGE_SIZE; sg.length = copy; - icv_update(tfm, &sg, 1); + err = icv_update(desc, &sg, copy); + if (unlikely(err)) + return err; if ((len -= copy) == 0) - return; + return 0; offset += copy; } @@ -519,10 +527,12 @@ void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm, sg.offset = frag->page_offset + offset-start; sg.length = copy; - icv_update(tfm, &sg, 1); + err = icv_update(desc, &sg, copy); + if (unlikely(err)) + return err; if (!(len -= copy)) - return; + return 0; offset += copy; } start = end; @@ -540,15 +550,19 @@ void skb_icv_walk(const struct sk_buff *skb, struct crypto_tfm *tfm, if ((copy = end - offset) > 0) { if (copy > len) copy = len; - skb_icv_walk(list, tfm, offset-start, copy, icv_update); + err = skb_icv_walk(list, desc, offset-start, + copy, icv_update); + if (unlikely(err)) + return err; if ((len -= copy) == 0) - return; + return 0; offset += copy; } start = end; } } BUG_ON(len); + return 0; } EXPORT_SYMBOL_GPL(skb_icv_walk); -- cgit v1.3-8-gc7d7 From 1b489e11d4df82514792f9f981f31976f8a94ddf Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sun, 20 Aug 2006 15:07:14 +1000 Subject: [SCTP]: Use HMAC template and hash interface This patch converts SCTP to use the new HMAC template and hash interface. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 4 ++-- include/net/sctp/sctp.h | 11 ----------- include/net/sctp/structs.h | 3 ++- net/sctp/endpointola.c | 2 +- net/sctp/sm_make_chunk.c | 37 +++++++++++++++++++++++++++---------- net/sctp/socket.c | 6 +++--- 6 files changed, 35 insertions(+), 28 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index c51541ee0247..57166bfdf8eb 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -312,9 +312,9 @@ enum { SCTP_MAX_GABS = 16 }; */ #if defined (CONFIG_SCTP_HMAC_MD5) -#define SCTP_COOKIE_HMAC_ALG "md5" +#define SCTP_COOKIE_HMAC_ALG "hmac(md5)" #elif defined (CONFIG_SCTP_HMAC_SHA1) -#define SCTP_COOKIE_HMAC_ALG "sha1" +#define SCTP_COOKIE_HMAC_ALG "hmac(sha1)" #else #define SCTP_COOKIE_HMAC_ALG NULL #endif diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 92eae0e0f3f1..1c1abce5f6b6 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -330,17 +330,6 @@ static inline void sctp_v6_exit(void) { return; } #endif /* #if defined(CONFIG_IPV6) */ -/* Some wrappers, in case crypto not available. */ -#if defined (CONFIG_CRYPTO_HMAC) -#define sctp_crypto_alloc_tfm crypto_alloc_tfm -#define sctp_crypto_free_tfm crypto_free_tfm -#define sctp_crypto_hmac crypto_hmac -#else -#define sctp_crypto_alloc_tfm(x...) NULL -#define sctp_crypto_free_tfm(x...) -#define sctp_crypto_hmac(x...) -#endif - /* Map an association to an assoc_id. */ static inline sctp_assoc_t sctp_assoc2id(const struct sctp_association *asoc) diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index e5aa7ff1f5b5..0412e730c765 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -87,6 +87,7 @@ struct sctp_bind_addr; struct sctp_ulpq; struct sctp_ep_common; struct sctp_ssnmap; +struct crypto_hash; #include @@ -264,7 +265,7 @@ struct sctp_sock { struct sctp_pf *pf; /* Access to HMAC transform. */ - struct crypto_tfm *hmac; + struct crypto_hash *hmac; /* What is our base endpointer? */ struct sctp_endpoint *ep; diff --git a/net/sctp/endpointola.c b/net/sctp/endpointola.c index ffda1d680529..35c49ff2d062 100644 --- a/net/sctp/endpointola.c +++ b/net/sctp/endpointola.c @@ -173,7 +173,7 @@ static void sctp_endpoint_destroy(struct sctp_endpoint *ep) SCTP_ASSERT(ep->base.dead, "Endpoint is not dead", return); /* Free up the HMAC transform. */ - sctp_crypto_free_tfm(sctp_sk(ep->base.sk)->hmac); + crypto_free_hash(sctp_sk(ep->base.sk)->hmac); /* Cleanup. */ sctp_inq_free(&ep->base.inqueue); diff --git a/net/sctp/sm_make_chunk.c b/net/sctp/sm_make_chunk.c index 17b509282cf2..7745bdea7817 100644 --- a/net/sctp/sm_make_chunk.c +++ b/net/sctp/sm_make_chunk.c @@ -1282,10 +1282,8 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, retval = kmalloc(*cookie_len, GFP_ATOMIC); - if (!retval) { - *cookie_len = 0; + if (!retval) goto nodata; - } /* Clear this memory since we are sending this data structure * out on the network. @@ -1321,19 +1319,29 @@ static sctp_cookie_param_t *sctp_pack_cookie(const struct sctp_endpoint *ep, ntohs(init_chunk->chunk_hdr->length), raw_addrs, addrs_len); if (sctp_sk(ep->base.sk)->hmac) { + struct hash_desc desc; + /* Sign the message. */ sg.page = virt_to_page(&cookie->c); sg.offset = (unsigned long)(&cookie->c) % PAGE_SIZE; sg.length = bodysize; keylen = SCTP_SECRET_SIZE; key = (char *)ep->secret_key[ep->current_key]; + desc.tfm = sctp_sk(ep->base.sk)->hmac; + desc.flags = 0; - sctp_crypto_hmac(sctp_sk(ep->base.sk)->hmac, key, &keylen, - &sg, 1, cookie->signature); + if (crypto_hash_setkey(desc.tfm, key, keylen) || + crypto_hash_digest(&desc, &sg, bodysize, cookie->signature)) + goto free_cookie; } -nodata: return retval; + +free_cookie: + kfree(retval); +nodata: + *cookie_len = 0; + return NULL; } /* Unpack the cookie from COOKIE ECHO chunk, recreating the association. */ @@ -1354,6 +1362,7 @@ struct sctp_association *sctp_unpack_cookie( sctp_scope_t scope; struct sk_buff *skb = chunk->skb; struct timeval tv; + struct hash_desc desc; /* Header size is static data prior to the actual cookie, including * any padding. @@ -1389,17 +1398,25 @@ struct sctp_association *sctp_unpack_cookie( sg.offset = (unsigned long)(bear_cookie) % PAGE_SIZE; sg.length = bodysize; key = (char *)ep->secret_key[ep->current_key]; + desc.tfm = sctp_sk(ep->base.sk)->hmac; + desc.flags = 0; memset(digest, 0x00, SCTP_SIGNATURE_SIZE); - sctp_crypto_hmac(sctp_sk(ep->base.sk)->hmac, key, &keylen, &sg, - 1, digest); + if (crypto_hash_setkey(desc.tfm, key, keylen) || + crypto_hash_digest(&desc, &sg, bodysize, digest)) { + *error = -SCTP_IERROR_NOMEM; + goto fail; + } if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) { /* Try the previous key. */ key = (char *)ep->secret_key[ep->last_key]; memset(digest, 0x00, SCTP_SIGNATURE_SIZE); - sctp_crypto_hmac(sctp_sk(ep->base.sk)->hmac, key, &keylen, - &sg, 1, digest); + if (crypto_hash_setkey(desc.tfm, key, keylen) || + crypto_hash_digest(&desc, &sg, bodysize, digest)) { + *error = -SCTP_IERROR_NOMEM; + goto fail; + } if (memcmp(digest, cookie->signature, SCTP_SIGNATURE_SIZE)) { /* Yikes! Still bad signature! */ diff --git a/net/sctp/socket.c b/net/sctp/socket.c index dab15949958e..85caf7963886 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -4898,7 +4898,7 @@ SCTP_STATIC int sctp_stream_listen(struct sock *sk, int backlog) int sctp_inet_listen(struct socket *sock, int backlog) { struct sock *sk = sock->sk; - struct crypto_tfm *tfm=NULL; + struct crypto_hash *tfm = NULL; int err = -EINVAL; if (unlikely(backlog < 0)) @@ -4911,7 +4911,7 @@ int sctp_inet_listen(struct socket *sock, int backlog) /* Allocate HMAC for generating cookie. */ if (sctp_hmac_alg) { - tfm = sctp_crypto_alloc_tfm(sctp_hmac_alg, 0); + tfm = crypto_alloc_hash(sctp_hmac_alg, 0, CRYPTO_ALG_ASYNC); if (!tfm) { err = -ENOSYS; goto out; @@ -4937,7 +4937,7 @@ out: sctp_release_sock(sk); return err; cleanup: - sctp_crypto_free_tfm(tfm); + crypto_free_hash(tfm); goto out; } -- cgit v1.3-8-gc7d7 From e4d5b79c661c7cfca9d8d5afd040a295f128d3cb Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Sat, 26 Aug 2006 18:12:40 +1000 Subject: [CRYPTO] users: Use crypto_comp and crypto_has_* This patch converts all users to use the new crypto_comp type and the crypto_has_* functions. Signed-off-by: Herbert Xu --- crypto/tcrypt.c | 8 ++++---- drivers/crypto/padlock.c | 6 +++--- drivers/net/ppp_mppe.c | 4 ++-- include/linux/crypto.h | 5 +++++ include/net/ipcomp.h | 5 ++--- net/ipv4/ipcomp.c | 25 +++++++++++++------------ net/ipv6/ipcomp6.c | 25 +++++++++++++------------ net/xfrm/xfrm_algo.c | 27 ++++++++++++++++++--------- 8 files changed, 60 insertions(+), 45 deletions(-) (limited to 'include/net') diff --git a/crypto/tcrypt.c b/crypto/tcrypt.c index 840ab8be0b96..83307420d31c 100644 --- a/crypto/tcrypt.c +++ b/crypto/tcrypt.c @@ -749,7 +749,7 @@ static void test_deflate(void) { unsigned int i; char result[COMP_BUF_SIZE]; - struct crypto_tfm *tfm; + struct crypto_comp *tfm; struct comp_testvec *tv; unsigned int tsize; @@ -821,7 +821,7 @@ static void test_deflate(void) ilen, dlen); } out: - crypto_free_tfm(tfm); + crypto_free_comp(tfm); } static void test_available(void) @@ -830,8 +830,8 @@ static void test_available(void) while (*name) { printk("alg %s ", *name); - printk((crypto_alg_available(*name, 0)) ? - "found\n" : "not found\n"); + printk(crypto_has_alg(*name, 0, CRYPTO_ALG_ASYNC) ? + "found\n" : "not found\n"); name++; } } diff --git a/drivers/crypto/padlock.c b/drivers/crypto/padlock.c index ce581684f4b4..d6d7dd5bb98c 100644 --- a/drivers/crypto/padlock.c +++ b/drivers/crypto/padlock.c @@ -26,13 +26,13 @@ static int __init padlock_init(void) { int success = 0; - if (crypto_alg_available("aes-padlock", 0)) + if (crypto_has_cipher("aes-padlock", 0, 0)) success++; - if (crypto_alg_available("sha1-padlock", 0)) + if (crypto_has_hash("sha1-padlock", 0, 0)) success++; - if (crypto_alg_available("sha256-padlock", 0)) + if (crypto_has_hash("sha256-padlock", 0, 0)) success++; if (!success) { diff --git a/drivers/net/ppp_mppe.c b/drivers/net/ppp_mppe.c index e7a0eb4fca60..f3655fd772f5 100644 --- a/drivers/net/ppp_mppe.c +++ b/drivers/net/ppp_mppe.c @@ -710,8 +710,8 @@ static struct compressor ppp_mppe = { static int __init ppp_mppe_init(void) { int answer; - if (!(crypto_alg_available("ecb(arc4)", 0) && - crypto_alg_available("sha1", 0))) + if (!(crypto_has_blkcipher("ecb(arc4)", 0, CRYPTO_ALG_ASYNC) && + crypto_has_hash("sha1", 0, CRYPTO_ALG_ASYNC))) return -ENODEV; sha_pad = kmalloc(sizeof(struct sha_pad), GFP_KERNEL); diff --git a/include/linux/crypto.h b/include/linux/crypto.h index cf91c4c0638b..d4f9948b64b1 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -928,6 +928,11 @@ static inline int crypto_has_comp(const char *alg_name, u32 type, u32 mask) return crypto_has_alg(alg_name, type, mask); } +static inline const char *crypto_comp_name(struct crypto_comp *tfm) +{ + return crypto_tfm_alg_name(crypto_comp_tfm(tfm)); +} + static inline struct compress_tfm *crypto_comp_crt(struct crypto_comp *tfm) { return &crypto_comp_tfm(tfm)->crt_compress; diff --git a/include/net/ipcomp.h b/include/net/ipcomp.h index b94e3047b4d9..87c1af3e5e82 100644 --- a/include/net/ipcomp.h +++ b/include/net/ipcomp.h @@ -1,15 +1,14 @@ #ifndef _NET_IPCOMP_H #define _NET_IPCOMP_H +#include #include #define IPCOMP_SCRATCH_SIZE 65400 -struct crypto_tfm; - struct ipcomp_data { u16 threshold; - struct crypto_tfm **tfms; + struct crypto_comp **tfms; }; #endif diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index a0c28b2b756e..5bb9c9f03fb6 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -32,7 +32,7 @@ struct ipcomp_tfms { struct list_head list; - struct crypto_tfm **tfms; + struct crypto_comp **tfms; int users; }; @@ -46,7 +46,7 @@ static int ipcomp_decompress(struct xfrm_state *x, struct sk_buff *skb) int err, plen, dlen; struct ipcomp_data *ipcd = x->data; u8 *start, *scratch; - struct crypto_tfm *tfm; + struct crypto_comp *tfm; int cpu; plen = skb->len; @@ -107,7 +107,7 @@ static int ipcomp_compress(struct xfrm_state *x, struct sk_buff *skb) struct iphdr *iph = skb->nh.iph; struct ipcomp_data *ipcd = x->data; u8 *start, *scratch; - struct crypto_tfm *tfm; + struct crypto_comp *tfm; int cpu; ihlen = iph->ihl * 4; @@ -302,7 +302,7 @@ static void **ipcomp_alloc_scratches(void) return scratches; } -static void ipcomp_free_tfms(struct crypto_tfm **tfms) +static void ipcomp_free_tfms(struct crypto_comp **tfms) { struct ipcomp_tfms *pos; int cpu; @@ -324,28 +324,28 @@ static void ipcomp_free_tfms(struct crypto_tfm **tfms) return; for_each_possible_cpu(cpu) { - struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu); - crypto_free_tfm(tfm); + struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu); + crypto_free_comp(tfm); } free_percpu(tfms); } -static struct crypto_tfm **ipcomp_alloc_tfms(const char *alg_name) +static struct crypto_comp **ipcomp_alloc_tfms(const char *alg_name) { struct ipcomp_tfms *pos; - struct crypto_tfm **tfms; + struct crypto_comp **tfms; int cpu; /* This can be any valid CPU ID so we don't need locking. */ cpu = raw_smp_processor_id(); list_for_each_entry(pos, &ipcomp_tfms_list, list) { - struct crypto_tfm *tfm; + struct crypto_comp *tfm; tfms = pos->tfms; tfm = *per_cpu_ptr(tfms, cpu); - if (!strcmp(crypto_tfm_alg_name(tfm), alg_name)) { + if (!strcmp(crypto_comp_name(tfm), alg_name)) { pos->users++; return tfms; } @@ -359,12 +359,13 @@ static struct crypto_tfm **ipcomp_alloc_tfms(const char *alg_name) INIT_LIST_HEAD(&pos->list); list_add(&pos->list, &ipcomp_tfms_list); - pos->tfms = tfms = alloc_percpu(struct crypto_tfm *); + pos->tfms = tfms = alloc_percpu(struct crypto_comp *); if (!tfms) goto error; for_each_possible_cpu(cpu) { - struct crypto_tfm *tfm = crypto_alloc_tfm(alg_name, 0); + struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, + CRYPTO_ALG_ASYNC); if (!tfm) goto error; *per_cpu_ptr(tfms, cpu) = tfm; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 7e4d1c17bfbc..a81e9e9d93bd 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -53,7 +53,7 @@ struct ipcomp6_tfms { struct list_head list; - struct crypto_tfm **tfms; + struct crypto_comp **tfms; int users; }; @@ -70,7 +70,7 @@ static int ipcomp6_input(struct xfrm_state *x, struct sk_buff *skb) int plen, dlen; struct ipcomp_data *ipcd = x->data; u8 *start, *scratch; - struct crypto_tfm *tfm; + struct crypto_comp *tfm; int cpu; if (skb_linearize_cow(skb)) @@ -129,7 +129,7 @@ static int ipcomp6_output(struct xfrm_state *x, struct sk_buff *skb) struct ipcomp_data *ipcd = x->data; int plen, dlen; u8 *start, *scratch; - struct crypto_tfm *tfm; + struct crypto_comp *tfm; int cpu; hdr_len = skb->h.raw - skb->data; @@ -301,7 +301,7 @@ static void **ipcomp6_alloc_scratches(void) return scratches; } -static void ipcomp6_free_tfms(struct crypto_tfm **tfms) +static void ipcomp6_free_tfms(struct crypto_comp **tfms) { struct ipcomp6_tfms *pos; int cpu; @@ -323,28 +323,28 @@ static void ipcomp6_free_tfms(struct crypto_tfm **tfms) return; for_each_possible_cpu(cpu) { - struct crypto_tfm *tfm = *per_cpu_ptr(tfms, cpu); - crypto_free_tfm(tfm); + struct crypto_comp *tfm = *per_cpu_ptr(tfms, cpu); + crypto_free_comp(tfm); } free_percpu(tfms); } -static struct crypto_tfm **ipcomp6_alloc_tfms(const char *alg_name) +static struct crypto_comp **ipcomp6_alloc_tfms(const char *alg_name) { struct ipcomp6_tfms *pos; - struct crypto_tfm **tfms; + struct crypto_comp **tfms; int cpu; /* This can be any valid CPU ID so we don't need locking. */ cpu = raw_smp_processor_id(); list_for_each_entry(pos, &ipcomp6_tfms_list, list) { - struct crypto_tfm *tfm; + struct crypto_comp *tfm; tfms = pos->tfms; tfm = *per_cpu_ptr(tfms, cpu); - if (!strcmp(crypto_tfm_alg_name(tfm), alg_name)) { + if (!strcmp(crypto_comp_name(tfm), alg_name)) { pos->users++; return tfms; } @@ -358,12 +358,13 @@ static struct crypto_tfm **ipcomp6_alloc_tfms(const char *alg_name) INIT_LIST_HEAD(&pos->list); list_add(&pos->list, &ipcomp6_tfms_list); - pos->tfms = tfms = alloc_percpu(struct crypto_tfm *); + pos->tfms = tfms = alloc_percpu(struct crypto_comp *); if (!tfms) goto error; for_each_possible_cpu(cpu) { - struct crypto_tfm *tfm = crypto_alloc_tfm(alg_name, 0); + struct crypto_comp *tfm = crypto_alloc_comp(alg_name, 0, + CRYPTO_ALG_ASYNC); if (!tfm) goto error; *per_cpu_ptr(tfms, cpu) = tfm; diff --git a/net/xfrm/xfrm_algo.c b/net/xfrm/xfrm_algo.c index 87918f281bb4..5a0dbeb6bbe8 100644 --- a/net/xfrm/xfrm_algo.c +++ b/net/xfrm/xfrm_algo.c @@ -363,8 +363,8 @@ struct xfrm_algo_desc *xfrm_calg_get_byid(int alg_id) EXPORT_SYMBOL_GPL(xfrm_calg_get_byid); static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list, - int entries, char *name, - int probe) + int entries, u32 type, u32 mask, + char *name, int probe) { int i, status; @@ -382,7 +382,7 @@ static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list, if (!probe) break; - status = crypto_alg_available(name, 0); + status = crypto_has_alg(name, type, mask | CRYPTO_ALG_ASYNC); if (!status) break; @@ -394,19 +394,25 @@ static struct xfrm_algo_desc *xfrm_get_byname(struct xfrm_algo_desc *list, struct xfrm_algo_desc *xfrm_aalg_get_byname(char *name, int probe) { - return xfrm_get_byname(aalg_list, aalg_entries(), name, probe); + return xfrm_get_byname(aalg_list, aalg_entries(), + CRYPTO_ALG_TYPE_HASH, CRYPTO_ALG_TYPE_HASH_MASK, + name, probe); } EXPORT_SYMBOL_GPL(xfrm_aalg_get_byname); struct xfrm_algo_desc *xfrm_ealg_get_byname(char *name, int probe) { - return xfrm_get_byname(ealg_list, ealg_entries(), name, probe); + return xfrm_get_byname(ealg_list, ealg_entries(), + CRYPTO_ALG_TYPE_BLKCIPHER, CRYPTO_ALG_TYPE_MASK, + name, probe); } EXPORT_SYMBOL_GPL(xfrm_ealg_get_byname); struct xfrm_algo_desc *xfrm_calg_get_byname(char *name, int probe) { - return xfrm_get_byname(calg_list, calg_entries(), name, probe); + return xfrm_get_byname(calg_list, calg_entries(), + CRYPTO_ALG_TYPE_COMPRESS, CRYPTO_ALG_TYPE_MASK, + name, probe); } EXPORT_SYMBOL_GPL(xfrm_calg_get_byname); @@ -441,19 +447,22 @@ void xfrm_probe_algs(void) BUG_ON(in_softirq()); for (i = 0; i < aalg_entries(); i++) { - status = crypto_alg_available(aalg_list[i].name, 0); + status = crypto_has_hash(aalg_list[i].name, 0, + CRYPTO_ALG_ASYNC); if (aalg_list[i].available != status) aalg_list[i].available = status; } for (i = 0; i < ealg_entries(); i++) { - status = crypto_alg_available(ealg_list[i].name, 0); + status = crypto_has_blkcipher(ealg_list[i].name, 0, + CRYPTO_ALG_ASYNC); if (ealg_list[i].available != status) ealg_list[i].available = status; } for (i = 0; i < calg_entries(); i++) { - status = crypto_alg_available(calg_list[i].name, 0); + status = crypto_has_comp(calg_list[i].name, 0, + CRYPTO_ALG_ASYNC); if (calg_list[i].available != status) calg_list[i].available = status; } -- cgit v1.3-8-gc7d7 From 892c141e62982272b9c738b5520ad0e5e1ad7b42 Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Fri, 4 Aug 2006 23:08:56 -0700 Subject: [MLSXFRM]: Add security sid to sock This adds security for IP sockets at the sock level. Security at the sock level is needed to enforce the SELinux security policy for security associations even when a sock is orphaned (such as in the TCP LAST_ACK state). This will also be used to enforce SELinux controls over data arriving at or leaving a child socket while it's still waiting to be accepted. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- include/linux/security.h | 12 ++++++++++++ include/net/sock.h | 13 +++++++++++++ net/core/sock.c | 2 +- security/dummy.c | 5 +++++ security/selinux/hooks.c | 38 +++++++++++++++++++++----------------- security/selinux/include/objsec.h | 1 + 6 files changed, 53 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/linux/security.h b/include/linux/security.h index 6bc2aad494ff..4d7fb59996b0 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -812,6 +812,8 @@ struct swap_info_struct; * which is used to copy security attributes between local stream sockets. * @sk_free_security: * Deallocate security structure. + * @sk_clone_security: + * Clone/copy security structure. * @sk_getsid: * Retrieve the LSM-specific sid for the sock to enable caching of network * authorizations. @@ -1332,6 +1334,7 @@ struct security_operations { int (*socket_getpeersec_dgram) (struct socket *sock, struct sk_buff *skb, u32 *secid); int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority); void (*sk_free_security) (struct sock *sk); + void (*sk_clone_security) (const struct sock *sk, struct sock *newsk); unsigned int (*sk_getsid) (struct sock *sk, struct flowi *fl, u8 dir); #endif /* CONFIG_SECURITY_NETWORK */ @@ -2885,6 +2888,11 @@ static inline void security_sk_free(struct sock *sk) return security_ops->sk_free_security(sk); } +static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) +{ + return security_ops->sk_clone_security(sk, newsk); +} + static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir) { return security_ops->sk_getsid(sk, fl, dir); @@ -3011,6 +3019,10 @@ static inline void security_sk_free(struct sock *sk) { } +static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) +{ +} + static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir) { return 0; diff --git a/include/net/sock.h b/include/net/sock.h index 324b3ea233d6..91cdceb3c028 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -972,6 +972,19 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) write_unlock_bh(&sk->sk_callback_lock); } +static inline void sock_copy(struct sock *nsk, const struct sock *osk) +{ +#ifdef CONFIG_SECURITY_NETWORK + void *sptr = nsk->sk_security; +#endif + + memcpy(nsk, osk, osk->sk_prot->obj_size); +#ifdef CONFIG_SECURITY_NETWORK + nsk->sk_security = sptr; + security_sk_clone(osk, nsk); +#endif +} + extern int sock_i_uid(struct sock *sk); extern unsigned long sock_i_ino(struct sock *sk); diff --git a/net/core/sock.c b/net/core/sock.c index 51fcfbc041a7..b67d868649cd 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -911,7 +911,7 @@ struct sock *sk_clone(const struct sock *sk, const gfp_t priority) if (newsk != NULL) { struct sk_filter *filter; - memcpy(newsk, sk, sk->sk_prot->obj_size); + sock_copy(newsk, sk); /* SANITY */ sk_node_init(&newsk->sk_node); diff --git a/security/dummy.c b/security/dummy.c index 58c6d399c844..bd3bc5faa9a8 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -805,6 +805,10 @@ static inline void dummy_sk_free_security (struct sock *sk) { } +static inline void dummy_sk_clone_security (const struct sock *sk, struct sock *newsk) +{ +} + static unsigned int dummy_sk_getsid(struct sock *sk, struct flowi *fl, u8 dir) { return 0; @@ -1060,6 +1064,7 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, socket_getpeersec_dgram); set_to_dummy_if_null(ops, sk_alloc_security); set_to_dummy_if_null(ops, sk_free_security); + set_to_dummy_if_null(ops, sk_clone_security); set_to_dummy_if_null(ops, sk_getsid); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 5d1b8c733199..d67abf77584a 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -269,15 +269,13 @@ static int sk_alloc_security(struct sock *sk, int family, gfp_t priority) { struct sk_security_struct *ssec; - if (family != PF_UNIX) - return 0; - ssec = kzalloc(sizeof(*ssec), priority); if (!ssec) return -ENOMEM; ssec->sk = sk; ssec->peer_sid = SECINITSID_UNLABELED; + ssec->sid = SECINITSID_UNLABELED; sk->sk_security = ssec; return 0; @@ -287,9 +285,6 @@ static void sk_free_security(struct sock *sk) { struct sk_security_struct *ssec = sk->sk_security; - if (sk->sk_family != PF_UNIX) - return; - sk->sk_security = NULL; kfree(ssec); } @@ -3068,6 +3063,7 @@ static void selinux_socket_post_create(struct socket *sock, int family, { struct inode_security_struct *isec; struct task_security_struct *tsec; + struct sk_security_struct *sksec; u32 newsid; isec = SOCK_INODE(sock)->i_security; @@ -3078,6 +3074,11 @@ static void selinux_socket_post_create(struct socket *sock, int family, isec->sid = kern ? SECINITSID_KERNEL : newsid; isec->initialized = 1; + if (sock->sk) { + sksec = sock->sk->sk_security; + sksec->sid = isec->sid; + } + return; } @@ -3551,22 +3552,24 @@ static void selinux_sk_free_security(struct sock *sk) sk_free_security(sk); } -static unsigned int selinux_sk_getsid_security(struct sock *sk, struct flowi *fl, u8 dir) +static void selinux_sk_clone_security(const struct sock *sk, struct sock *newsk) { - struct inode_security_struct *isec; - u32 sock_sid = SECINITSID_ANY_SOCKET; + struct sk_security_struct *ssec = sk->sk_security; + struct sk_security_struct *newssec = newsk->sk_security; + newssec->sid = ssec->sid; + newssec->peer_sid = ssec->peer_sid; +} + +static unsigned int selinux_sk_getsid_security(struct sock *sk, struct flowi *fl, u8 dir) +{ if (!sk) return selinux_no_sk_sid(fl); + else { + struct sk_security_struct *sksec = sk->sk_security; - read_lock_bh(&sk->sk_callback_lock); - isec = get_sock_isec(sk); - - if (isec) - sock_sid = isec->sid; - - read_unlock_bh(&sk->sk_callback_lock); - return sock_sid; + return sksec->sid; + } } static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb) @@ -4618,6 +4621,7 @@ static struct security_operations selinux_ops = { .socket_getpeersec_dgram = selinux_socket_getpeersec_dgram, .sk_alloc_security = selinux_sk_alloc_security, .sk_free_security = selinux_sk_free_security, + .sk_clone_security = selinux_sk_clone_security, .sk_getsid = selinux_sk_getsid_security, #ifdef CONFIG_SECURITY_NETWORK_XFRM diff --git a/security/selinux/include/objsec.h b/security/selinux/include/objsec.h index 940178865fc7..79b9e0af19a0 100644 --- a/security/selinux/include/objsec.h +++ b/security/selinux/include/objsec.h @@ -99,6 +99,7 @@ struct netif_security_struct { struct sk_security_struct { struct sock *sk; /* back pointer to sk object */ + u32 sid; /* SID of this object */ u32 peer_sid; /* SID of peer */ }; -- cgit v1.3-8-gc7d7 From b6340fcd761acf9249b3acbc95c4dc555d9beb07 Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Mon, 24 Jul 2006 23:28:37 -0700 Subject: [MLSXFRM]: Add security sid to flowi This adds security to flow key for labeling of flows as also to allow for making flow cache lookups based on the security label seemless. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- Documentation/networking/secid.txt | 14 ++++++++++++++ include/net/flow.h | 1 + 2 files changed, 15 insertions(+) create mode 100644 Documentation/networking/secid.txt (limited to 'include/net') diff --git a/Documentation/networking/secid.txt b/Documentation/networking/secid.txt new file mode 100644 index 000000000000..95ea06784333 --- /dev/null +++ b/Documentation/networking/secid.txt @@ -0,0 +1,14 @@ +flowi structure: + +The secid member in the flow structure is used in LSMs (e.g. SELinux) to indicate +the label of the flow. This label of the flow is currently used in selecting +matching labeled xfrm(s). + +If this is an outbound flow, the label is derived from the socket, if any, or +the incoming packet this flow is being generated as a response to (e.g. tcp +resets, timewait ack, etc.). It is also conceivable that the label could be +derived from other sources such as process context, device, etc., in special +cases, as may be appropriate. + +If this is an inbound flow, the label is derived from the IPSec security +associations, if any, used by the packet. diff --git a/include/net/flow.h b/include/net/flow.h index 04d89f763451..1cee5a83433a 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -78,6 +78,7 @@ struct flowi { #define fl_icmp_type uli_u.icmpt.type #define fl_icmp_code uli_u.icmpt.code #define fl_ipsec_spi uli_u.spi + __u32 secid; /* used by xfrm; see secid.txt */ } __attribute__((__aligned__(BITS_PER_LONG/8))); #define FLOW_DIR_IN 0 -- cgit v1.3-8-gc7d7 From e0d1caa7b0d5f02e4f34aa09c695d04251310c6c Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Mon, 24 Jul 2006 23:29:07 -0700 Subject: [MLSXFRM]: Flow based matching of xfrm policy and state This implements a seemless mechanism for xfrm policy selection and state matching based on the flow sid. This also includes the necessary SELinux enforcement pieces. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- include/linux/security.h | 106 +++++++++++++++++---- include/net/flow.h | 4 +- net/core/flow.c | 7 +- net/xfrm/xfrm_policy.c | 28 +++--- net/xfrm/xfrm_state.c | 12 ++- security/dummy.c | 23 ++++- security/selinux/hooks.c | 7 +- security/selinux/include/xfrm.h | 23 +++-- security/selinux/xfrm.c | 199 +++++++++++++++++++++++++++++++++------- 9 files changed, 329 insertions(+), 80 deletions(-) (limited to 'include/net') diff --git a/include/linux/security.h b/include/linux/security.h index 4d7fb59996b0..2c4921d79d19 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -31,6 +31,7 @@ #include #include #include +#include struct ctl_table; @@ -825,9 +826,8 @@ struct swap_info_struct; * used by the XFRM system. * @sec_ctx contains the security context information being provided by * the user-level policy update program (e.g., setkey). - * Allocate a security structure to the xp->security field. - * The security field is initialized to NULL when the xfrm_policy is - * allocated. + * Allocate a security structure to the xp->security field; the security + * field is initialized to NULL when the xfrm_policy is allocated. * Return 0 if operation was successful (memory to allocate, legal context) * @xfrm_policy_clone_security: * @old contains an existing xfrm_policy in the SPD. @@ -846,9 +846,14 @@ struct swap_info_struct; * Database by the XFRM system. * @sec_ctx contains the security context information being provided by * the user-level SA generation program (e.g., setkey or racoon). - * Allocate a security structure to the x->security field. The - * security field is initialized to NULL when the xfrm_state is - * allocated. + * @polsec contains the security context information associated with a xfrm + * policy rule from which to take the base context. polsec must be NULL + * when sec_ctx is specified. + * @secid contains the secid from which to take the mls portion of the context. + * Allocate a security structure to the x->security field; the security + * field is initialized to NULL when the xfrm_state is allocated. Set the + * context to correspond to either sec_ctx or polsec, with the mls portion + * taken from secid in the latter case. * Return 0 if operation was successful (memory to allocate, legal context). * @xfrm_state_free_security: * @x contains the xfrm_state. @@ -859,13 +864,26 @@ struct swap_info_struct; * @xfrm_policy_lookup: * @xp contains the xfrm_policy for which the access control is being * checked. - * @sk_sid contains the sock security label that is used to authorize + * @fl_secid contains the flow security label that is used to authorize * access to the policy xp. * @dir contains the direction of the flow (input or output). - * Check permission when a sock selects a xfrm_policy for processing + * Check permission when a flow selects a xfrm_policy for processing * XFRMs on a packet. The hook is called when selecting either a * per-socket policy or a generic xfrm policy. * Return 0 if permission is granted. + * @xfrm_state_pol_flow_match: + * @x contains the state to match. + * @xp contains the policy to check for a match. + * @fl contains the flow to check for a match. + * Return 1 if there is a match. + * @xfrm_flow_state_match: + * @fl contains the flow key to match. + * @xfrm points to the xfrm_state to match. + * Return 1 if there is a match. + * @xfrm_decode_session: + * @skb points to skb to decode. + * @fl points to the flow key to set. + * Return 0 if successful decoding. * * Security hooks affecting all Key Management operations * @@ -1343,10 +1361,16 @@ struct security_operations { int (*xfrm_policy_clone_security) (struct xfrm_policy *old, struct xfrm_policy *new); void (*xfrm_policy_free_security) (struct xfrm_policy *xp); int (*xfrm_policy_delete_security) (struct xfrm_policy *xp); - int (*xfrm_state_alloc_security) (struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx); + int (*xfrm_state_alloc_security) (struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx, struct xfrm_sec_ctx *polsec, + u32 secid); void (*xfrm_state_free_security) (struct xfrm_state *x); int (*xfrm_state_delete_security) (struct xfrm_state *x); - int (*xfrm_policy_lookup)(struct xfrm_policy *xp, u32 sk_sid, u8 dir); + int (*xfrm_policy_lookup)(struct xfrm_policy *xp, u32 fl_secid, u8 dir); + int (*xfrm_state_pol_flow_match)(struct xfrm_state *x, + struct xfrm_policy *xp, struct flowi *fl); + int (*xfrm_flow_state_match)(struct flowi *fl, struct xfrm_state *xfrm); + int (*xfrm_decode_session)(struct sk_buff *skb, struct flowi *fl); #endif /* CONFIG_SECURITY_NETWORK_XFRM */ /* key management security hooks */ @@ -3050,9 +3074,18 @@ static inline int security_xfrm_policy_delete(struct xfrm_policy *xp) return security_ops->xfrm_policy_delete_security(xp); } -static inline int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) +static inline int security_xfrm_state_alloc(struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx) +{ + return security_ops->xfrm_state_alloc_security(x, sec_ctx, NULL, 0); +} + +static inline int security_xfrm_state_alloc_acquire(struct xfrm_state *x, + struct xfrm_sec_ctx *polsec, u32 secid) { - return security_ops->xfrm_state_alloc_security(x, sec_ctx); + if (!polsec) + return 0; + return security_ops->xfrm_state_alloc_security(x, NULL, polsec, secid); } static inline int security_xfrm_state_delete(struct xfrm_state *x) @@ -3065,9 +3098,25 @@ static inline void security_xfrm_state_free(struct xfrm_state *x) security_ops->xfrm_state_free_security(x); } -static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) +static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir) +{ + return security_ops->xfrm_policy_lookup(xp, fl_secid, dir); +} + +static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x, + struct xfrm_policy *xp, struct flowi *fl) +{ + return security_ops->xfrm_state_pol_flow_match(x, xp, fl); +} + +static inline int security_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm) +{ + return security_ops->xfrm_flow_state_match(fl, xfrm); +} + +static inline int security_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) { - return security_ops->xfrm_policy_lookup(xp, sk_sid, dir); + return security_ops->xfrm_decode_session(skb, fl); } #else /* CONFIG_SECURITY_NETWORK_XFRM */ static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) @@ -3089,7 +3138,14 @@ static inline int security_xfrm_policy_delete(struct xfrm_policy *xp) return 0; } -static inline int security_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) +static inline int security_xfrm_state_alloc(struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx) +{ + return 0; +} + +static inline int security_xfrm_state_alloc_acquire(struct xfrm_state *x, + struct xfrm_sec_ctx *polsec, u32 secid) { return 0; } @@ -3103,10 +3159,28 @@ static inline int security_xfrm_state_delete(struct xfrm_state *x) return 0; } -static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) +static inline int security_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir) { return 0; } + +static inline int security_xfrm_state_pol_flow_match(struct xfrm_state *x, + struct xfrm_policy *xp, struct flowi *fl) +{ + return 1; +} + +static inline int security_xfrm_flow_state_match(struct flowi *fl, + struct xfrm_state *xfrm) +{ + return 1; +} + +static inline int security_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +{ + return 0; +} + #endif /* CONFIG_SECURITY_NETWORK_XFRM */ #ifdef CONFIG_KEYS diff --git a/include/net/flow.h b/include/net/flow.h index 1cee5a83433a..21d988b2058a 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -86,10 +86,10 @@ struct flowi { #define FLOW_DIR_FWD 2 struct sock; -typedef void (*flow_resolve_t)(struct flowi *key, u32 sk_sid, u16 family, u8 dir, +typedef void (*flow_resolve_t)(struct flowi *key, u16 family, u8 dir, void **objp, atomic_t **obj_refp); -extern void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir, +extern void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, flow_resolve_t resolver); extern void flow_cache_flush(void); extern atomic_t flow_cache_genid; diff --git a/net/core/flow.c b/net/core/flow.c index 2191af5f26ac..645241165e6c 100644 --- a/net/core/flow.c +++ b/net/core/flow.c @@ -32,7 +32,6 @@ struct flow_cache_entry { u8 dir; struct flowi key; u32 genid; - u32 sk_sid; void *object; atomic_t *object_ref; }; @@ -165,7 +164,7 @@ static int flow_key_compare(struct flowi *key1, struct flowi *key2) return 0; } -void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir, +void *flow_cache_lookup(struct flowi *key, u16 family, u8 dir, flow_resolve_t resolver) { struct flow_cache_entry *fle, **head; @@ -189,7 +188,6 @@ void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir, for (fle = *head; fle; fle = fle->next) { if (fle->family == family && fle->dir == dir && - fle->sk_sid == sk_sid && flow_key_compare(key, &fle->key) == 0) { if (fle->genid == atomic_read(&flow_cache_genid)) { void *ret = fle->object; @@ -214,7 +212,6 @@ void *flow_cache_lookup(struct flowi *key, u32 sk_sid, u16 family, u8 dir, *head = fle; fle->family = family; fle->dir = dir; - fle->sk_sid = sk_sid; memcpy(&fle->key, key, sizeof(*key)); fle->object = NULL; flow_count(cpu)++; @@ -226,7 +223,7 @@ nocache: void *obj; atomic_t *obj_ref; - resolver(key, sk_sid, family, dir, &obj, &obj_ref); + resolver(key, family, dir, &obj, &obj_ref); if (fle) { fle->genid = atomic_read(&flow_cache_genid); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 3da67ca2c3ce..79405daadc52 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -597,7 +597,7 @@ EXPORT_SYMBOL(xfrm_policy_walk); /* Find policy to apply to this flow. */ -static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir, +static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, void **objp, atomic_t **obj_refp) { struct xfrm_policy *pol; @@ -613,7 +613,7 @@ static void xfrm_policy_lookup(struct flowi *fl, u32 sk_sid, u16 family, u8 dir, match = xfrm_selector_match(sel, fl, family); if (match) { - if (!security_xfrm_policy_lookup(pol, sk_sid, dir)) { + if (!security_xfrm_policy_lookup(pol, fl->secid, dir)) { xfrm_pol_hold(pol); break; } @@ -641,7 +641,7 @@ static inline int policy_to_flow_dir(int dir) }; } -static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl, u32 sk_sid) +static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struct flowi *fl) { struct xfrm_policy *pol; @@ -652,7 +652,7 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc int err = 0; if (match) - err = security_xfrm_policy_lookup(pol, sk_sid, policy_to_flow_dir(dir)); + err = security_xfrm_policy_lookup(pol, fl->secid, policy_to_flow_dir(dir)); if (match && !err) xfrm_pol_hold(pol); @@ -862,19 +862,20 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, u32 genid; u16 family; u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); - u32 sk_sid = security_sk_sid(sk, fl, dir); + + fl->secid = security_sk_sid(sk, fl, dir); restart: genid = atomic_read(&flow_cache_genid); policy = NULL; if (sk && sk->sk_policy[1]) - policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl, sk_sid); + policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); if (!policy) { /* To accelerate a bit... */ if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT]) return 0; - policy = flow_cache_lookup(fl, sk_sid, dst_orig->ops->family, + policy = flow_cache_lookup(fl, dst_orig->ops->family, dir, xfrm_policy_lookup); } @@ -1032,13 +1033,15 @@ int xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family) { struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + int err; if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; afinfo->decode_session(skb, fl); + err = security_xfrm_decode_session(skb, fl); xfrm_policy_put_afinfo(afinfo); - return 0; + return err; } EXPORT_SYMBOL(xfrm_decode_session); @@ -1058,14 +1061,11 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, struct xfrm_policy *pol; struct flowi fl; u8 fl_dir = policy_to_flow_dir(dir); - u32 sk_sid; if (xfrm_decode_session(skb, &fl, family) < 0) return 0; nf_nat_decode_session(skb, &fl, family); - sk_sid = security_sk_sid(sk, &fl, fl_dir); - /* First, check used SA against their selectors. */ if (skb->sp) { int i; @@ -1079,10 +1079,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, pol = NULL; if (sk && sk->sk_policy[dir]) - pol = xfrm_sk_policy_lookup(sk, dir, &fl, sk_sid); + pol = xfrm_sk_policy_lookup(sk, dir, &fl); if (!pol) - pol = flow_cache_lookup(&fl, sk_sid, family, fl_dir, + pol = flow_cache_lookup(&fl, family, fl_dir, xfrm_policy_lookup); if (!pol) @@ -1298,6 +1298,8 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) if (fl && !xfrm_selector_match(&dst->xfrm->sel, fl, family)) return 0; + if (fl && !security_xfrm_flow_state_match(fl, dst->xfrm)) + return 0; if (dst->xfrm->km.state != XFRM_STATE_VALID) return 0; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 0021aad5db43..be02bd981d12 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -367,7 +367,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, */ if (x->km.state == XFRM_STATE_VALID) { if (!xfrm_selector_match(&x->sel, fl, family) || - !xfrm_sec_ctx_match(pol->security, x->security)) + !security_xfrm_state_pol_flow_match(x, pol, fl)) continue; if (!best || best->km.dying > x->km.dying || @@ -379,7 +379,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, } else if (x->km.state == XFRM_STATE_ERROR || x->km.state == XFRM_STATE_EXPIRED) { if (xfrm_selector_match(&x->sel, fl, family) && - xfrm_sec_ctx_match(pol->security, x->security)) + security_xfrm_state_pol_flow_match(x, pol, fl)) error = -ESRCH; } } @@ -403,6 +403,14 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, * to current session. */ xfrm_init_tempsel(x, fl, tmpl, daddr, saddr, family); + error = security_xfrm_state_alloc_acquire(x, pol->security, fl->secid); + if (error) { + x->km.state = XFRM_STATE_DEAD; + xfrm_state_put(x); + x = NULL; + goto out; + } + if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; list_add_tail(&x->bydst, xfrm_state_bydst+h); diff --git a/security/dummy.c b/security/dummy.c index bd3bc5faa9a8..c1f10654871e 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -835,7 +835,8 @@ static int dummy_xfrm_policy_delete_security(struct xfrm_policy *xp) return 0; } -static int dummy_xfrm_state_alloc_security(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx) +static int dummy_xfrm_state_alloc_security(struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx, struct xfrm_sec_ctx *pol, u32 secid) { return 0; } @@ -853,6 +854,23 @@ static int dummy_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) { return 0; } + +static int dummy_xfrm_state_pol_flow_match(struct xfrm_state *x, + struct xfrm_policy *xp, struct flowi *fl) +{ + return 1; +} + +static int dummy_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm) +{ + return 1; +} + +static int dummy_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +{ + return 0; +} + #endif /* CONFIG_SECURITY_NETWORK_XFRM */ static int dummy_register_security (const char *name, struct security_operations *ops) { @@ -1076,6 +1094,9 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, xfrm_state_free_security); set_to_dummy_if_null(ops, xfrm_state_delete_security); set_to_dummy_if_null(ops, xfrm_policy_lookup); + set_to_dummy_if_null(ops, xfrm_state_pol_flow_match); + set_to_dummy_if_null(ops, xfrm_flow_state_match); + set_to_dummy_if_null(ops, xfrm_decode_session); #endif /* CONFIG_SECURITY_NETWORK_XFRM */ #ifdef CONFIG_KEYS set_to_dummy_if_null(ops, key_alloc); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index d67abf77584a..5c189da07bc9 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3468,7 +3468,7 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) if (err) goto out; - err = selinux_xfrm_sock_rcv_skb(sock_sid, skb); + err = selinux_xfrm_sock_rcv_skb(sock_sid, skb, &ad); out: return err; } @@ -3720,7 +3720,7 @@ static unsigned int selinux_ip_postroute_last(unsigned int hooknum, if (err) goto out; - err = selinux_xfrm_postroute_last(isec->sid, skb); + err = selinux_xfrm_postroute_last(isec->sid, skb, &ad); out: return err ? NF_DROP : NF_ACCEPT; } @@ -4633,6 +4633,9 @@ static struct security_operations selinux_ops = { .xfrm_state_free_security = selinux_xfrm_state_free, .xfrm_state_delete_security = selinux_xfrm_state_delete, .xfrm_policy_lookup = selinux_xfrm_policy_lookup, + .xfrm_state_pol_flow_match = selinux_xfrm_state_pol_flow_match, + .xfrm_flow_state_match = selinux_xfrm_flow_state_match, + .xfrm_decode_session = selinux_xfrm_decode_session, #endif #ifdef CONFIG_KEYS diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index c96498a10eb8..f51a3e84bd9b 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -2,6 +2,7 @@ * SELinux support for the XFRM LSM hooks * * Author : Trent Jaeger, + * Updated : Venkat Yekkirala, */ #ifndef _SELINUX_XFRM_H_ #define _SELINUX_XFRM_H_ @@ -10,10 +11,16 @@ int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx * int selinux_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new); void selinux_xfrm_policy_free(struct xfrm_policy *xp); int selinux_xfrm_policy_delete(struct xfrm_policy *xp); -int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *sec_ctx); +int selinux_xfrm_state_alloc(struct xfrm_state *x, + struct xfrm_user_sec_ctx *sec_ctx, struct xfrm_sec_ctx *pol, u32 secid); void selinux_xfrm_state_free(struct xfrm_state *x); int selinux_xfrm_state_delete(struct xfrm_state *x); -int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir); +int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir); +int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, + struct xfrm_policy *xp, struct flowi *fl); +int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm); +int selinux_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl); + /* * Extract the security blob from the sock (it's actually on the socket) @@ -39,17 +46,21 @@ static inline u32 selinux_no_sk_sid(struct flowi *fl) } #ifdef CONFIG_SECURITY_NETWORK_XFRM -int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb); -int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb); +int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb, + struct avc_audit_data *ad); +int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, + struct avc_audit_data *ad); u32 selinux_socket_getpeer_stream(struct sock *sk); u32 selinux_socket_getpeer_dgram(struct sk_buff *skb); #else -static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb) +static inline int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb, + struct avc_audit_data *ad) { return 0; } -static inline int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb) +static inline int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, + struct avc_audit_data *ad) { return 0; } diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index 6c985ced8102..a502b0540e3d 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -6,7 +6,12 @@ * Authors: Serge Hallyn * Trent Jaeger * + * Updated: Venkat Yekkirala + * + * Granular IPSec Associations for use in MLS environments. + * * Copyright (C) 2005 International Business Machines Corporation + * Copyright (C) 2006 Trusted Computer Solutions, Inc. * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2, @@ -67,10 +72,10 @@ static inline int selinux_authorizable_xfrm(struct xfrm_state *x) } /* - * LSM hook implementation that authorizes that a socket can be used - * with the corresponding xfrm_sec_ctx and direction. + * LSM hook implementation that authorizes that a flow can use + * a xfrm policy rule. */ -int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) +int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir) { int rc = 0; u32 sel_sid = SECINITSID_UNLABELED; @@ -84,27 +89,129 @@ int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 sk_sid, u8 dir) sel_sid = ctx->ctx_sid; } - rc = avc_has_perm(sk_sid, sel_sid, SECCLASS_ASSOCIATION, - ((dir == FLOW_DIR_IN) ? ASSOCIATION__RECVFROM : - ((dir == FLOW_DIR_OUT) ? ASSOCIATION__SENDTO : - (ASSOCIATION__SENDTO | ASSOCIATION__RECVFROM))), + rc = avc_has_perm(fl_secid, sel_sid, SECCLASS_ASSOCIATION, + ASSOCIATION__POLMATCH, NULL); return rc; } +/* + * LSM hook implementation that authorizes that a state matches + * the given policy, flow combo. + */ + +int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, + struct flowi *fl) +{ + u32 state_sid; + u32 pol_sid; + int err; + + if (x->security) + state_sid = x->security->ctx_sid; + else + state_sid = SECINITSID_UNLABELED; + + if (xp->security) + pol_sid = xp->security->ctx_sid; + else + pol_sid = SECINITSID_UNLABELED; + + err = avc_has_perm(state_sid, pol_sid, SECCLASS_ASSOCIATION, + ASSOCIATION__POLMATCH, + NULL); + + if (err) + return 0; + + return selinux_xfrm_flow_state_match(fl, x); +} + +/* + * LSM hook implementation that authorizes that a particular outgoing flow + * can use a given security association. + */ + +int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm) +{ + int rc = 0; + u32 sel_sid = SECINITSID_UNLABELED; + struct xfrm_sec_ctx *ctx; + + /* Context sid is either set to label or ANY_ASSOC */ + if ((ctx = xfrm->security)) { + if (!selinux_authorizable_ctx(ctx)) + return 0; + + sel_sid = ctx->ctx_sid; + } + + rc = avc_has_perm(fl->secid, sel_sid, SECCLASS_ASSOCIATION, + ASSOCIATION__SENDTO, + NULL)? 0:1; + + return rc; +} + +/* + * LSM hook implementation that determines the sid for the session. + */ + +int selinux_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +{ + struct sec_path *sp; + + fl->secid = SECSID_NULL; + + if (skb == NULL) + return 0; + + sp = skb->sp; + if (sp) { + int i, sid_set = 0; + + for (i = sp->len-1; i >= 0; i--) { + struct xfrm_state *x = sp->xvec[i]; + if (selinux_authorizable_xfrm(x)) { + struct xfrm_sec_ctx *ctx = x->security; + + if (!sid_set) { + fl->secid = ctx->ctx_sid; + sid_set = 1; + } + else if (fl->secid != ctx->ctx_sid) + return -EINVAL; + } + } + } + + return 0; +} + /* * Security blob allocation for xfrm_policy and xfrm_state * CTX does not have a meaningful value on input */ -static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_user_sec_ctx *uctx) +static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, + struct xfrm_user_sec_ctx *uctx, struct xfrm_sec_ctx *pol, u32 sid) { int rc = 0; struct task_security_struct *tsec = current->security; - struct xfrm_sec_ctx *ctx; + struct xfrm_sec_ctx *ctx = NULL; + char *ctx_str = NULL; + u32 str_len; + u32 ctx_sid; + + BUG_ON(uctx && pol); + + if (pol) + goto from_policy; BUG_ON(!uctx); - BUG_ON(uctx->ctx_doi != XFRM_SC_ALG_SELINUX); + + if (uctx->ctx_doi != XFRM_SC_ALG_SELINUX) + return -EINVAL; if (uctx->ctx_len >= PAGE_SIZE) return -ENOMEM; @@ -141,9 +248,41 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, struct xfrm_us return rc; +from_policy: + BUG_ON(!pol); + rc = security_sid_mls_copy(pol->ctx_sid, sid, &ctx_sid); + if (rc) + goto out; + + rc = security_sid_to_context(ctx_sid, &ctx_str, &str_len); + if (rc) + goto out; + + *ctxp = ctx = kmalloc(sizeof(*ctx) + + str_len, + GFP_ATOMIC); + + if (!ctx) { + rc = -ENOMEM; + goto out; + } + + + ctx->ctx_doi = XFRM_SC_DOI_LSM; + ctx->ctx_alg = XFRM_SC_ALG_SELINUX; + ctx->ctx_sid = ctx_sid; + ctx->ctx_len = str_len; + memcpy(ctx->ctx_str, + ctx_str, + str_len); + + goto out2; + out: *ctxp = NULL; kfree(ctx); +out2: + kfree(ctx_str); return rc; } @@ -157,7 +296,7 @@ int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx * BUG_ON(!xp); - err = selinux_xfrm_sec_ctx_alloc(&xp->security, uctx); + err = selinux_xfrm_sec_ctx_alloc(&xp->security, uctx, NULL, 0); return err; } @@ -217,13 +356,14 @@ int selinux_xfrm_policy_delete(struct xfrm_policy *xp) * LSM hook implementation that allocs and transfers sec_ctx spec to * xfrm_state. */ -int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uctx) +int selinux_xfrm_state_alloc(struct xfrm_state *x, struct xfrm_user_sec_ctx *uctx, + struct xfrm_sec_ctx *pol, u32 secid) { int err; BUG_ON(!x); - err = selinux_xfrm_sec_ctx_alloc(&x->security, uctx); + err = selinux_xfrm_sec_ctx_alloc(&x->security, uctx, pol, secid); return err; } @@ -329,38 +469,30 @@ int selinux_xfrm_state_delete(struct xfrm_state *x) * we need to check for unlabelled access since this may not have * gone thru the IPSec process. */ -int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb) +int selinux_xfrm_sock_rcv_skb(u32 isec_sid, struct sk_buff *skb, + struct avc_audit_data *ad) { int i, rc = 0; struct sec_path *sp; + u32 sel_sid = SECINITSID_UNLABELED; sp = skb->sp; if (sp) { - /* - * __xfrm_policy_check does not approve unless xfrm_policy_ok - * says that spi's match for policy and the socket. - * - * Only need to verify the existence of an authorizable sp. - */ for (i = 0; i < sp->len; i++) { struct xfrm_state *x = sp->xvec[i]; - if (x && selinux_authorizable_xfrm(x)) - goto accept; + if (x && selinux_authorizable_xfrm(x)) { + struct xfrm_sec_ctx *ctx = x->security; + sel_sid = ctx->ctx_sid; + break; + } } } - /* check SELinux sock for unlabelled access */ - rc = avc_has_perm(isec_sid, SECINITSID_UNLABELED, SECCLASS_ASSOCIATION, - ASSOCIATION__RECVFROM, NULL); - if (rc) - goto drop; - -accept: - return 0; + rc = avc_has_perm(isec_sid, sel_sid, SECCLASS_ASSOCIATION, + ASSOCIATION__RECVFROM, ad); -drop: return rc; } @@ -371,7 +503,8 @@ drop: * If we do have a authorizable security association, then it has already been * checked in xfrm_policy_lookup hook. */ -int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb) +int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb, + struct avc_audit_data *ad) { struct dst_entry *dst; int rc = 0; @@ -391,7 +524,7 @@ int selinux_xfrm_postroute_last(u32 isec_sid, struct sk_buff *skb) } rc = avc_has_perm(isec_sid, SECINITSID_UNLABELED, SECCLASS_ASSOCIATION, - ASSOCIATION__SENDTO, NULL); + ASSOCIATION__SENDTO, ad); out: return rc; } -- cgit v1.3-8-gc7d7 From beb8d13bed80f8388f1a9a107d07ddd342e627e8 Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Fri, 4 Aug 2006 23:12:42 -0700 Subject: [MLSXFRM]: Add flow labeling This labels the flows that could utilize IPSec xfrms at the points the flows are defined so that IPSec policy and SAs at the right label can be used. The following protos are currently not handled, but they should continue to be able to use single-labeled IPSec like they currently do. ipmr ip_gre ipip igmp sit sctp ip6_tunnel (IPv6 over IPv6 tunnel device) decnet Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- include/linux/security.h | 38 +++++++++++++++++++++++++------------- include/net/route.h | 3 +++ net/dccp/ipv4.c | 1 + net/dccp/ipv6.c | 6 ++++++ net/ipv4/af_inet.c | 1 + net/ipv4/icmp.c | 2 ++ net/ipv4/inet_connection_sock.c | 1 + net/ipv4/ip_output.c | 2 ++ net/ipv4/netfilter/ipt_REJECT.c | 1 + net/ipv4/raw.c | 1 + net/ipv4/syncookies.c | 1 + net/ipv4/udp.c | 1 + net/ipv6/af_inet6.c | 1 + net/ipv6/datagram.c | 2 ++ net/ipv6/icmp.c | 2 ++ net/ipv6/inet6_connection_sock.c | 1 + net/ipv6/ndisc.c | 1 + net/ipv6/netfilter/ip6t_REJECT.c | 1 + net/ipv6/raw.c | 1 + net/ipv6/tcp_ipv6.c | 7 +++++++ net/ipv6/udp.c | 2 ++ net/xfrm/xfrm_policy.c | 3 +-- security/dummy.c | 7 +++---- security/selinux/hooks.c | 8 ++++---- security/selinux/include/xfrm.h | 14 +------------- security/selinux/xfrm.c | 11 +++++++---- 26 files changed, 79 insertions(+), 40 deletions(-) (limited to 'include/net') diff --git a/include/linux/security.h b/include/linux/security.h index 2c4921d79d19..f3909d189fe0 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -32,6 +32,7 @@ #include #include #include +#include struct ctl_table; @@ -815,8 +816,8 @@ struct swap_info_struct; * Deallocate security structure. * @sk_clone_security: * Clone/copy security structure. - * @sk_getsid: - * Retrieve the LSM-specific sid for the sock to enable caching of network + * @sk_getsecid: + * Retrieve the LSM-specific secid for the sock to enable caching of network * authorizations. * * Security hooks for XFRM operations. @@ -882,8 +883,9 @@ struct swap_info_struct; * Return 1 if there is a match. * @xfrm_decode_session: * @skb points to skb to decode. - * @fl points to the flow key to set. - * Return 0 if successful decoding. + * @secid points to the flow key secid to set. + * @ckall says if all xfrms used should be checked for same secid. + * Return 0 if ckall is zero or all xfrms used have the same secid. * * Security hooks affecting all Key Management operations * @@ -1353,7 +1355,7 @@ struct security_operations { int (*sk_alloc_security) (struct sock *sk, int family, gfp_t priority); void (*sk_free_security) (struct sock *sk); void (*sk_clone_security) (const struct sock *sk, struct sock *newsk); - unsigned int (*sk_getsid) (struct sock *sk, struct flowi *fl, u8 dir); + void (*sk_getsecid) (struct sock *sk, u32 *secid); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -1370,7 +1372,7 @@ struct security_operations { int (*xfrm_state_pol_flow_match)(struct xfrm_state *x, struct xfrm_policy *xp, struct flowi *fl); int (*xfrm_flow_state_match)(struct flowi *fl, struct xfrm_state *xfrm); - int (*xfrm_decode_session)(struct sk_buff *skb, struct flowi *fl); + int (*xfrm_decode_session)(struct sk_buff *skb, u32 *secid, int ckall); #endif /* CONFIG_SECURITY_NETWORK_XFRM */ /* key management security hooks */ @@ -2917,9 +2919,9 @@ static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) return security_ops->sk_clone_security(sk, newsk); } -static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir) +static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) { - return security_ops->sk_getsid(sk, fl, dir); + security_ops->sk_getsecid(sk, &fl->secid); } #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct socket * sock, @@ -3047,9 +3049,8 @@ static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) { } -static inline unsigned int security_sk_sid(struct sock *sk, struct flowi *fl, u8 dir) +static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) { - return 0; } #endif /* CONFIG_SECURITY_NETWORK */ @@ -3114,9 +3115,16 @@ static inline int security_xfrm_flow_state_match(struct flowi *fl, struct xfrm_s return security_ops->xfrm_flow_state_match(fl, xfrm); } -static inline int security_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +static inline int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid) +{ + return security_ops->xfrm_decode_session(skb, secid, 1); +} + +static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl) { - return security_ops->xfrm_decode_session(skb, fl); + int rc = security_ops->xfrm_decode_session(skb, &fl->secid, 0); + + BUG_ON(rc); } #else /* CONFIG_SECURITY_NETWORK_XFRM */ static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) @@ -3176,11 +3184,15 @@ static inline int security_xfrm_flow_state_match(struct flowi *fl, return 1; } -static inline int security_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +static inline int security_xfrm_decode_session(struct sk_buff *skb, u32 *secid) { return 0; } +static inline void security_skb_classify_flow(struct sk_buff *skb, struct flowi *fl) +{ +} + #endif /* CONFIG_SECURITY_NETWORK_XFRM */ #ifdef CONFIG_KEYS diff --git a/include/net/route.h b/include/net/route.h index c4a068692dcc..7f93ac0e0899 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -32,6 +32,7 @@ #include #include #include +#include #ifndef __KERNEL__ #warning This file is not supposed to be used outside of kernel. @@ -166,6 +167,7 @@ static inline int ip_route_connect(struct rtable **rp, u32 dst, ip_rt_put(*rp); *rp = NULL; } + security_sk_classify_flow(sk, &fl); return ip_route_output_flow(rp, &fl, sk, 0); } @@ -182,6 +184,7 @@ static inline int ip_route_newports(struct rtable **rp, u8 protocol, fl.proto = protocol; ip_rt_put(*rp); *rp = NULL; + security_sk_classify_flow(sk, &fl); return ip_route_output_flow(rp, &fl, sk, 0); } return 0; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 7f56f7e8f571..386498053b1c 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -678,6 +678,7 @@ static struct dst_entry* dccp_v4_route_skb(struct sock *sk, } }; + security_skb_classify_flow(skb, &fl); if (ip_route_output_flow(&rt, &fl, sk, 0)) { IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); return NULL; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 610c722ac27f..53d255c01431 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -201,6 +201,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = usin->sin6_port; fl.fl_ip_sport = inet->sport; + security_sk_classify_flow(sk, &fl); if (np->opt != NULL && np->opt->srcrt != NULL) { const struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; @@ -322,6 +323,7 @@ static void dccp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; + security_sk_classify_flow(sk, &fl); err = ip6_dst_lookup(sk, &dst, &fl); if (err) { @@ -422,6 +424,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, fl.oif = ireq6->iif; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; + security_sk_classify_flow(sk, &fl); if (dst == NULL) { opt = np->opt; @@ -566,6 +569,7 @@ static void dccp_v6_ctl_send_reset(struct sk_buff *rxskb) fl.oif = inet6_iif(rxskb); fl.fl_ip_dport = dh->dccph_dport; fl.fl_ip_sport = dh->dccph_sport; + security_skb_classify_flow(rxskb, &fl); /* sk = NULL, but it is safe for now. RST socket required. */ if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { @@ -622,6 +626,7 @@ static void dccp_v6_reqsk_send_ack(struct sk_buff *rxskb, fl.oif = inet6_iif(rxskb); fl.fl_ip_dport = dh->dccph_dport; fl.fl_ip_sport = dh->dccph_sport; + security_skb_classify_flow(rxskb, &fl); if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) { @@ -842,6 +847,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; + security_sk_classify_flow(sk, &fl); if (ip6_dst_lookup(sk, &dst, &fl)) goto out; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index c84a32070f8d..fc40da3b6d39 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1074,6 +1074,7 @@ int inet_sk_rebuild_header(struct sock *sk) }, }; + security_sk_classify_flow(sk, &fl); err = ip_route_output_flow(&rt, &fl, sk, 0); } if (!err) diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 4c86ac3d882d..6ad797c14163 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -406,6 +406,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) .saddr = rt->rt_spec_dst, .tos = RT_TOS(skb->nh.iph->tos) } }, .proto = IPPROTO_ICMP }; + security_skb_classify_flow(skb, &fl); if (ip_route_output_key(&rt, &fl)) goto out_unlock; } @@ -560,6 +561,7 @@ void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) } } }; + security_skb_classify_flow(skb_in, &fl); if (ip_route_output_key(&rt, &fl)) goto out_unlock; } diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index e50a1bfd7ccc..772b4eac78bc 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -327,6 +327,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, { .sport = inet_sk(sk)->sport, .dport = ireq->rmt_port } } }; + security_sk_classify_flow(sk, &fl); if (ip_route_output_flow(&rt, &fl, sk, 0)) { IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); return NULL; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a2ede167e045..308bdeac3455 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -328,6 +328,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) * keep trying until route appears or the connection times * itself out. */ + security_sk_classify_flow(sk, &fl); if (ip_route_output_flow(&rt, &fl, sk, 0)) goto no_route; } @@ -1366,6 +1367,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar { .sport = skb->h.th->dest, .dport = skb->h.th->source } }, .proto = sk->sk_protocol }; + security_skb_classify_flow(skb, &fl); if (ip_route_output_key(&rt, &fl)) return; } diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 269bc2067cb8..7f905bf2bde5 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -90,6 +90,7 @@ static inline struct rtable *route_reverse(struct sk_buff *skb, fl.proto = IPPROTO_TCP; fl.fl_ip_sport = tcph->dest; fl.fl_ip_dport = tcph->source; + security_skb_classify_flow(skb, &fl); xfrm_lookup((struct dst_entry **)&rt, &fl, NULL, 0); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 62b2762a2420..fe44cb50a1c5 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -484,6 +484,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, if (!inet->hdrincl) raw_probe_proto_opt(&fl, msg); + security_sk_classify_flow(sk, &fl); err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); } if (err) diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index e20be3331f67..307dc3c0d635 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -259,6 +259,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, .uli_u = { .ports = { .sport = skb->h.th->dest, .dport = skb->h.th->source } } }; + security_sk_classify_flow(sk, &fl); if (ip_route_output_key(&rt, &fl)) { reqsk_free(req); goto out; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index f136cec96d95..a4d005eccc7f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -603,6 +603,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, .uli_u = { .ports = { .sport = inet->sport, .dport = dport } } }; + security_sk_classify_flow(sk, &fl); err = ip_route_output_flow(&rt, &fl, sk, !(msg->msg_flags&MSG_DONTWAIT)); if (err) goto out; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index ac85e9c532c2..82a1b1a328db 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -637,6 +637,7 @@ int inet6_sk_rebuild_header(struct sock *sk) fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; + security_sk_classify_flow(sk, &fl); if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) np->opt->srcrt; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index 3b55b4c8e2d1..c73508e090a6 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -156,6 +156,8 @@ ipv4_connected: if (!fl.oif && (addr_type&IPV6_ADDR_MULTICAST)) fl.oif = np->mcast_oif; + security_sk_classify_flow(sk, &fl); + if (flowlabel) { if (flowlabel->opt && flowlabel->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *) flowlabel->opt->srcrt; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index 356a8a7ef22a..dbfce089e916 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -358,6 +358,7 @@ void icmpv6_send(struct sk_buff *skb, int type, int code, __u32 info, fl.oif = iif; fl.fl_icmp_type = type; fl.fl_icmp_code = code; + security_skb_classify_flow(skb, &fl); if (icmpv6_xmit_lock()) return; @@ -472,6 +473,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) ipv6_addr_copy(&fl.fl6_src, saddr); fl.oif = skb->dev->ifindex; fl.fl_icmp_type = ICMPV6_ECHO_REPLY; + security_skb_classify_flow(skb, &fl); if (icmpv6_xmit_lock()) return; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index bf491077b822..7a51a258615d 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -157,6 +157,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) fl.oif = sk->sk_bound_dev_if; fl.fl_ip_sport = inet->sport; fl.fl_ip_dport = inet->dport; + security_sk_classify_flow(sk, &fl); if (np->opt && np->opt->srcrt) { struct rt0_hdr *rt0 = (struct rt0_hdr *)np->opt->srcrt; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index b50055b9278d..67cfc3813c32 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -419,6 +419,7 @@ static inline void ndisc_flow_init(struct flowi *fl, u8 type, fl->proto = IPPROTO_ICMPV6; fl->fl_icmp_type = type; fl->fl_icmp_code = 0; + security_sk_classify_flow(ndisc_socket->sk, fl); } static void ndisc_send_na(struct net_device *dev, struct neighbour *neigh, diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 8629ba195d2d..c4eba1aeb323 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -96,6 +96,7 @@ static void send_reset(struct sk_buff *oldskb) ipv6_addr_copy(&fl.fl6_dst, &oip6h->saddr); fl.fl_ip_sport = otcph.dest; fl.fl_ip_dport = otcph.source; + security_skb_classify_flow(oldskb, &fl); dst = ip6_route_output(NULL, &fl); if (dst == NULL) return; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index 15b862d8acab..d5040e172292 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -759,6 +759,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, if (!fl.oif && ipv6_addr_is_multicast(&fl.fl6_dst)) fl.oif = np->mcast_oif; + security_sk_classify_flow(sk, &fl); err = ip6_dst_lookup(sk, &dst, &fl); if (err) diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 802a1a6b1037..46922e57e311 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -251,6 +251,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, final_p = &final; } + security_sk_classify_flow(sk, &fl); + err = ip6_dst_lookup(sk, &dst, &fl); if (err) goto failure; @@ -374,6 +376,7 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet->dport; fl.fl_ip_sport = inet->sport; + security_skb_classify_flow(skb, &fl); if ((err = ip6_dst_lookup(sk, &dst, &fl))) { sk->sk_err_soft = -err; @@ -467,6 +470,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, fl.oif = treq->iif; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; + security_sk_classify_flow(sk, &fl); if (dst == NULL) { opt = np->opt; @@ -625,6 +629,7 @@ static void tcp_v6_send_reset(struct sk_buff *skb) fl.oif = inet6_iif(skb); fl.fl_ip_dport = t1->dest; fl.fl_ip_sport = t1->source; + security_skb_classify_flow(skb, &fl); /* sk = NULL, but it is safe for now. RST socket required. */ if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { @@ -691,6 +696,7 @@ static void tcp_v6_send_ack(struct sk_buff *skb, u32 seq, u32 ack, u32 win, u32 fl.oif = inet6_iif(skb); fl.fl_ip_dport = t1->dest; fl.fl_ip_sport = t1->source; + security_skb_classify_flow(skb, &fl); if (!ip6_dst_lookup(NULL, &buff->dst, &fl)) { if (xfrm_lookup(&buff->dst, &fl, NULL, 0) >= 0) { @@ -923,6 +929,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; + security_sk_classify_flow(sk, &fl); if (ip6_dst_lookup(sk, &dst, &fl)) goto out; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 3d54f246411e..82c7c9cde2a8 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -782,6 +782,8 @@ do_udp_sendmsg: connected = 0; } + security_sk_classify_flow(sk, fl); + err = ip6_sk_dst_lookup(sk, &dst, fl); if (err) goto out; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 79405daadc52..32c963c90573 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -863,7 +863,6 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, u16 family; u8 dir = policy_to_flow_dir(XFRM_POLICY_OUT); - fl->secid = security_sk_sid(sk, fl, dir); restart: genid = atomic_read(&flow_cache_genid); policy = NULL; @@ -1039,7 +1038,7 @@ xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family return -EAFNOSUPPORT; afinfo->decode_session(skb, fl); - err = security_xfrm_decode_session(skb, fl); + err = security_xfrm_decode_session(skb, &fl->secid); xfrm_policy_put_afinfo(afinfo); return err; } diff --git a/security/dummy.c b/security/dummy.c index c1f10654871e..c0ff6b9bfd7d 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -809,9 +809,8 @@ static inline void dummy_sk_clone_security (const struct sock *sk, struct sock * { } -static unsigned int dummy_sk_getsid(struct sock *sk, struct flowi *fl, u8 dir) +static inline void dummy_sk_getsecid(struct sock *sk, u32 *secid) { - return 0; } #endif /* CONFIG_SECURITY_NETWORK */ @@ -866,7 +865,7 @@ static int dummy_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm return 1; } -static int dummy_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +static int dummy_xfrm_decode_session(struct sk_buff *skb, u32 *fl, int ckall) { return 0; } @@ -1083,7 +1082,7 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, sk_alloc_security); set_to_dummy_if_null(ops, sk_free_security); set_to_dummy_if_null(ops, sk_clone_security); - set_to_dummy_if_null(ops, sk_getsid); + set_to_dummy_if_null(ops, sk_getsecid); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM set_to_dummy_if_null(ops, xfrm_policy_alloc_security); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 5c189da07bc9..4e5989d584ce 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3561,14 +3561,14 @@ static void selinux_sk_clone_security(const struct sock *sk, struct sock *newsk) newssec->peer_sid = ssec->peer_sid; } -static unsigned int selinux_sk_getsid_security(struct sock *sk, struct flowi *fl, u8 dir) +static void selinux_sk_getsecid(struct sock *sk, u32 *secid) { if (!sk) - return selinux_no_sk_sid(fl); + *secid = SECINITSID_ANY_SOCKET; else { struct sk_security_struct *sksec = sk->sk_security; - return sksec->sid; + *secid = sksec->sid; } } @@ -4622,7 +4622,7 @@ static struct security_operations selinux_ops = { .sk_alloc_security = selinux_sk_alloc_security, .sk_free_security = selinux_sk_free_security, .sk_clone_security = selinux_sk_clone_security, - .sk_getsid = selinux_sk_getsid_security, + .sk_getsecid = selinux_sk_getsecid, #ifdef CONFIG_SECURITY_NETWORK_XFRM .xfrm_policy_alloc_security = selinux_xfrm_policy_alloc, diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index f51a3e84bd9b..8e45c1d588a8 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -19,7 +19,7 @@ int selinux_xfrm_policy_lookup(struct xfrm_policy *xp, u32 fl_secid, u8 dir); int selinux_xfrm_state_pol_flow_match(struct xfrm_state *x, struct xfrm_policy *xp, struct flowi *fl); int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm); -int selinux_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl); +int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *fl, int ckall); /* @@ -33,18 +33,6 @@ static inline struct inode_security_struct *get_sock_isec(struct sock *sk) return SOCK_INODE(sk->sk_socket)->i_security; } - -static inline u32 selinux_no_sk_sid(struct flowi *fl) -{ - /* NOTE: no sock occurs on ICMP reply, forwards, ... */ - /* icmp_reply: authorize as kernel packet */ - if (fl && fl->proto == IPPROTO_ICMP) { - return SECINITSID_KERNEL; - } - - return SECINITSID_ANY_SOCKET; -} - #ifdef CONFIG_SECURITY_NETWORK_XFRM int selinux_xfrm_sock_rcv_skb(u32 sid, struct sk_buff *skb, struct avc_audit_data *ad); diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index a502b0540e3d..c750ef7af66f 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -158,11 +158,11 @@ int selinux_xfrm_flow_state_match(struct flowi *fl, struct xfrm_state *xfrm) * LSM hook implementation that determines the sid for the session. */ -int selinux_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) +int selinux_xfrm_decode_session(struct sk_buff *skb, u32 *sid, int ckall) { struct sec_path *sp; - fl->secid = SECSID_NULL; + *sid = SECSID_NULL; if (skb == NULL) return 0; @@ -177,10 +177,13 @@ int selinux_xfrm_decode_session(struct sk_buff *skb, struct flowi *fl) struct xfrm_sec_ctx *ctx = x->security; if (!sid_set) { - fl->secid = ctx->ctx_sid; + *sid = ctx->ctx_sid; sid_set = 1; + + if (!ckall) + break; } - else if (fl->secid != ctx->ctx_sid) + else if (*sid != ctx->ctx_sid) return -EINVAL; } } -- cgit v1.3-8-gc7d7 From cb969f072b6d67770b559617f14e767f47e77ece Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Mon, 24 Jul 2006 23:32:20 -0700 Subject: [MLSXFRM]: Default labeling of socket specific IPSec policies This defaults the label of socket-specific IPSec policies to be the same as the socket they are set on. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- include/linux/security.h | 19 ++++++++++++++++--- include/net/xfrm.h | 2 +- net/key/af_key.c | 15 +++++++++++---- net/xfrm/xfrm_state.c | 2 +- net/xfrm/xfrm_user.c | 13 +++++++++++-- security/dummy.c | 3 ++- security/selinux/include/xfrm.h | 3 ++- security/selinux/xfrm.c | 33 ++++++++++++++++++++++----------- 8 files changed, 66 insertions(+), 24 deletions(-) (limited to 'include/net') diff --git a/include/linux/security.h b/include/linux/security.h index f3909d189fe0..8e3dc6c51a6d 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -827,8 +827,10 @@ struct swap_info_struct; * used by the XFRM system. * @sec_ctx contains the security context information being provided by * the user-level policy update program (e.g., setkey). + * @sk refers to the sock from which to derive the security context. * Allocate a security structure to the xp->security field; the security - * field is initialized to NULL when the xfrm_policy is allocated. + * field is initialized to NULL when the xfrm_policy is allocated. Only + * one of sec_ctx or sock can be specified. * Return 0 if operation was successful (memory to allocate, legal context) * @xfrm_policy_clone_security: * @old contains an existing xfrm_policy in the SPD. @@ -1359,7 +1361,8 @@ struct security_operations { #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM - int (*xfrm_policy_alloc_security) (struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx); + int (*xfrm_policy_alloc_security) (struct xfrm_policy *xp, + struct xfrm_user_sec_ctx *sec_ctx, struct sock *sk); int (*xfrm_policy_clone_security) (struct xfrm_policy *old, struct xfrm_policy *new); void (*xfrm_policy_free_security) (struct xfrm_policy *xp); int (*xfrm_policy_delete_security) (struct xfrm_policy *xp); @@ -3057,7 +3060,12 @@ static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) #ifdef CONFIG_SECURITY_NETWORK_XFRM static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) { - return security_ops->xfrm_policy_alloc_security(xp, sec_ctx); + return security_ops->xfrm_policy_alloc_security(xp, sec_ctx, NULL); +} + +static inline int security_xfrm_sock_policy_alloc(struct xfrm_policy *xp, struct sock *sk) +{ + return security_ops->xfrm_policy_alloc_security(xp, NULL, sk); } static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new) @@ -3132,6 +3140,11 @@ static inline int security_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm return 0; } +static inline int security_xfrm_sock_policy_alloc(struct xfrm_policy *xp, struct sock *sk) +{ + return 0; +} + static inline int security_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new) { return 0; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 3ecd9fa1ed4b..00bf86e6e82b 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -362,7 +362,7 @@ struct xfrm_mgr char *id; int (*notify)(struct xfrm_state *x, struct km_event *c); int (*acquire)(struct xfrm_state *x, struct xfrm_tmpl *, struct xfrm_policy *xp, int dir); - struct xfrm_policy *(*compile_policy)(u16 family, int opt, u8 *data, int len, int *dir); + struct xfrm_policy *(*compile_policy)(struct sock *sk, int opt, u8 *data, int len, int *dir); int (*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport); int (*notify_policy)(struct xfrm_policy *x, int dir, struct km_event *c); }; diff --git a/net/key/af_key.c b/net/key/af_key.c index a065e1a67773..797c744a8438 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -2843,14 +2843,14 @@ static int pfkey_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *t, struct return pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_REGISTERED, NULL); } -static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, +static struct xfrm_policy *pfkey_compile_policy(struct sock *sk, int opt, u8 *data, int len, int *dir) { struct xfrm_policy *xp; struct sadb_x_policy *pol = (struct sadb_x_policy*)data; struct sadb_x_sec_ctx *sec_ctx; - switch (family) { + switch (sk->sk_family) { case AF_INET: if (opt != IP_IPSEC_POLICY) { *dir = -EOPNOTSUPP; @@ -2891,7 +2891,7 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, xp->lft.hard_byte_limit = XFRM_INF; xp->lft.soft_packet_limit = XFRM_INF; xp->lft.hard_packet_limit = XFRM_INF; - xp->family = family; + xp->family = sk->sk_family; xp->xfrm_nr = 0; if (pol->sadb_x_policy_type == IPSEC_POLICY_IPSEC && @@ -2907,8 +2907,10 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, p += pol->sadb_x_policy_len*8; sec_ctx = (struct sadb_x_sec_ctx *)p; if (len < pol->sadb_x_policy_len*8 + - sec_ctx->sadb_x_sec_len) + sec_ctx->sadb_x_sec_len) { + *dir = -EINVAL; goto out; + } if ((*dir = verify_sec_ctx_len(p))) goto out; uctx = pfkey_sadb2xfrm_user_sec_ctx(sec_ctx); @@ -2918,6 +2920,11 @@ static struct xfrm_policy *pfkey_compile_policy(u16 family, int opt, if (*dir) goto out; } + else { + *dir = security_xfrm_sock_policy_alloc(xp, sk); + if (*dir) + goto out; + } *dir = pol->sadb_x_policy_dir-1; return xp; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index be02bd981d12..1c796087ee78 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1026,7 +1026,7 @@ int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen err = -EINVAL; read_lock(&xfrm_km_lock); list_for_each_entry(km, &xfrm_km_list, list) { - pol = km->compile_policy(sk->sk_family, optname, data, + pol = km->compile_policy(sk, optname, data, optlen, &err); if (err >= 0) break; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index dac8db1088bc..f70e158874d2 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1757,7 +1757,7 @@ static int xfrm_send_acquire(struct xfrm_state *x, struct xfrm_tmpl *xt, /* User gives us xfrm_user_policy_info followed by an array of 0 * or more templates. */ -static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt, +static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, u8 *data, int len, int *dir) { struct xfrm_userpolicy_info *p = (struct xfrm_userpolicy_info *)data; @@ -1765,7 +1765,7 @@ static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt, struct xfrm_policy *xp; int nr; - switch (family) { + switch (sk->sk_family) { case AF_INET: if (opt != IP_XFRM_POLICY) { *dir = -EOPNOTSUPP; @@ -1807,6 +1807,15 @@ static struct xfrm_policy *xfrm_compile_policy(u16 family, int opt, copy_from_user_policy(xp, p); copy_templates(xp, ut, nr); + if (!xp->security) { + int err = security_xfrm_sock_policy_alloc(xp, sk); + if (err) { + kfree(xp); + *dir = err; + return NULL; + } + } + *dir = p->dir; return xp; diff --git a/security/dummy.c b/security/dummy.c index c0ff6b9bfd7d..66cc06404930 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -815,7 +815,8 @@ static inline void dummy_sk_getsecid(struct sock *sk, u32 *secid) #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM -static int dummy_xfrm_policy_alloc_security(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx) +static int dummy_xfrm_policy_alloc_security(struct xfrm_policy *xp, + struct xfrm_user_sec_ctx *sec_ctx, struct sock *sk) { return 0; } diff --git a/security/selinux/include/xfrm.h b/security/selinux/include/xfrm.h index 8e45c1d588a8..1822c73e5085 100644 --- a/security/selinux/include/xfrm.h +++ b/security/selinux/include/xfrm.h @@ -7,7 +7,8 @@ #ifndef _SELINUX_XFRM_H_ #define _SELINUX_XFRM_H_ -int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *sec_ctx); +int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, + struct xfrm_user_sec_ctx *sec_ctx, struct sock *sk); int selinux_xfrm_policy_clone(struct xfrm_policy *old, struct xfrm_policy *new); void selinux_xfrm_policy_free(struct xfrm_policy *xp); int selinux_xfrm_policy_delete(struct xfrm_policy *xp); diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index c750ef7af66f..d3690f985135 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -208,10 +208,8 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, BUG_ON(uctx && pol); - if (pol) - goto from_policy; - - BUG_ON(!uctx); + if (!uctx) + goto not_from_user; if (uctx->ctx_doi != XFRM_SC_ALG_SELINUX) return -EINVAL; @@ -251,11 +249,14 @@ static int selinux_xfrm_sec_ctx_alloc(struct xfrm_sec_ctx **ctxp, return rc; -from_policy: - BUG_ON(!pol); - rc = security_sid_mls_copy(pol->ctx_sid, sid, &ctx_sid); - if (rc) - goto out; +not_from_user: + if (pol) { + rc = security_sid_mls_copy(pol->ctx_sid, sid, &ctx_sid); + if (rc) + goto out; + } + else + ctx_sid = sid; rc = security_sid_to_context(ctx_sid, &ctx_str, &str_len); if (rc) @@ -293,13 +294,23 @@ out2: * LSM hook implementation that allocs and transfers uctx spec to * xfrm_policy. */ -int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, struct xfrm_user_sec_ctx *uctx) +int selinux_xfrm_policy_alloc(struct xfrm_policy *xp, + struct xfrm_user_sec_ctx *uctx, struct sock *sk) { int err; + u32 sid; BUG_ON(!xp); + BUG_ON(uctx && sk); + + if (sk) { + struct sk_security_struct *ssec = sk->sk_security; + sid = ssec->sid; + } + else + sid = SECSID_NULL; - err = selinux_xfrm_sec_ctx_alloc(&xp->security, uctx, NULL, 0); + err = selinux_xfrm_sec_ctx_alloc(&xp->security, uctx, NULL, sid); return err; } -- cgit v1.3-8-gc7d7 From 4237c75c0a35535d7f9f2bfeeb4b4df1e068a0bf Mon Sep 17 00:00:00 2001 From: Venkat Yekkirala Date: Mon, 24 Jul 2006 23:32:50 -0700 Subject: [MLSXFRM]: Auto-labeling of child sockets This automatically labels the TCP, Unix stream, and dccp child sockets as well as openreqs to be at the same MLS level as the peer. This will result in the selection of appropriately labeled IPSec Security Associations. This also uses the sock's sid (as opposed to the isec sid) in SELinux enforcement of secmark in rcv_skb and postroute_last hooks. Signed-off-by: Venkat Yekkirala Signed-off-by: David S. Miller --- include/linux/security.h | 55 ++++++++++++++++ include/net/request_sock.h | 1 + include/net/sock.h | 1 + net/dccp/ipv4.c | 3 + net/dccp/ipv6.c | 7 +- net/ipv4/inet_connection_sock.c | 4 +- net/ipv4/syncookies.c | 6 +- net/ipv4/tcp_ipv4.c | 3 + net/ipv6/tcp_ipv6.c | 6 +- security/dummy.c | 24 +++++++ security/selinux/hooks.c | 137 +++++++++++++++++++++++++++------------- security/selinux/xfrm.c | 1 - 12 files changed, 197 insertions(+), 51 deletions(-) (limited to 'include/net') diff --git a/include/linux/security.h b/include/linux/security.h index 8e3dc6c51a6d..bb4c80fdfe7a 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -90,6 +90,7 @@ extern int cap_netlink_recv(struct sk_buff *skb, int cap); struct nfsctl_arg; struct sched_param; struct swap_info_struct; +struct request_sock; /* bprm_apply_creds unsafe reasons */ #define LSM_UNSAFE_SHARE 1 @@ -819,6 +820,14 @@ struct swap_info_struct; * @sk_getsecid: * Retrieve the LSM-specific secid for the sock to enable caching of network * authorizations. + * @sock_graft: + * Sets the socket's isec sid to the sock's sid. + * @inet_conn_request: + * Sets the openreq's sid to socket's sid with MLS portion taken from peer sid. + * @inet_csk_clone: + * Sets the new child socket's sid to the openreq sid. + * @req_classify_flow: + * Sets the flow's sid to the openreq sid. * * Security hooks for XFRM operations. * @@ -1358,6 +1367,11 @@ struct security_operations { void (*sk_free_security) (struct sock *sk); void (*sk_clone_security) (const struct sock *sk, struct sock *newsk); void (*sk_getsecid) (struct sock *sk, u32 *secid); + void (*sock_graft)(struct sock* sk, struct socket *parent); + int (*inet_conn_request)(struct sock *sk, struct sk_buff *skb, + struct request_sock *req); + void (*inet_csk_clone)(struct sock *newsk, const struct request_sock *req); + void (*req_classify_flow)(const struct request_sock *req, struct flowi *fl); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -2926,6 +2940,28 @@ static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) { security_ops->sk_getsecid(sk, &fl->secid); } + +static inline void security_req_classify_flow(const struct request_sock *req, struct flowi *fl) +{ + security_ops->req_classify_flow(req, fl); +} + +static inline void security_sock_graft(struct sock* sk, struct socket *parent) +{ + security_ops->sock_graft(sk, parent); +} + +static inline int security_inet_conn_request(struct sock *sk, + struct sk_buff *skb, struct request_sock *req) +{ + return security_ops->inet_conn_request(sk, skb, req); +} + +static inline void security_inet_csk_clone(struct sock *newsk, + const struct request_sock *req) +{ + security_ops->inet_csk_clone(newsk, req); +} #else /* CONFIG_SECURITY_NETWORK */ static inline int security_unix_stream_connect(struct socket * sock, struct socket * other, @@ -3055,6 +3091,25 @@ static inline void security_sk_clone(const struct sock *sk, struct sock *newsk) static inline void security_sk_classify_flow(struct sock *sk, struct flowi *fl) { } + +static inline void security_req_classify_flow(const struct request_sock *req, struct flowi *fl) +{ +} + +static inline void security_sock_graft(struct sock* sk, struct socket *parent) +{ +} + +static inline int security_inet_conn_request(struct sock *sk, + struct sk_buff *skb, struct request_sock *req) +{ + return 0; +} + +static inline void security_inet_csk_clone(struct sock *newsk, + const struct request_sock *req) +{ +} #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM diff --git a/include/net/request_sock.h b/include/net/request_sock.h index c5d7f920c352..8e165ca16bd8 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -53,6 +53,7 @@ struct request_sock { unsigned long expires; struct request_sock_ops *rsk_ops; struct sock *sk; + u32 secid; }; static inline struct request_sock *reqsk_alloc(struct request_sock_ops *ops) diff --git a/include/net/sock.h b/include/net/sock.h index 91cdceb3c028..337ebec84c70 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -969,6 +969,7 @@ static inline void sock_graft(struct sock *sk, struct socket *parent) sk->sk_sleep = &parent->wait; parent->sk = sk; sk->sk_socket = parent; + security_sock_graft(sk, parent); write_unlock_bh(&sk->sk_callback_lock); } diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 386498053b1c..171d363876ee 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -501,6 +501,9 @@ int dccp_v4_conn_request(struct sock *sk, struct sk_buff *skb) dccp_openreq_init(req, &dp, skb); + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + ireq = inet_rsk(req); ireq->loc_addr = daddr; ireq->rmt_addr = saddr; diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 53d255c01431..231bc7c7e749 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -424,7 +424,7 @@ static int dccp_v6_send_response(struct sock *sk, struct request_sock *req, fl.oif = ireq6->iif; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; - security_sk_classify_flow(sk, &fl); + security_req_classify_flow(req, &fl); if (dst == NULL) { opt = np->opt; @@ -626,7 +626,7 @@ static void dccp_v6_reqsk_send_ack(struct sk_buff *rxskb, fl.oif = inet6_iif(rxskb); fl.fl_ip_dport = dh->dccph_dport; fl.fl_ip_sport = dh->dccph_sport; - security_skb_classify_flow(rxskb, &fl); + security_req_classify_flow(req, &fl); if (!ip6_dst_lookup(NULL, &skb->dst, &fl)) { if (xfrm_lookup(&skb->dst, &fl, NULL, 0) >= 0) { @@ -709,6 +709,9 @@ static int dccp_v6_conn_request(struct sock *sk, struct sk_buff *skb) dccp_openreq_init(req, &dp, skb); + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + ireq6 = inet6_rsk(req); ireq = inet_rsk(req); ipv6_addr_copy(&ireq6->rmt_addr, &skb->nh.ipv6h->saddr); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 772b4eac78bc..07204391d083 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -327,7 +327,7 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, { .sport = inet_sk(sk)->sport, .dport = ireq->rmt_port } } }; - security_sk_classify_flow(sk, &fl); + security_req_classify_flow(req, &fl); if (ip_route_output_flow(&rt, &fl, sk, 0)) { IP_INC_STATS_BH(IPSTATS_MIB_OUTNOROUTES); return NULL; @@ -510,6 +510,8 @@ struct sock *inet_csk_clone(struct sock *sk, const struct request_sock *req, /* Deinitialize accept_queue to trap illegal accesses. */ memset(&newicsk->icsk_accept_queue, 0, sizeof(newicsk->icsk_accept_queue)); + + security_inet_csk_clone(newsk, req); } return newsk; } diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 307dc3c0d635..661e0a4bca72 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -214,6 +214,10 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, if (!req) goto out; + if (security_inet_conn_request(sk, skb, req)) { + reqsk_free(req); + goto out; + } ireq = inet_rsk(req); treq = tcp_rsk(req); treq->rcv_isn = htonl(skb->h.th->seq) - 1; @@ -259,7 +263,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, .uli_u = { .ports = { .sport = skb->h.th->dest, .dport = skb->h.th->source } } }; - security_sk_classify_flow(sk, &fl); + security_req_classify_flow(req, &fl); if (ip_route_output_key(&rt, &fl)) { reqsk_free(req); goto out; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 4b04c3edd4a9..43f6740244f8 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -798,6 +798,9 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) tcp_openreq_init(req, &tmp_opt, skb); + if (security_inet_conn_request(sk, skb, req)) + goto drop_and_free; + ireq = inet_rsk(req); ireq->loc_addr = daddr; ireq->rmt_addr = saddr; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 46922e57e311..302786a11cd6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -470,7 +470,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct request_sock *req, fl.oif = treq->iif; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; - security_sk_classify_flow(sk, &fl); + security_req_classify_flow(req, &fl); if (dst == NULL) { opt = np->opt; @@ -826,6 +826,8 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) tcp_rsk(req)->snt_isn = isn; + security_inet_conn_request(sk, skb, req); + if (tcp_v6_send_synack(sk, req, NULL)) goto drop; @@ -929,7 +931,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, fl.oif = sk->sk_bound_dev_if; fl.fl_ip_dport = inet_rsk(req)->rmt_port; fl.fl_ip_sport = inet_sk(sk)->sport; - security_sk_classify_flow(sk, &fl); + security_req_classify_flow(req, &fl); if (ip6_dst_lookup(sk, &dst, &fl)) goto out; diff --git a/security/dummy.c b/security/dummy.c index 66cc06404930..1c45f8e4aad1 100644 --- a/security/dummy.c +++ b/security/dummy.c @@ -812,6 +812,26 @@ static inline void dummy_sk_clone_security (const struct sock *sk, struct sock * static inline void dummy_sk_getsecid(struct sock *sk, u32 *secid) { } + +static inline void dummy_sock_graft(struct sock* sk, struct socket *parent) +{ +} + +static inline int dummy_inet_conn_request(struct sock *sk, + struct sk_buff *skb, struct request_sock *req) +{ + return 0; +} + +static inline void dummy_inet_csk_clone(struct sock *newsk, + const struct request_sock *req) +{ +} + +static inline void dummy_req_classify_flow(const struct request_sock *req, + struct flowi *fl) +{ +} #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM @@ -1084,6 +1104,10 @@ void security_fixup_ops (struct security_operations *ops) set_to_dummy_if_null(ops, sk_free_security); set_to_dummy_if_null(ops, sk_clone_security); set_to_dummy_if_null(ops, sk_getsecid); + set_to_dummy_if_null(ops, sock_graft); + set_to_dummy_if_null(ops, inet_conn_request); + set_to_dummy_if_null(ops, inet_csk_clone); + set_to_dummy_if_null(ops, req_classify_flow); #endif /* CONFIG_SECURITY_NETWORK */ #ifdef CONFIG_SECURITY_NETWORK_XFRM set_to_dummy_if_null(ops, xfrm_policy_alloc_security); diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 4e5989d584ce..1dc935f7b919 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3328,8 +3328,9 @@ static int selinux_socket_unix_stream_connect(struct socket *sock, /* server child socket */ ssec = newsk->sk_security; ssec->peer_sid = isec->sid; - - return 0; + err = security_sid_mls_copy(other_isec->sid, ssec->peer_sid, &ssec->sid); + + return err; } static int selinux_socket_unix_may_send(struct socket *sock, @@ -3355,11 +3356,29 @@ static int selinux_socket_unix_may_send(struct socket *sock, } static int selinux_sock_rcv_skb_compat(struct sock *sk, struct sk_buff *skb, - struct avc_audit_data *ad, u32 sock_sid, u16 sock_class, - u16 family, char *addrp, int len) + struct avc_audit_data *ad, u16 family, char *addrp, int len) { int err = 0; u32 netif_perm, node_perm, node_sid, if_sid, recv_perm = 0; + struct socket *sock; + u16 sock_class = 0; + u32 sock_sid = 0; + + read_lock_bh(&sk->sk_callback_lock); + sock = sk->sk_socket; + if (sock) { + struct inode *inode; + inode = SOCK_INODE(sock); + if (inode) { + struct inode_security_struct *isec; + isec = inode->i_security; + sock_sid = isec->sid; + sock_class = isec->sclass; + } + } + read_unlock_bh(&sk->sk_callback_lock); + if (!sock_sid) + goto out; if (!skb->dev) goto out; @@ -3419,12 +3438,10 @@ out: static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) { u16 family; - u16 sock_class = 0; char *addrp; int len, err = 0; - u32 sock_sid = 0; - struct socket *sock; struct avc_audit_data ad; + struct sk_security_struct *sksec = sk->sk_security; family = sk->sk_family; if (family != PF_INET && family != PF_INET6) @@ -3434,22 +3451,6 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) if (family == PF_INET6 && skb->protocol == ntohs(ETH_P_IP)) family = PF_INET; - read_lock_bh(&sk->sk_callback_lock); - sock = sk->sk_socket; - if (sock) { - struct inode *inode; - inode = SOCK_INODE(sock); - if (inode) { - struct inode_security_struct *isec; - isec = inode->i_security; - sock_sid = isec->sid; - sock_class = isec->sclass; - } - } - read_unlock_bh(&sk->sk_callback_lock); - if (!sock_sid) - goto out; - AVC_AUDIT_DATA_INIT(&ad, NET); ad.u.net.netif = skb->dev ? skb->dev->name : "[unknown]"; ad.u.net.family = family; @@ -3459,16 +3460,15 @@ static int selinux_socket_sock_rcv_skb(struct sock *sk, struct sk_buff *skb) goto out; if (selinux_compat_net) - err = selinux_sock_rcv_skb_compat(sk, skb, &ad, sock_sid, - sock_class, family, + err = selinux_sock_rcv_skb_compat(sk, skb, &ad, family, addrp, len); else - err = avc_has_perm(sock_sid, skb->secmark, SECCLASS_PACKET, + err = avc_has_perm(sksec->sid, skb->secmark, SECCLASS_PACKET, PACKET__RECV, &ad); if (err) goto out; - err = selinux_xfrm_sock_rcv_skb(sock_sid, skb, &ad); + err = selinux_xfrm_sock_rcv_skb(sksec->sid, skb, &ad); out: return err; } @@ -3572,6 +3572,49 @@ static void selinux_sk_getsecid(struct sock *sk, u32 *secid) } } +void selinux_sock_graft(struct sock* sk, struct socket *parent) +{ + struct inode_security_struct *isec = SOCK_INODE(parent)->i_security; + struct sk_security_struct *sksec = sk->sk_security; + + isec->sid = sksec->sid; +} + +int selinux_inet_conn_request(struct sock *sk, struct sk_buff *skb, + struct request_sock *req) +{ + struct sk_security_struct *sksec = sk->sk_security; + int err; + u32 newsid = 0; + u32 peersid; + + err = selinux_xfrm_decode_session(skb, &peersid, 0); + BUG_ON(err); + + err = security_sid_mls_copy(sksec->sid, peersid, &newsid); + if (err) + return err; + + req->secid = newsid; + return 0; +} + +void selinux_inet_csk_clone(struct sock *newsk, const struct request_sock *req) +{ + struct sk_security_struct *newsksec = newsk->sk_security; + + newsksec->sid = req->secid; + /* NOTE: Ideally, we should also get the isec->sid for the + new socket in sync, but we don't have the isec available yet. + So we will wait until sock_graft to do it, by which + time it will have been created and available. */ +} + +void selinux_req_classify_flow(const struct request_sock *req, struct flowi *fl) +{ + fl->secid = req->secid; +} + static int selinux_nlmsg_perm(struct sock *sk, struct sk_buff *skb) { int err = 0; @@ -3611,12 +3654,24 @@ out: #ifdef CONFIG_NETFILTER static int selinux_ip_postroute_last_compat(struct sock *sk, struct net_device *dev, - struct inode_security_struct *isec, struct avc_audit_data *ad, u16 family, char *addrp, int len) { - int err; + int err = 0; u32 netif_perm, node_perm, node_sid, if_sid, send_perm = 0; + struct socket *sock; + struct inode *inode; + struct inode_security_struct *isec; + + sock = sk->sk_socket; + if (!sock) + goto out; + + inode = SOCK_INODE(sock); + if (!inode) + goto out; + + isec = inode->i_security; err = sel_netif_sids(dev, &if_sid, NULL); if (err) @@ -3681,26 +3736,16 @@ static unsigned int selinux_ip_postroute_last(unsigned int hooknum, char *addrp; int len, err = 0; struct sock *sk; - struct socket *sock; - struct inode *inode; struct sk_buff *skb = *pskb; - struct inode_security_struct *isec; struct avc_audit_data ad; struct net_device *dev = (struct net_device *)out; + struct sk_security_struct *sksec; sk = skb->sk; if (!sk) goto out; - sock = sk->sk_socket; - if (!sock) - goto out; - - inode = SOCK_INODE(sock); - if (!inode) - goto out; - - isec = inode->i_security; + sksec = sk->sk_security; AVC_AUDIT_DATA_INIT(&ad, NET); ad.u.net.netif = dev->name; @@ -3711,16 +3756,16 @@ static unsigned int selinux_ip_postroute_last(unsigned int hooknum, goto out; if (selinux_compat_net) - err = selinux_ip_postroute_last_compat(sk, dev, isec, &ad, + err = selinux_ip_postroute_last_compat(sk, dev, &ad, family, addrp, len); else - err = avc_has_perm(isec->sid, skb->secmark, SECCLASS_PACKET, + err = avc_has_perm(sksec->sid, skb->secmark, SECCLASS_PACKET, PACKET__SEND, &ad); if (err) goto out; - err = selinux_xfrm_postroute_last(isec->sid, skb, &ad); + err = selinux_xfrm_postroute_last(sksec->sid, skb, &ad); out: return err ? NF_DROP : NF_ACCEPT; } @@ -4623,6 +4668,10 @@ static struct security_operations selinux_ops = { .sk_free_security = selinux_sk_free_security, .sk_clone_security = selinux_sk_clone_security, .sk_getsecid = selinux_sk_getsecid, + .sock_graft = selinux_sock_graft, + .inet_conn_request = selinux_inet_conn_request, + .inet_csk_clone = selinux_inet_csk_clone, + .req_classify_flow = selinux_req_classify_flow, #ifdef CONFIG_SECURITY_NETWORK_XFRM .xfrm_policy_alloc_security = selinux_xfrm_policy_alloc, diff --git a/security/selinux/xfrm.c b/security/selinux/xfrm.c index d3690f985135..3e742b850af6 100644 --- a/security/selinux/xfrm.c +++ b/security/selinux/xfrm.c @@ -271,7 +271,6 @@ not_from_user: goto out; } - ctx->ctx_doi = XFRM_SC_DOI_LSM; ctx->ctx_alg = XFRM_SC_ALG_SELINUX; ctx->ctx_sid = ctx_sid; -- cgit v1.3-8-gc7d7 From 11a03f78fbf15a866ba3bf6359a75cdfd1ced703 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 3 Aug 2006 16:46:20 -0700 Subject: [NetLabel]: core network changes Changes to the core network stack to support the NetLabel subsystem. This includes changes to the IPv4 option handling to support CIPSO labels. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- include/linux/ip.h | 1 + include/net/cipso_ipv4.h | 250 ++++++++++++++++++++++++++++++++++++++++ include/net/inet_sock.h | 2 +- include/net/netlabel.h | 291 +++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/ah4.c | 2 +- net/ipv4/ip_options.c | 19 ++++ 6 files changed, 563 insertions(+), 2 deletions(-) create mode 100644 include/net/cipso_ipv4.h create mode 100644 include/net/netlabel.h (limited to 'include/net') diff --git a/include/linux/ip.h b/include/linux/ip.h index 4b55cf1df732..2f4600146f83 100644 --- a/include/linux/ip.h +++ b/include/linux/ip.h @@ -57,6 +57,7 @@ #define IPOPT_SEC (2 |IPOPT_CONTROL|IPOPT_COPY) #define IPOPT_LSRR (3 |IPOPT_CONTROL|IPOPT_COPY) #define IPOPT_TIMESTAMP (4 |IPOPT_MEASUREMENT) +#define IPOPT_CIPSO (6 |IPOPT_CONTROL|IPOPT_COPY) #define IPOPT_RR (7 |IPOPT_CONTROL) #define IPOPT_SID (8 |IPOPT_CONTROL|IPOPT_COPY) #define IPOPT_SSRR (9 |IPOPT_CONTROL|IPOPT_COPY) diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h new file mode 100644 index 000000000000..c7175e725804 --- /dev/null +++ b/include/net/cipso_ipv4.h @@ -0,0 +1,250 @@ +/* + * CIPSO - Commercial IP Security Option + * + * This is an implementation of the CIPSO 2.2 protocol as specified in + * draft-ietf-cipso-ipsecurity-01.txt with additional tag types as found in + * FIPS-188, copies of both documents can be found in the Documentation + * directory. While CIPSO never became a full IETF RFC standard many vendors + * have chosen to adopt the protocol and over the years it has become a + * de-facto standard for labeled networking. + * + * Author: Paul Moore + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _CIPSO_IPV4_H +#define _CIPSO_IPV4_H + +#include +#include +#include +#include + +/* known doi values */ +#define CIPSO_V4_DOI_UNKNOWN 0x00000000 + +/* tag types */ +#define CIPSO_V4_TAG_INVALID 0 +#define CIPSO_V4_TAG_RBITMAP 1 +#define CIPSO_V4_TAG_ENUM 2 +#define CIPSO_V4_TAG_RANGE 5 +#define CIPSO_V4_TAG_PBITMAP 6 +#define CIPSO_V4_TAG_FREEFORM 7 + +/* doi mapping types */ +#define CIPSO_V4_MAP_UNKNOWN 0 +#define CIPSO_V4_MAP_STD 1 +#define CIPSO_V4_MAP_PASS 2 + +/* limits */ +#define CIPSO_V4_MAX_REM_LVLS 256 +#define CIPSO_V4_INV_LVL 0x80000000 +#define CIPSO_V4_MAX_LOC_LVLS (CIPSO_V4_INV_LVL - 1) +#define CIPSO_V4_MAX_REM_CATS 65536 +#define CIPSO_V4_INV_CAT 0x80000000 +#define CIPSO_V4_MAX_LOC_CATS (CIPSO_V4_INV_CAT - 1) + +/* + * CIPSO DOI definitions + */ + +/* DOI definition struct */ +#define CIPSO_V4_TAG_MAXCNT 5 +struct cipso_v4_doi { + u32 doi; + u32 type; + union { + struct cipso_v4_std_map_tbl *std; + } map; + u8 tags[CIPSO_V4_TAG_MAXCNT]; + + u32 valid; + struct list_head list; + struct rcu_head rcu; + struct list_head dom_list; +}; + +/* Standard CIPSO mapping table */ +/* NOTE: the highest order bit (i.e. 0x80000000) is an 'invalid' flag, if the + * bit is set then consider that value as unspecified, meaning the + * mapping for that particular level/category is invalid */ +struct cipso_v4_std_map_tbl { + struct { + u32 *cipso; + u32 *local; + u32 cipso_size; + u32 local_size; + } lvl; + struct { + u32 *cipso; + u32 *local; + u32 cipso_size; + u32 local_size; + } cat; +}; + +/* + * Sysctl Variables + */ + +#ifdef CONFIG_NETLABEL +extern int cipso_v4_cache_enabled; +extern int cipso_v4_cache_bucketsize; +extern int cipso_v4_rbm_optfmt; +extern int cipso_v4_rbm_strictvalid; +#endif + +/* + * Helper Functions + */ + +#define CIPSO_V4_OPTEXIST(x) (IPCB(x)->opt.cipso != 0) +#define CIPSO_V4_OPTPTR(x) ((x)->nh.raw + IPCB(x)->opt.cipso) + +/* + * DOI List Functions + */ + +#ifdef CONFIG_NETLABEL +int cipso_v4_doi_add(struct cipso_v4_doi *doi_def); +int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head)); +struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi); +struct sk_buff *cipso_v4_doi_dump_all(size_t headroom); +struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom); +int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain); +int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, + const char *domain); +#else +static inline int cipso_v4_doi_add(struct cipso_v4_doi *doi_def) +{ + return -ENOSYS; +} + +static inline int cipso_v4_doi_remove(u32 doi, + void (*callback) (struct rcu_head * head)) +{ + return 0; +} + +static inline struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi) +{ + return NULL; +} + +static inline struct sk_buff *cipso_v4_doi_dump_all(size_t headroom) +{ + return NULL; +} + +static inline struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom) +{ + return NULL; +} + +static inline int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, + const char *domain) +{ + return -ENOSYS; +} + +static inline int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, + const char *domain) +{ + return 0; +} +#endif /* CONFIG_NETLABEL */ + +/* + * Label Mapping Cache Functions + */ + +#ifdef CONFIG_NETLABEL +void cipso_v4_cache_invalidate(void); +int cipso_v4_cache_add(const struct sk_buff *skb, + const struct netlbl_lsm_secattr *secattr); +#else +static inline void cipso_v4_cache_invalidate(void) +{ + return; +} + +static inline int cipso_v4_cache_add(const struct sk_buff *skb, + const struct netlbl_lsm_secattr *secattr) +{ + return 0; +} +#endif /* CONFIG_NETLABEL */ + +/* + * Protocol Handling Functions + */ + +#ifdef CONFIG_NETLABEL +void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway); +int cipso_v4_socket_setopt(struct socket *sock, + unsigned char *opt, + u32 opt_len); +int cipso_v4_socket_setattr(const struct socket *sock, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr); +int cipso_v4_socket_getopt(const struct socket *sock, + unsigned char **opt, + u32 *opt_len); +int cipso_v4_socket_getattr(const struct socket *sock, + struct netlbl_lsm_secattr *secattr); +int cipso_v4_skbuff_getattr(const struct sk_buff *skb, + struct netlbl_lsm_secattr *secattr); +int cipso_v4_validate(unsigned char **option); +#else +static inline void cipso_v4_error(struct sk_buff *skb, + int error, + u32 gateway) +{ + return; +} + +static inline int cipso_v4_socket_setattr(const struct socket *sock, + const struct cipso_v4_doi *doi_def, + const struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + +static inline int cipso_v4_socket_getattr(const struct socket *sock, + struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + +static inline int cipso_v4_skbuff_getattr(const struct sk_buff *skb, + struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + +static inline int cipso_v4_validate(unsigned char **option) +{ + return -ENOSYS; +} +#endif /* CONFIG_NETLABEL */ + +#endif /* _CIPSO_IPV4_H */ diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 1f4a9a60d4cc..f4caad56cd03 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -51,7 +51,7 @@ struct ip_options { ts_needtime:1, ts_needaddr:1; unsigned char router_alert; - unsigned char __pad1; + unsigned char cipso; unsigned char __pad2; unsigned char __data[0]; }; diff --git a/include/net/netlabel.h b/include/net/netlabel.h new file mode 100644 index 000000000000..7cae730832c7 --- /dev/null +++ b/include/net/netlabel.h @@ -0,0 +1,291 @@ +/* + * NetLabel System + * + * The NetLabel system manages static and dynamic label mappings for network + * protocols such as CIPSO and RIPSO. + * + * Author: Paul Moore + * + */ + +/* + * (c) Copyright Hewlett-Packard Development Company, L.P., 2006 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See + * the GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ + +#ifndef _NETLABEL_H +#define _NETLABEL_H + +#include +#include +#include + +/* + * NetLabel - A management interface for maintaining network packet label + * mapping tables for explicit packet labling protocols. + * + * Network protocols such as CIPSO and RIPSO require a label translation layer + * to convert the label on the packet into something meaningful on the host + * machine. In the current Linux implementation these mapping tables live + * inside the kernel; NetLabel provides a mechanism for user space applications + * to manage these mapping tables. + * + * NetLabel makes use of the Generic NETLINK mechanism as a transport layer to + * send messages between kernel and user space. The general format of a + * NetLabel message is shown below: + * + * +-----------------+-------------------+--------- --- -- - + * | struct nlmsghdr | struct genlmsghdr | payload + * +-----------------+-------------------+--------- --- -- - + * + * The 'nlmsghdr' and 'genlmsghdr' structs should be dealt with like normal. + * The payload is dependent on the subsystem specified in the + * 'nlmsghdr->nlmsg_type' and should be defined below, supporting functions + * should be defined in the corresponding net/netlabel/netlabel_.h|c + * file. All of the fields in the NetLabel payload are NETLINK attributes, the + * length of each field is the length of the NETLINK attribute payload, see + * include/net/netlink.h for more information on NETLINK attributes. + * + */ + +/* + * NetLabel NETLINK protocol + */ + +#define NETLBL_PROTO_VERSION 1 + +/* NetLabel NETLINK types/families */ +#define NETLBL_NLTYPE_NONE 0 +#define NETLBL_NLTYPE_MGMT 1 +#define NETLBL_NLTYPE_MGMT_NAME "NLBL_MGMT" +#define NETLBL_NLTYPE_RIPSO 2 +#define NETLBL_NLTYPE_RIPSO_NAME "NLBL_RIPSO" +#define NETLBL_NLTYPE_CIPSOV4 3 +#define NETLBL_NLTYPE_CIPSOV4_NAME "NLBL_CIPSOv4" +#define NETLBL_NLTYPE_CIPSOV6 4 +#define NETLBL_NLTYPE_CIPSOV6_NAME "NLBL_CIPSOv6" +#define NETLBL_NLTYPE_UNLABELED 5 +#define NETLBL_NLTYPE_UNLABELED_NAME "NLBL_UNLBL" + +/* NetLabel return codes */ +#define NETLBL_E_OK 0 + +/* + * Helper functions + */ + +#define NETLBL_LEN_U8 nla_total_size(sizeof(u8)) +#define NETLBL_LEN_U16 nla_total_size(sizeof(u16)) +#define NETLBL_LEN_U32 nla_total_size(sizeof(u32)) + +/** + * netlbl_netlink_alloc_skb - Allocate a NETLINK message buffer + * @head: the amount of headroom in bytes + * @body: the desired size (minus headroom) in bytes + * @gfp_flags: the alloc flags to pass to alloc_skb() + * + * Description: + * Allocate a NETLINK message buffer based on the sizes given in @head and + * @body. If @head is greater than zero skb_reserve() is called to reserve + * @head bytes at the start of the buffer. Returns a valid sk_buff pointer on + * success, NULL on failure. + * + */ +static inline struct sk_buff *netlbl_netlink_alloc_skb(size_t head, + size_t body, + int gfp_flags) +{ + struct sk_buff *skb; + + skb = alloc_skb(NLMSG_ALIGN(head + body), gfp_flags); + if (skb == NULL) + return NULL; + if (head > 0) { + skb_reserve(skb, head); + if (skb_tailroom(skb) < body) { + kfree_skb(skb); + return NULL; + } + } + + return skb; +} + +/* + * NetLabel - Kernel API for accessing the network packet label mappings. + * + * The following functions are provided for use by other kernel modules, + * specifically kernel LSM modules, to provide a consistent, transparent API + * for dealing with explicit packet labeling protocols such as CIPSO and + * RIPSO. The functions defined here are implemented in the + * net/netlabel/netlabel_kapi.c file. + * + */ + +/* Domain mapping definition struct */ +struct netlbl_dom_map; + +/* Domain mapping operations */ +int netlbl_domhsh_remove(const char *domain); + +/* LSM security attributes */ +struct netlbl_lsm_cache { + void (*free) (const void *data); + void *data; +}; +struct netlbl_lsm_secattr { + char *domain; + + u32 mls_lvl; + u32 mls_lvl_vld; + unsigned char *mls_cat; + size_t mls_cat_len; + + struct netlbl_lsm_cache cache; +}; + +/* + * LSM security attribute operations + */ + + +/** + * netlbl_secattr_init - Initialize a netlbl_lsm_secattr struct + * @secattr: the struct to initialize + * + * Description: + * Initialize an already allocated netlbl_lsm_secattr struct. Returns zero on + * success, negative values on error. + * + */ +static inline int netlbl_secattr_init(struct netlbl_lsm_secattr *secattr) +{ + memset(secattr, 0, sizeof(*secattr)); + return 0; +} + +/** + * netlbl_secattr_destroy - Clears a netlbl_lsm_secattr struct + * @secattr: the struct to clear + * @clear_cache: cache clear flag + * + * Description: + * Destroys the @secattr struct, including freeing all of the internal buffers. + * If @clear_cache is true then free the cache fields, otherwise leave them + * intact. The struct must be reset with a call to netlbl_secattr_init() + * before reuse. + * + */ +static inline void netlbl_secattr_destroy(struct netlbl_lsm_secattr *secattr, + u32 clear_cache) +{ + if (clear_cache && secattr->cache.data != NULL && secattr->cache.free) + secattr->cache.free(secattr->cache.data); + kfree(secattr->domain); + kfree(secattr->mls_cat); +} + +/** + * netlbl_secattr_alloc - Allocate and initialize a netlbl_lsm_secattr struct + * @flags: the memory allocation flags + * + * Description: + * Allocate and initialize a netlbl_lsm_secattr struct. Returns a valid + * pointer on success, or NULL on failure. + * + */ +static inline struct netlbl_lsm_secattr *netlbl_secattr_alloc(int flags) +{ + return kzalloc(sizeof(struct netlbl_lsm_secattr), flags); +} + +/** + * netlbl_secattr_free - Frees a netlbl_lsm_secattr struct + * @secattr: the struct to free + * @clear_cache: cache clear flag + * + * Description: + * Frees @secattr including all of the internal buffers. If @clear_cache is + * true then free the cache fields, otherwise leave them intact. + * + */ +static inline void netlbl_secattr_free(struct netlbl_lsm_secattr *secattr, + u32 clear_cache) +{ + netlbl_secattr_destroy(secattr, clear_cache); + kfree(secattr); +} + +/* + * LSM protocol operations + */ + +#ifdef CONFIG_NETLABEL +int netlbl_socket_setattr(const struct socket *sock, + const struct netlbl_lsm_secattr *secattr); +int netlbl_socket_getattr(const struct socket *sock, + struct netlbl_lsm_secattr *secattr); +int netlbl_skbuff_getattr(const struct sk_buff *skb, + struct netlbl_lsm_secattr *secattr); +void netlbl_skbuff_err(struct sk_buff *skb, int error); +#else +static inline int netlbl_socket_setattr(const struct socket *sock, + const struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + +static inline int netlbl_socket_getattr(const struct socket *sock, + struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + +static inline int netlbl_skbuff_getattr(const struct sk_buff *skb, + struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + +static inline void netlbl_skbuff_err(struct sk_buff *skb, int error) +{ + return; +} +#endif /* CONFIG_NETLABEL */ + +/* + * LSM label mapping cache operations + */ + +#ifdef CONFIG_NETLABEL +void netlbl_cache_invalidate(void); +int netlbl_cache_add(const struct sk_buff *skb, + const struct netlbl_lsm_secattr *secattr); +#else +static inline void netlbl_cache_invalidate(void) +{ + return; +} + +static inline int netlbl_cache_add(const struct sk_buff *skb, + const struct netlbl_lsm_secattr *secattr) +{ + return 0; +} +#endif /* CONFIG_NETLABEL */ + +#endif /* _NETLABEL_H */ diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 2b98943e6b02..008e69d2e423 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -35,7 +35,7 @@ static int ip_clear_mutable_options(struct iphdr *iph, u32 *daddr) switch (*optptr) { case IPOPT_SEC: case 0x85: /* Some "Extended Security" crap. */ - case 0x86: /* Another "Commercial Security" crap. */ + case IPOPT_CIPSO: case IPOPT_RA: case 0x80|21: /* RFC1770 */ break; diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 406056edc02b..e0a93b4fa8cc 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -24,6 +24,7 @@ #include #include #include +#include /* * Write options to IP header, record destination address to @@ -194,6 +195,13 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) dopt->is_strictroute = sopt->is_strictroute; } } + if (sopt->cipso) { + optlen = sptr[sopt->cipso+1]; + dopt->cipso = dopt->optlen+sizeof(struct iphdr); + memcpy(dptr, sptr+sopt->cipso, optlen); + dptr += optlen; + dopt->optlen += optlen; + } while (dopt->optlen & 3) { *dptr++ = IPOPT_END; dopt->optlen++; @@ -434,6 +442,17 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) if (optptr[2] == 0 && optptr[3] == 0) opt->router_alert = optptr - iph; break; + case IPOPT_CIPSO: + if (opt->cipso) { + pp_ptr = optptr; + goto error; + } + opt->cipso = optptr - iph; + if (cipso_v4_validate(&optptr)) { + pp_ptr = optptr; + goto error; + } + break; case IPOPT_SEC: case IPOPT_SID: default: -- cgit v1.3-8-gc7d7 From c71099acce933455123ee505cc75964610a209ad Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 4 Aug 2006 23:20:06 -0700 Subject: [IPV6]: Multiple Routing Tables Adds the framework to support multiple IPv6 routing tables. Currently all automatically generated routes are put into the same table. This could be changed at a later point after considering the produced locking overhead. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 39 ++++- include/net/ip6_route.h | 3 +- net/ipv6/Kconfig | 6 + net/ipv6/addrconf.c | 6 +- net/ipv6/ip6_fib.c | 144 +++++++++++++++++- net/ipv6/route.c | 380 ++++++++++++++++++++++++++++++++---------------- 6 files changed, 441 insertions(+), 137 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index a66e9de16a6c..818411519c89 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -51,6 +51,8 @@ struct rt6key int plen; }; +struct fib6_table; + struct rt6_info { union { @@ -71,6 +73,7 @@ struct rt6_info u32 rt6i_flags; u32 rt6i_metric; atomic_t rt6i_ref; + struct fib6_table *rt6i_table; struct rt6key rt6i_dst; struct rt6key rt6i_src; @@ -143,12 +146,43 @@ struct rt6_statistics { typedef void (*f_pnode)(struct fib6_node *fn, void *); -extern struct fib6_node ip6_routing_table; +struct fib6_table { + struct hlist_node tb6_hlist; + u32 tb6_id; + rwlock_t tb6_lock; + struct fib6_node tb6_root; +}; + +#define RT6_TABLE_UNSPEC RT_TABLE_UNSPEC +#define RT6_TABLE_MAIN RT_TABLE_MAIN +#define RT6_TABLE_LOCAL RT6_TABLE_MAIN +#define RT6_TABLE_DFLT RT6_TABLE_MAIN +#define RT6_TABLE_INFO RT6_TABLE_MAIN +#define RT6_TABLE_PREFIX RT6_TABLE_MAIN + +#ifdef CONFIG_IPV6_MULTIPLE_TABLES +#define FIB6_TABLE_MIN 1 +#define FIB6_TABLE_MAX RT_TABLE_MAX +#else +#define FIB6_TABLE_MIN RT_TABLE_MAIN +#define FIB6_TABLE_MAX FIB6_TABLE_MIN +#endif + +#define RT6_F_STRICT 1 +#define RT6_F_HAS_SADDR 2 + +typedef struct rt6_info *(*pol_lookup_t)(struct fib6_table *, + struct flowi *, int); /* * exported functions */ +extern struct fib6_table * fib6_get_table(u32 id); +extern struct fib6_table * fib6_new_table(u32 id); +extern struct dst_entry * fib6_rule_lookup(struct flowi *fl, int flags, + pol_lookup_t lookup); + extern struct fib6_node *fib6_lookup(struct fib6_node *root, struct in6_addr *daddr, struct in6_addr *saddr); @@ -161,6 +195,9 @@ extern void fib6_clean_tree(struct fib6_node *root, int (*func)(struct rt6_info *, void *arg), int prune, void *arg); +extern void fib6_clean_all(int (*func)(struct rt6_info *, void *arg), + int prune, void *arg); + extern int fib6_walk(struct fib6_walker_t *w); extern int fib6_walk_continue(struct fib6_walker_t *w); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 96b0e66406ec..d49c8c90eb68 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -58,7 +58,8 @@ extern int ipv6_route_ioctl(unsigned int cmd, void __user *arg); extern int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *, void *rtattr, - struct netlink_skb_parms *req); + struct netlink_skb_parms *req, + u32 table_id); extern int ip6_ins_rt(struct rt6_info *, struct nlmsghdr *, void *rtattr, diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 0ba06c0c5d39..159c63d99c81 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -136,3 +136,9 @@ config IPV6_TUNNEL If unsure, say N. +config IPV6_MULTIPLE_TABLES + bool "IPv6: Multiple Routing Tables" + depends on IPV6 && EXPERIMENTAL + ---help--- + Support multiple routing tables. + diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c7852b38e03e..318767fcefdc 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1525,7 +1525,7 @@ addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT)) rtmsg.rtmsg_flags |= RTF_NONEXTHOP; - ip6_route_add(&rtmsg, NULL, NULL, NULL); + ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_PREFIX); } /* Create "default" multicast route to the interface */ @@ -1542,7 +1542,7 @@ static void addrconf_add_mroute(struct net_device *dev) rtmsg.rtmsg_ifindex = dev->ifindex; rtmsg.rtmsg_flags = RTF_UP; rtmsg.rtmsg_type = RTMSG_NEWROUTE; - ip6_route_add(&rtmsg, NULL, NULL, NULL); + ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_LOCAL); } static void sit_route_add(struct net_device *dev) @@ -1559,7 +1559,7 @@ static void sit_route_add(struct net_device *dev) rtmsg.rtmsg_flags = RTF_UP|RTF_NONEXTHOP; rtmsg.rtmsg_ifindex = dev->ifindex; - ip6_route_add(&rtmsg, NULL, NULL, NULL); + ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_MAIN); } static void addrconf_add_lroute(struct net_device *dev) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 764221220afd..fcd7da830aca 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -26,6 +26,7 @@ #include #include #include +#include #ifdef CONFIG_PROC_FS #include @@ -147,6 +148,126 @@ static __inline__ void rt6_release(struct rt6_info *rt) dst_free(&rt->u.dst); } +static struct fib6_table fib6_main_tbl = { + .tb6_id = RT6_TABLE_MAIN, + .tb6_lock = RW_LOCK_UNLOCKED, + .tb6_root = { + .leaf = &ip6_null_entry, + .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO, + }, +}; + +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + +#define FIB_TABLE_HASHSZ 256 +static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; + +static struct fib6_table *fib6_alloc_table(u32 id) +{ + struct fib6_table *table; + + table = kzalloc(sizeof(*table), GFP_ATOMIC); + if (table != NULL) { + table->tb6_id = id; + table->tb6_lock = RW_LOCK_UNLOCKED; + table->tb6_root.leaf = &ip6_null_entry; + table->tb6_root.fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO; + } + + return table; +} + +static void fib6_link_table(struct fib6_table *tb) +{ + unsigned int h; + + h = tb->tb6_id & (FIB_TABLE_HASHSZ - 1); + + /* + * No protection necessary, this is the only list mutatation + * operation, tables never disappear once they exist. + */ + hlist_add_head_rcu(&tb->tb6_hlist, &fib_table_hash[h]); +} + +struct fib6_table *fib6_new_table(u32 id) +{ + struct fib6_table *tb; + + if (id == 0) + id = RT6_TABLE_MAIN; + tb = fib6_get_table(id); + if (tb) + return tb; + + tb = fib6_alloc_table(id); + if (tb != NULL) + fib6_link_table(tb); + + return tb; +} + +struct fib6_table *fib6_get_table(u32 id) +{ + struct fib6_table *tb; + struct hlist_node *node; + unsigned int h; + + if (id == 0) + id = RT6_TABLE_MAIN; + h = id & (FIB_TABLE_HASHSZ - 1); + rcu_read_lock(); + hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb6_hlist) { + if (tb->tb6_id == id) { + rcu_read_unlock(); + return tb; + } + } + rcu_read_unlock(); + + return NULL; +} + +struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, + pol_lookup_t lookup) +{ + /* + * TODO: Add rule lookup + */ + struct fib6_table *table = fib6_get_table(RT6_TABLE_MAIN); + + return (struct dst_entry *) lookup(table, fl, flags); +} + +static void __init fib6_tables_init(void) +{ + fib6_link_table(&fib6_main_tbl); +} + +#else + +struct fib6_table *fib6_new_table(u32 id) +{ + return fib6_get_table(id); +} + +struct fib6_table *fib6_get_table(u32 id) +{ + return &fib6_main_tbl; +} + +struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, + pol_lookup_t lookup) +{ + return (struct dst_entry *) lookup(&fib6_main_tbl, fl, flags); +} + +static void __init fib6_tables_init(void) +{ +} + +#endif + /* * Routing Table @@ -1064,6 +1185,22 @@ void fib6_clean_tree(struct fib6_node *root, fib6_walk(&c.w); } +void fib6_clean_all(int (*func)(struct rt6_info *, void *arg), + int prune, void *arg) +{ + int i; + struct fib6_table *table; + + for (i = FIB6_TABLE_MIN; i <= FIB6_TABLE_MAX; i++) { + table = fib6_get_table(i); + if (table != NULL) { + write_lock_bh(&table->tb6_lock); + fib6_clean_tree(&table->tb6_root, func, prune, arg); + write_unlock_bh(&table->tb6_lock); + } + } +} + static int fib6_prune_clone(struct rt6_info *rt, void *arg) { if (rt->rt6i_flags & RTF_CACHE) { @@ -1142,11 +1279,8 @@ void fib6_run_gc(unsigned long dummy) } gc_args.more = 0; - - write_lock_bh(&rt6_lock); ndisc_dst_gc(&gc_args.more); - fib6_clean_tree(&ip6_routing_table, fib6_age, 0, NULL); - write_unlock_bh(&rt6_lock); + fib6_clean_all(fib6_age, 0, NULL); if (gc_args.more) mod_timer(&ip6_fib_timer, jiffies + ip6_rt_gc_interval); @@ -1165,6 +1299,8 @@ void __init fib6_init(void) NULL, NULL); if (!fib6_node_kmem) panic("cannot create fib6_nodes cache"); + + fib6_tables_init(); } void fib6_gc_cleanup(void) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index ce1f49b595b0..73efdadb9ab8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -140,16 +140,6 @@ struct rt6_info ip6_null_entry = { .rt6i_ref = ATOMIC_INIT(1), }; -struct fib6_node ip6_routing_table = { - .leaf = &ip6_null_entry, - .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO, -}; - -/* Protects all the ip6 fib */ - -DEFINE_RWLOCK(rt6_lock); - - /* allocate dst with ip6_dst_ops */ static __inline__ struct rt6_info *ip6_dst_alloc(void) { @@ -188,8 +178,14 @@ static __inline__ int rt6_check_expired(const struct rt6_info *rt) time_after(jiffies, rt->rt6i_expires)); } +static inline int rt6_need_strict(struct in6_addr *daddr) +{ + return (ipv6_addr_type(daddr) & + (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL)); +} + /* - * Route lookup. Any rt6_lock is implied. + * Route lookup. Any table->tb6_lock is implied. */ static __inline__ struct rt6_info *rt6_device_match(struct rt6_info *rt, @@ -441,27 +437,66 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, } #endif -struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, - int oif, int strict) +#define BACKTRACK() \ +if (rt == &ip6_null_entry && flags & RT6_F_STRICT) { \ + while ((fn = fn->parent) != NULL) { \ + if (fn->fn_flags & RTN_TL_ROOT) { \ + dst_hold(&rt->u.dst); \ + goto out; \ + } \ + if (fn->fn_flags & RTN_RTINFO) \ + goto restart; \ + } \ +} + +static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table, + struct flowi *fl, int flags) { struct fib6_node *fn; struct rt6_info *rt; - read_lock_bh(&rt6_lock); - fn = fib6_lookup(&ip6_routing_table, daddr, saddr); - rt = rt6_device_match(fn->leaf, oif, strict); + read_lock_bh(&table->tb6_lock); + fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); +restart: + rt = fn->leaf; + rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT); + BACKTRACK(); dst_hold(&rt->u.dst); - rt->u.dst.__use++; - read_unlock_bh(&rt6_lock); +out: + read_unlock_bh(&table->tb6_lock); rt->u.dst.lastuse = jiffies; - if (rt->u.dst.error == 0) - return rt; - dst_release(&rt->u.dst); + rt->u.dst.__use++; + + return rt; + +} + +struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, + int oif, int strict) +{ + struct flowi fl = { + .oif = oif, + .nl_u = { + .ip6_u = { + .daddr = *daddr, + /* TODO: saddr */ + }, + }, + }; + struct dst_entry *dst; + int flags = strict ? RT6_F_STRICT : 0; + + dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup); + if (dst->error == 0) + return (struct rt6_info *) dst; + + dst_release(dst); + return NULL; } -/* ip6_ins_rt is called with FREE rt6_lock. +/* ip6_ins_rt is called with FREE table->tb6_lock. It takes new route entry, the addition fails by any reason the route is freed. In any case, if caller does not hold it, it may be destroyed. @@ -471,10 +506,12 @@ int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) { int err; + struct fib6_table *table; - write_lock_bh(&rt6_lock); - err = fib6_add(&ip6_routing_table, rt, nlh, _rtattr, req); - write_unlock_bh(&rt6_lock); + table = rt->rt6i_table; + write_lock_bh(&table->tb6_lock); + err = fib6_add(&table->tb6_root, rt, nlh, _rtattr, req); + write_unlock_bh(&table->tb6_lock); return err; } @@ -532,51 +569,40 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d return rt; } -#define BACKTRACK() \ -if (rt == &ip6_null_entry) { \ - while ((fn = fn->parent) != NULL) { \ - if (fn->fn_flags & RTN_ROOT) { \ - goto out; \ - } \ - if (fn->fn_flags & RTN_RTINFO) \ - goto restart; \ - } \ -} - - -void ip6_route_input(struct sk_buff *skb) +struct rt6_info *ip6_pol_route_input(struct fib6_table *table, struct flowi *fl, + int flags) { struct fib6_node *fn; struct rt6_info *rt, *nrt; - int strict; + int strict = 0; int attempts = 3; int err; int reachable = RT6_SELECT_F_REACHABLE; - strict = ipv6_addr_type(&skb->nh.ipv6h->daddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0; + if (flags & RT6_F_STRICT) + strict = RT6_SELECT_F_IFACE; relookup: - read_lock_bh(&rt6_lock); + read_lock_bh(&table->tb6_lock); restart_2: - fn = fib6_lookup(&ip6_routing_table, &skb->nh.ipv6h->daddr, - &skb->nh.ipv6h->saddr); + fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: - rt = rt6_select(&fn->leaf, skb->dev->ifindex, strict | reachable); + rt = rt6_select(&fn->leaf, fl->iif, strict | reachable); BACKTRACK(); if (rt == &ip6_null_entry || rt->rt6i_flags & RTF_CACHE) goto out; dst_hold(&rt->u.dst); - read_unlock_bh(&rt6_lock); + read_unlock_bh(&table->tb6_lock); if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) - nrt = rt6_alloc_cow(rt, &skb->nh.ipv6h->daddr, &skb->nh.ipv6h->saddr); + nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); else { #if CLONE_OFFLINK_ROUTE - nrt = rt6_alloc_clone(rt, &skb->nh.ipv6h->daddr); + nrt = rt6_alloc_clone(rt, &fl->fl6_dst); #else goto out2; #endif @@ -587,7 +613,7 @@ restart: dst_hold(&rt->u.dst); if (nrt) { - err = ip6_ins_rt(nrt, NULL, NULL, &NETLINK_CB(skb)); + err = ip6_ins_rt(nrt, NULL, NULL, NULL); if (!err) goto out2; } @@ -596,7 +622,7 @@ restart: goto out2; /* - * Race condition! In the gap, when rt6_lock was + * Race condition! In the gap, when table->tb6_lock was * released someone could insert this route. Relookup. */ dst_release(&rt->u.dst); @@ -608,30 +634,54 @@ out: goto restart_2; } dst_hold(&rt->u.dst); - read_unlock_bh(&rt6_lock); + read_unlock_bh(&table->tb6_lock); out2: rt->u.dst.lastuse = jiffies; rt->u.dst.__use++; - skb->dst = (struct dst_entry *) rt; - return; + + return rt; } -struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) +void ip6_route_input(struct sk_buff *skb) +{ + struct ipv6hdr *iph = skb->nh.ipv6h; + struct flowi fl = { + .iif = skb->dev->ifindex, + .nl_u = { + .ip6_u = { + .daddr = iph->daddr, + .saddr = iph->saddr, + .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK, + }, + }, + .proto = iph->nexthdr, + }; + int flags = 0; + + if (rt6_need_strict(&iph->daddr)) + flags |= RT6_F_STRICT; + + skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input); +} + +static struct rt6_info *ip6_pol_route_output(struct fib6_table *table, + struct flowi *fl, int flags) { struct fib6_node *fn; struct rt6_info *rt, *nrt; - int strict; + int strict = 0; int attempts = 3; int err; int reachable = RT6_SELECT_F_REACHABLE; - strict = ipv6_addr_type(&fl->fl6_dst) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL) ? RT6_SELECT_F_IFACE : 0; + if (flags & RT6_F_STRICT) + strict = RT6_SELECT_F_IFACE; relookup: - read_lock_bh(&rt6_lock); + read_lock_bh(&table->tb6_lock); restart_2: - fn = fib6_lookup(&ip6_routing_table, &fl->fl6_dst, &fl->fl6_src); + fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: rt = rt6_select(&fn->leaf, fl->oif, strict | reachable); @@ -641,7 +691,7 @@ restart: goto out; dst_hold(&rt->u.dst); - read_unlock_bh(&rt6_lock); + read_unlock_bh(&table->tb6_lock); if (!rt->rt6i_nexthop && !(rt->rt6i_flags & RTF_NONEXTHOP)) nrt = rt6_alloc_cow(rt, &fl->fl6_dst, &fl->fl6_src); @@ -667,7 +717,7 @@ restart: goto out2; /* - * Race condition! In the gap, when rt6_lock was + * Race condition! In the gap, when table->tb6_lock was * released someone could insert this route. Relookup. */ dst_release(&rt->u.dst); @@ -679,11 +729,21 @@ out: goto restart_2; } dst_hold(&rt->u.dst); - read_unlock_bh(&rt6_lock); + read_unlock_bh(&table->tb6_lock); out2: rt->u.dst.lastuse = jiffies; rt->u.dst.__use++; - return &rt->u.dst; + return rt; +} + +struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) +{ + int flags = 0; + + if (rt6_need_strict(&fl->fl6_dst)) + flags |= RT6_F_STRICT; + + return fib6_rule_lookup(fl, flags, ip6_pol_route_output); } @@ -906,7 +966,8 @@ int ipv6_get_hoplimit(struct net_device *dev) */ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, - void *_rtattr, struct netlink_skb_parms *req) + void *_rtattr, struct netlink_skb_parms *req, + u32 table_id) { int err; struct rtmsg *r; @@ -914,6 +975,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, struct rt6_info *rt = NULL; struct net_device *dev = NULL; struct inet6_dev *idev = NULL; + struct fib6_table *table; int addr_type; rta = (struct rtattr **) _rtattr; @@ -937,6 +999,12 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, if (rtmsg->rtmsg_metric == 0) rtmsg->rtmsg_metric = IP6_RT_PRIO_USER; + table = fib6_new_table(table_id); + if (table == NULL) { + err = -ENOBUFS; + goto out; + } + rt = ip6_dst_alloc(); if (rt == NULL) { @@ -1093,6 +1161,7 @@ install_route: rt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&rt->u.dst)); rt->u.dst.dev = dev; rt->rt6i_idev = idev; + rt->rt6i_table = table; return ip6_ins_rt(rt, nlh, _rtattr, req); out: @@ -1108,26 +1177,35 @@ out: int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) { int err; + struct fib6_table *table; - write_lock_bh(&rt6_lock); + table = rt->rt6i_table; + write_lock_bh(&table->tb6_lock); err = fib6_del(rt, nlh, _rtattr, req); dst_release(&rt->u.dst); - write_unlock_bh(&rt6_lock); + write_unlock_bh(&table->tb6_lock); return err; } -static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) +static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, + void *_rtattr, struct netlink_skb_parms *req, + u32 table_id) { + struct fib6_table *table; struct fib6_node *fn; struct rt6_info *rt; int err = -ESRCH; - read_lock_bh(&rt6_lock); + table = fib6_get_table(table_id); + if (table == NULL) + return err; + + read_lock_bh(&table->tb6_lock); - fn = fib6_locate(&ip6_routing_table, + fn = fib6_locate(&table->tb6_root, &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len, &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len); @@ -1144,12 +1222,12 @@ static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_r rtmsg->rtmsg_metric != rt->rt6i_metric) continue; dst_hold(&rt->u.dst); - read_unlock_bh(&rt6_lock); + read_unlock_bh(&table->tb6_lock); return ip6_del_rt(rt, nlh, _rtattr, req); } } - read_unlock_bh(&rt6_lock); + read_unlock_bh(&table->tb6_lock); return err; } @@ -1161,10 +1239,15 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr, struct neighbour *neigh, u8 *lladdr, int on_link) { struct rt6_info *rt, *nrt = NULL; - int strict; struct fib6_node *fn; + struct fib6_table *table; struct netevent_redirect netevent; + /* TODO: Very lazy, might need to check all tables */ + table = fib6_get_table(RT6_TABLE_MAIN); + if (table == NULL) + return; + /* * Get the "current" route for this destination and * check if the redirect has come from approriate router. @@ -1175,10 +1258,9 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr, * is a bit fuzzy and one might need to check all possible * routes. */ - strict = ipv6_addr_type(dest) & (IPV6_ADDR_MULTICAST | IPV6_ADDR_LINKLOCAL); - read_lock_bh(&rt6_lock); - fn = fib6_lookup(&ip6_routing_table, dest, NULL); + read_lock_bh(&table->tb6_lock); + fn = fib6_lookup(&table->tb6_root, dest, NULL); restart: for (rt = fn->leaf; rt; rt = rt->u.next) { /* @@ -1201,7 +1283,7 @@ restart: } if (rt) dst_hold(&rt->u.dst); - else if (strict) { + else if (rt6_need_strict(dest)) { while ((fn = fn->parent) != NULL) { if (fn->fn_flags & RTN_ROOT) break; @@ -1209,7 +1291,7 @@ restart: goto restart; } } - read_unlock_bh(&rt6_lock); + read_unlock_bh(&table->tb6_lock); if (!rt) { if (net_ratelimit()) @@ -1384,6 +1466,7 @@ static struct rt6_info * ip6_rt_copy(struct rt6_info *ort) #ifdef CONFIG_IPV6_SUBTREES memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); #endif + rt->rt6i_table = ort->rt6i_table; } return rt; } @@ -1394,9 +1477,14 @@ static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixle { struct fib6_node *fn; struct rt6_info *rt = NULL; + struct fib6_table *table; + + table = fib6_get_table(RT6_TABLE_INFO); + if (table == NULL) + return NULL; - write_lock_bh(&rt6_lock); - fn = fib6_locate(&ip6_routing_table, prefix ,prefixlen, NULL, 0); + write_lock_bh(&table->tb6_lock); + fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0); if (!fn) goto out; @@ -1411,7 +1499,7 @@ static struct rt6_info *rt6_get_route_info(struct in6_addr *prefix, int prefixle break; } out: - write_unlock_bh(&rt6_lock); + write_unlock_bh(&table->tb6_lock); return rt; } @@ -1433,7 +1521,7 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle rtmsg.rtmsg_flags |= RTF_DEFAULT; rtmsg.rtmsg_ifindex = ifindex; - ip6_route_add(&rtmsg, NULL, NULL, NULL); + ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_INFO); return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex); } @@ -1442,12 +1530,14 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *dev) { struct rt6_info *rt; - struct fib6_node *fn; + struct fib6_table *table; - fn = &ip6_routing_table; + table = fib6_get_table(RT6_TABLE_DFLT); + if (table == NULL) + return NULL; - write_lock_bh(&rt6_lock); - for (rt = fn->leaf; rt; rt=rt->u.next) { + write_lock_bh(&table->tb6_lock); + for (rt = table->tb6_root.leaf; rt; rt=rt->u.next) { if (dev == rt->rt6i_dev && ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && ipv6_addr_equal(&rt->rt6i_gateway, addr)) @@ -1455,7 +1545,7 @@ struct rt6_info *rt6_get_dflt_router(struct in6_addr *addr, struct net_device *d } if (rt) dst_hold(&rt->u.dst); - write_unlock_bh(&rt6_lock); + write_unlock_bh(&table->tb6_lock); return rt; } @@ -1474,28 +1564,31 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, rtmsg.rtmsg_ifindex = dev->ifindex; - ip6_route_add(&rtmsg, NULL, NULL, NULL); + ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_DFLT); return rt6_get_dflt_router(gwaddr, dev); } void rt6_purge_dflt_routers(void) { struct rt6_info *rt; + struct fib6_table *table; + + /* NOTE: Keep consistent with rt6_get_dflt_router */ + table = fib6_get_table(RT6_TABLE_DFLT); + if (table == NULL) + return; restart: - read_lock_bh(&rt6_lock); - for (rt = ip6_routing_table.leaf; rt; rt = rt->u.next) { + read_lock_bh(&table->tb6_lock); + for (rt = table->tb6_root.leaf; rt; rt = rt->u.next) { if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { dst_hold(&rt->u.dst); - - read_unlock_bh(&rt6_lock); - + read_unlock_bh(&table->tb6_lock); ip6_del_rt(rt, NULL, NULL, NULL); - goto restart; } } - read_unlock_bh(&rt6_lock); + read_unlock_bh(&table->tb6_lock); } int ipv6_route_ioctl(unsigned int cmd, void __user *arg) @@ -1516,10 +1609,12 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg) rtnl_lock(); switch (cmd) { case SIOCADDRT: - err = ip6_route_add(&rtmsg, NULL, NULL, NULL); + err = ip6_route_add(&rtmsg, NULL, NULL, NULL, + RT6_TABLE_MAIN); break; case SIOCDELRT: - err = ip6_route_del(&rtmsg, NULL, NULL, NULL); + err = ip6_route_del(&rtmsg, NULL, NULL, NULL, + RT6_TABLE_MAIN); break; default: err = -EINVAL; @@ -1593,6 +1688,7 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, ipv6_addr_copy(&rt->rt6i_dst.addr, addr); rt->rt6i_dst.plen = 128; + rt->rt6i_table = fib6_get_table(RT6_TABLE_LOCAL); atomic_set(&rt->u.dst.__refcnt, 1); @@ -1611,9 +1707,7 @@ static int fib6_ifdown(struct rt6_info *rt, void *arg) void rt6_ifdown(struct net_device *dev) { - write_lock_bh(&rt6_lock); - fib6_clean_tree(&ip6_routing_table, fib6_ifdown, 0, dev); - write_unlock_bh(&rt6_lock); + fib6_clean_all(fib6_ifdown, 0, dev); } struct rt6_mtu_change_arg @@ -1663,13 +1757,12 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) void rt6_mtu_change(struct net_device *dev, unsigned mtu) { - struct rt6_mtu_change_arg arg; + struct rt6_mtu_change_arg arg = { + .dev = dev, + .mtu = mtu, + }; - arg.dev = dev; - arg.mtu = mtu; - read_lock_bh(&rt6_lock); - fib6_clean_tree(&ip6_routing_table, rt6_mtu_change_route, 0, &arg); - read_unlock_bh(&rt6_lock); + fib6_clean_all(rt6_mtu_change_route, 0, &arg); } static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta, @@ -1719,7 +1812,7 @@ int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) return -EINVAL; - return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb)); + return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb), r->rtm_table); } int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) @@ -1729,7 +1822,7 @@ int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) return -EINVAL; - return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb)); + return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb), r->rtm_table); } struct rt6_rtnl_dump_arg @@ -1761,6 +1854,10 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, rtm->rtm_dst_len = rt->rt6i_dst.plen; rtm->rtm_src_len = rt->rt6i_src.plen; rtm->rtm_tos = 0; + if (rt->rt6i_table) + rtm->rtm_table = rt->rt6i_table->tb6_id; + else + rtm->rtm_table = RT6_TABLE_UNSPEC; rtm->rtm_table = RT_TABLE_MAIN; if (rt->rt6i_flags&RTF_REJECT) rtm->rtm_type = RTN_UNREACHABLE; @@ -1868,7 +1965,6 @@ static void fib6_dump_end(struct netlink_callback *cb) if (w) { cb->args[0] = 0; - fib6_walker_unlink(w); kfree(w); } cb->done = (void*)cb->args[1]; @@ -1883,13 +1979,20 @@ static int fib6_dump_done(struct netlink_callback *cb) int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { + struct fib6_table *table; struct rt6_rtnl_dump_arg arg; struct fib6_walker_t *w; - int res; + int i, res = 0; arg.skb = skb; arg.cb = cb; + /* + * cb->args[0] = pointer to walker structure + * cb->args[1] = saved cb->done() pointer + * cb->args[2] = current table being dumped + */ + w = (void*)cb->args[0]; if (w == NULL) { /* New dump: @@ -1905,24 +2008,48 @@ int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) w = kzalloc(sizeof(*w), GFP_ATOMIC); if (w == NULL) return -ENOMEM; - RT6_TRACE("dump<%p", w); - w->root = &ip6_routing_table; w->func = fib6_dump_node; w->args = &arg; cb->args[0] = (long)w; - read_lock_bh(&rt6_lock); - res = fib6_walk(w); - read_unlock_bh(&rt6_lock); + cb->args[2] = FIB6_TABLE_MIN; } else { w->args = &arg; - read_lock_bh(&rt6_lock); - res = fib6_walk_continue(w); - read_unlock_bh(&rt6_lock); + i = cb->args[2]; + if (i > FIB6_TABLE_MAX) + goto end; + + table = fib6_get_table(i); + if (table != NULL) { + read_lock_bh(&table->tb6_lock); + w->root = &table->tb6_root; + res = fib6_walk_continue(w); + read_unlock_bh(&table->tb6_lock); + if (res != 0) { + if (res < 0) + fib6_walker_unlink(w); + goto end; + } + } + + fib6_walker_unlink(w); + cb->args[2] = ++i; } -#if RT6_DEBUG >= 3 - if (res <= 0 && skb->len == 0) - RT6_TRACE("%p>dump end\n", w); -#endif + + for (i = cb->args[2]; i <= FIB6_TABLE_MAX; i++) { + table = fib6_get_table(i); + if (table == NULL) + continue; + + read_lock_bh(&table->tb6_lock); + w->root = &table->tb6_root; + res = fib6_walk(w); + read_unlock_bh(&table->tb6_lock); + if (res) + break; + } +end: + cb->args[2] = i; + res = res < 0 ? res : skb->len; /* res < 0 is an error. (really, impossible) res == 0 means that dump is complete, but skb still can contain data. @@ -2102,16 +2229,13 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) static int rt6_proc_info(char *buffer, char **start, off_t offset, int length) { - struct rt6_proc_arg arg; - arg.buffer = buffer; - arg.offset = offset; - arg.length = length; - arg.skip = 0; - arg.len = 0; + struct rt6_proc_arg arg = { + .buffer = buffer, + .offset = offset, + .length = length, + }; - read_lock_bh(&rt6_lock); - fib6_clean_tree(&ip6_routing_table, rt6_info_route, 0, &arg); - read_unlock_bh(&rt6_lock); + fib6_clean_all(rt6_info_route, 0, &arg); *start = buffer; if (offset) -- cgit v1.3-8-gc7d7 From 14c0b97ddfc2944982d078b8e33b088840068976 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 4 Aug 2006 03:38:38 -0700 Subject: [NET]: Protocol Independant Policy Routing Rules Framework Derived from net/ipv/fib_rules.c Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/fib_rules.h | 60 +++++++ include/net/fib_rules.h | 90 ++++++++++ net/Kconfig | 3 + net/core/Makefile | 1 + net/core/fib_rules.c | 416 ++++++++++++++++++++++++++++++++++++++++++++++ net/core/rtnetlink.c | 9 +- 6 files changed, 577 insertions(+), 2 deletions(-) create mode 100644 include/linux/fib_rules.h create mode 100644 include/net/fib_rules.h create mode 100644 net/core/fib_rules.c (limited to 'include/net') diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h new file mode 100644 index 000000000000..5e503f0ca6e4 --- /dev/null +++ b/include/linux/fib_rules.h @@ -0,0 +1,60 @@ +#ifndef __LINUX_FIB_RULES_H +#define __LINUX_FIB_RULES_H + +#include +#include + +/* rule is permanent, and cannot be deleted */ +#define FIB_RULE_PERMANENT 1 + +struct fib_rule_hdr +{ + __u8 family; + __u8 dst_len; + __u8 src_len; + __u8 tos; + + __u8 table; + __u8 res1; /* reserved */ + __u8 res2; /* reserved */ + __u8 action; + + __u32 flags; +}; + +enum +{ + FRA_UNSPEC, + FRA_DST, /* destination address */ + FRA_SRC, /* source address */ + FRA_IFNAME, /* interface name */ + FRA_UNUSED1, + FRA_UNUSED2, + FRA_PRIORITY, /* priority/preference */ + FRA_UNUSED3, + FRA_UNUSED4, + FRA_UNUSED5, + FRA_FWMARK, /* netfilter mark (IPv4) */ + FRA_FLOW, /* flow/class id */ + __FRA_MAX +}; + +#define FRA_MAX (__FRA_MAX - 1) + +enum +{ + FR_ACT_UNSPEC, + FR_ACT_TO_TBL, /* Pass to fixed table */ + FR_ACT_RES1, + FR_ACT_RES2, + FR_ACT_RES3, + FR_ACT_RES4, + FR_ACT_BLACKHOLE, /* Drop without notification */ + FR_ACT_UNREACHABLE, /* Drop with ENETUNREACH */ + FR_ACT_PROHIBIT, /* Drop with EACCES */ + __FR_ACT_MAX, +}; + +#define FR_ACT_MAX (__FR_ACT_MAX - 1) + +#endif diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h new file mode 100644 index 000000000000..61375d9e53f8 --- /dev/null +++ b/include/net/fib_rules.h @@ -0,0 +1,90 @@ +#ifndef __NET_FIB_RULES_H +#define __NET_FIB_RULES_H + +#include +#include +#include +#include +#include + +struct fib_rule +{ + struct list_head list; + atomic_t refcnt; + int ifindex; + char ifname[IFNAMSIZ]; + u32 pref; + u32 flags; + u32 table; + u8 action; + struct rcu_head rcu; +}; + +struct fib_lookup_arg +{ + void *lookup_ptr; + void *result; + struct fib_rule *rule; +}; + +struct fib_rules_ops +{ + int family; + struct list_head list; + int rule_size; + + int (*action)(struct fib_rule *, + struct flowi *, int, + struct fib_lookup_arg *); + int (*match)(struct fib_rule *, + struct flowi *, int); + int (*configure)(struct fib_rule *, + struct sk_buff *, + struct nlmsghdr *, + struct fib_rule_hdr *, + struct nlattr **); + int (*compare)(struct fib_rule *, + struct fib_rule_hdr *, + struct nlattr **); + int (*fill)(struct fib_rule *, struct sk_buff *, + struct nlmsghdr *, + struct fib_rule_hdr *); + u32 (*default_pref)(void); + + int nlgroup; + struct nla_policy *policy; + struct list_head *rules_list; + struct module *owner; +}; + +static inline void fib_rule_get(struct fib_rule *rule) +{ + atomic_inc(&rule->refcnt); +} + +static inline void fib_rule_put_rcu(struct rcu_head *head) +{ + struct fib_rule *rule = container_of(head, struct fib_rule, rcu); + kfree(rule); +} + +static inline void fib_rule_put(struct fib_rule *rule) +{ + if (atomic_dec_and_test(&rule->refcnt)) + call_rcu(&rule->rcu, fib_rule_put_rcu); +} + +extern int fib_rules_register(struct fib_rules_ops *); +extern int fib_rules_unregister(struct fib_rules_ops *); + +extern int fib_rules_lookup(struct fib_rules_ops *, + struct flowi *, int flags, + struct fib_lookup_arg *); + +extern int fib_nl_newrule(struct sk_buff *, + struct nlmsghdr *, void *); +extern int fib_nl_delrule(struct sk_buff *, + struct nlmsghdr *, void *); +extern int fib_rules_dump(struct sk_buff *, + struct netlink_callback *, int); +#endif diff --git a/net/Kconfig b/net/Kconfig index eb855b7fa642..6528a935622c 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -251,6 +251,9 @@ config WIRELESS_EXT source "net/netlabel/Kconfig" +config FIB_RULES + bool + endif # if NET endmenu # Networking diff --git a/net/core/Makefile b/net/core/Makefile index 2645ba428d48..119568077dab 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -17,3 +17,4 @@ obj-$(CONFIG_NET_PKTGEN) += pktgen.o obj-$(CONFIG_WIRELESS_EXT) += wireless.o obj-$(CONFIG_NETPOLL) += netpoll.o obj-$(CONFIG_NET_DMA) += user_dma.o +obj-$(CONFIG_FIB_RULES) += fib_rules.o diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c new file mode 100644 index 000000000000..6cdad24038e2 --- /dev/null +++ b/net/core/fib_rules.c @@ -0,0 +1,416 @@ +/* + * net/core/fib_rules.c Generic Routing Rules + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2. + * + * Authors: Thomas Graf + */ + +#include +#include +#include +#include +#include + +static LIST_HEAD(rules_ops); +static DEFINE_SPINLOCK(rules_mod_lock); + +static void notify_rule_change(int event, struct fib_rule *rule, + struct fib_rules_ops *ops); + +static struct fib_rules_ops *lookup_rules_ops(int family) +{ + struct fib_rules_ops *ops; + + rcu_read_lock(); + list_for_each_entry_rcu(ops, &rules_ops, list) { + if (ops->family == family) { + if (!try_module_get(ops->owner)) + ops = NULL; + rcu_read_unlock(); + return ops; + } + } + rcu_read_unlock(); + + return NULL; +} + +static void rules_ops_put(struct fib_rules_ops *ops) +{ + if (ops) + module_put(ops->owner); +} + +int fib_rules_register(struct fib_rules_ops *ops) +{ + int err = -EEXIST; + struct fib_rules_ops *o; + + if (ops->rule_size < sizeof(struct fib_rule)) + return -EINVAL; + + if (ops->match == NULL || ops->configure == NULL || + ops->compare == NULL || ops->fill == NULL || + ops->action == NULL) + return -EINVAL; + + spin_lock(&rules_mod_lock); + list_for_each_entry(o, &rules_ops, list) + if (ops->family == o->family) + goto errout; + + list_add_tail_rcu(&ops->list, &rules_ops); + err = 0; +errout: + spin_unlock(&rules_mod_lock); + + return err; +} + +EXPORT_SYMBOL_GPL(fib_rules_register); + +static void cleanup_ops(struct fib_rules_ops *ops) +{ + struct fib_rule *rule, *tmp; + + list_for_each_entry_safe(rule, tmp, ops->rules_list, list) { + list_del_rcu(&rule->list); + fib_rule_put(rule); + } +} + +int fib_rules_unregister(struct fib_rules_ops *ops) +{ + int err = 0; + struct fib_rules_ops *o; + + spin_lock(&rules_mod_lock); + list_for_each_entry(o, &rules_ops, list) { + if (o == ops) { + list_del_rcu(&o->list); + cleanup_ops(ops); + goto out; + } + } + + err = -ENOENT; +out: + spin_unlock(&rules_mod_lock); + + synchronize_rcu(); + + return err; +} + +EXPORT_SYMBOL_GPL(fib_rules_unregister); + +int fib_rules_lookup(struct fib_rules_ops *ops, struct flowi *fl, + int flags, struct fib_lookup_arg *arg) +{ + struct fib_rule *rule; + int err; + + rcu_read_lock(); + + list_for_each_entry_rcu(rule, ops->rules_list, list) { + if (rule->ifindex && (rule->ifindex != fl->iif)) + continue; + + if (!ops->match(rule, fl, flags)) + continue; + + err = ops->action(rule, fl, flags, arg); + if (err != -EAGAIN) { + fib_rule_get(rule); + arg->rule = rule; + goto out; + } + } + + err = -ENETUNREACH; +out: + rcu_read_unlock(); + + return err; +} + +EXPORT_SYMBOL_GPL(fib_rules_lookup); + +int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +{ + struct fib_rule_hdr *frh = nlmsg_data(nlh); + struct fib_rules_ops *ops = NULL; + struct fib_rule *rule, *r, *last = NULL; + struct nlattr *tb[FRA_MAX+1]; + int err = -EINVAL; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) + goto errout; + + ops = lookup_rules_ops(frh->family); + if (ops == NULL) { + err = EAFNOSUPPORT; + goto errout; + } + + err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy); + if (err < 0) + goto errout; + + if (tb[FRA_IFNAME] && nla_len(tb[FRA_IFNAME]) > IFNAMSIZ) + goto errout; + + rule = kzalloc(ops->rule_size, GFP_KERNEL); + if (rule == NULL) { + err = -ENOMEM; + goto errout; + } + + if (tb[FRA_PRIORITY]) + rule->pref = nla_get_u32(tb[FRA_PRIORITY]); + + if (tb[FRA_IFNAME]) { + struct net_device *dev; + + rule->ifindex = -1; + if (nla_strlcpy(rule->ifname, tb[FRA_IFNAME], + IFNAMSIZ) >= IFNAMSIZ) + goto errout_free; + + dev = __dev_get_by_name(rule->ifname); + if (dev) + rule->ifindex = dev->ifindex; + } + + rule->action = frh->action; + rule->flags = frh->flags; + rule->table = frh->table; + + if (!rule->pref && ops->default_pref) + rule->pref = ops->default_pref(); + + err = ops->configure(rule, skb, nlh, frh, tb); + if (err < 0) + goto errout_free; + + list_for_each_entry(r, ops->rules_list, list) { + if (r->pref > rule->pref) + break; + last = r; + } + + fib_rule_get(rule); + + if (last) + list_add_rcu(&rule->list, &last->list); + else + list_add_rcu(&rule->list, ops->rules_list); + + notify_rule_change(RTM_NEWRULE, rule, ops); + rules_ops_put(ops); + return 0; + +errout_free: + kfree(rule); +errout: + rules_ops_put(ops); + return err; +} + +int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) +{ + struct fib_rule_hdr *frh = nlmsg_data(nlh); + struct fib_rules_ops *ops = NULL; + struct fib_rule *rule; + struct nlattr *tb[FRA_MAX+1]; + int err = -EINVAL; + + if (nlh->nlmsg_len < nlmsg_msg_size(sizeof(*frh))) + goto errout; + + ops = lookup_rules_ops(frh->family); + if (ops == NULL) { + err = EAFNOSUPPORT; + goto errout; + } + + err = nlmsg_parse(nlh, sizeof(*frh), tb, FRA_MAX, ops->policy); + if (err < 0) + goto errout; + + list_for_each_entry(rule, ops->rules_list, list) { + if (frh->action && (frh->action != rule->action)) + continue; + + if (frh->table && (frh->table != rule->table)) + continue; + + if (tb[FRA_PRIORITY] && + (rule->pref != nla_get_u32(tb[FRA_PRIORITY]))) + continue; + + if (tb[FRA_IFNAME] && + nla_strcmp(tb[FRA_IFNAME], rule->ifname)) + continue; + + if (!ops->compare(rule, frh, tb)) + continue; + + if (rule->flags & FIB_RULE_PERMANENT) { + err = -EPERM; + goto errout; + } + + list_del_rcu(&rule->list); + synchronize_rcu(); + notify_rule_change(RTM_DELRULE, rule, ops); + fib_rule_put(rule); + rules_ops_put(ops); + return 0; + } + + err = -ENOENT; +errout: + rules_ops_put(ops); + return err; +} + +static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, + u32 pid, u32 seq, int type, int flags, + struct fib_rules_ops *ops) +{ + struct nlmsghdr *nlh; + struct fib_rule_hdr *frh; + + nlh = nlmsg_put(skb, pid, seq, type, sizeof(*frh), flags); + if (nlh == NULL) + return -1; + + frh = nlmsg_data(nlh); + frh->table = rule->table; + frh->res1 = 0; + frh->res2 = 0; + frh->action = rule->action; + frh->flags = rule->flags; + + if (rule->ifname[0]) + NLA_PUT_STRING(skb, FRA_IFNAME, rule->ifname); + + if (rule->pref) + NLA_PUT_U32(skb, FRA_PRIORITY, rule->pref); + + if (ops->fill(rule, skb, nlh, frh) < 0) + goto nla_put_failure; + + return nlmsg_end(skb, nlh); + +nla_put_failure: + return nlmsg_cancel(skb, nlh); +} + +int fib_rules_dump(struct sk_buff *skb, struct netlink_callback *cb, int family) +{ + int idx = 0; + struct fib_rule *rule; + struct fib_rules_ops *ops; + + ops = lookup_rules_ops(family); + if (ops == NULL) + return -EAFNOSUPPORT; + + rcu_read_lock(); + list_for_each_entry(rule, ops->rules_list, list) { + if (idx < cb->args[0]) + goto skip; + + if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid, + cb->nlh->nlmsg_seq, RTM_NEWRULE, + NLM_F_MULTI, ops) < 0) + break; +skip: + idx++; + } + rcu_read_unlock(); + cb->args[0] = idx; + rules_ops_put(ops); + + return skb->len; +} + +EXPORT_SYMBOL_GPL(fib_rules_dump); + +static void notify_rule_change(int event, struct fib_rule *rule, + struct fib_rules_ops *ops) +{ + int size = nlmsg_total_size(sizeof(struct fib_rule_hdr) + 128); + struct sk_buff *skb = alloc_skb(size, GFP_KERNEL); + + if (skb == NULL) + netlink_set_err(rtnl, 0, ops->nlgroup, ENOBUFS); + else if (fib_nl_fill_rule(skb, rule, 0, 0, event, 0, ops) < 0) { + kfree_skb(skb); + netlink_set_err(rtnl, 0, ops->nlgroup, EINVAL); + } else + netlink_broadcast(rtnl, skb, 0, ops->nlgroup, GFP_KERNEL); +} + +static void attach_rules(struct list_head *rules, struct net_device *dev) +{ + struct fib_rule *rule; + + list_for_each_entry(rule, rules, list) { + if (rule->ifindex == -1 && + strcmp(dev->name, rule->ifname) == 0) + rule->ifindex = dev->ifindex; + } +} + +static void detach_rules(struct list_head *rules, struct net_device *dev) +{ + struct fib_rule *rule; + + list_for_each_entry(rule, rules, list) + if (rule->ifindex == dev->ifindex) + rule->ifindex = -1; +} + + +static int fib_rules_event(struct notifier_block *this, unsigned long event, + void *ptr) +{ + struct net_device *dev = ptr; + struct fib_rules_ops *ops; + + ASSERT_RTNL(); + rcu_read_lock(); + + switch (event) { + case NETDEV_REGISTER: + list_for_each_entry(ops, &rules_ops, list) + attach_rules(ops->rules_list, dev); + break; + + case NETDEV_UNREGISTER: + list_for_each_entry(ops, &rules_ops, list) + detach_rules(ops->rules_list, dev); + break; + } + + rcu_read_unlock(); + + return NOTIFY_DONE; +} + +static struct notifier_block fib_rules_notifier = { + .notifier_call = fib_rules_event, +}; + +static int __init fib_rules_init(void) +{ + return register_netdevice_notifier(&fib_rules_notifier); +} + +subsys_initcall(fib_rules_init); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 30cc1ba6ed5c..aa7cff2257b1 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #ifdef CONFIG_NET_WIRELESS_RTNETLINK #include @@ -103,7 +104,7 @@ static const int rtm_min[RTM_NR_FAMILIES] = [RTM_FAM(RTM_NEWADDR)] = NLMSG_LENGTH(sizeof(struct ifaddrmsg)), [RTM_FAM(RTM_NEWROUTE)] = NLMSG_LENGTH(sizeof(struct rtmsg)), [RTM_FAM(RTM_NEWNEIGH)] = NLMSG_LENGTH(sizeof(struct ndmsg)), - [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct rtmsg)), + [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)), [RTM_FAM(RTM_NEWQDISC)] = NLMSG_LENGTH(sizeof(struct tcmsg)), [RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)), [RTM_FAM(RTM_NEWTFILTER)] = NLMSG_LENGTH(sizeof(struct tcmsg)), @@ -120,7 +121,7 @@ static const int rta_max[RTM_NR_FAMILIES] = [RTM_FAM(RTM_NEWADDR)] = IFA_MAX, [RTM_FAM(RTM_NEWROUTE)] = RTA_MAX, [RTM_FAM(RTM_NEWNEIGH)] = NDA_MAX, - [RTM_FAM(RTM_NEWRULE)] = RTA_MAX, + [RTM_FAM(RTM_NEWRULE)] = FRA_MAX, [RTM_FAM(RTM_NEWQDISC)] = TCA_MAX, [RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX, [RTM_FAM(RTM_NEWTFILTER)] = TCA_MAX, @@ -757,6 +758,10 @@ static struct rtnetlink_link link_rtnetlink_table[RTM_NR_MSGTYPES] = [RTM_NEWNEIGH - RTM_BASE] = { .doit = neigh_add }, [RTM_DELNEIGH - RTM_BASE] = { .doit = neigh_delete }, [RTM_GETNEIGH - RTM_BASE] = { .dumpit = neigh_dump_info }, +#ifdef CONFIG_FIB_RULES + [RTM_NEWRULE - RTM_BASE] = { .doit = fib_nl_newrule }, + [RTM_DELRULE - RTM_BASE] = { .doit = fib_nl_delrule }, +#endif [RTM_GETRULE - RTM_BASE] = { .dumpit = rtnetlink_dump_all }, [RTM_GETNEIGHTBL - RTM_BASE] = { .dumpit = neightbl_dump_info }, [RTM_SETNEIGHTBL - RTM_BASE] = { .doit = neightbl_set }, -- cgit v1.3-8-gc7d7 From 101367c2f8c464ea96643192673aa18d88e6336d Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 4 Aug 2006 03:39:02 -0700 Subject: [IPV6]: Policy Routing Rules Adds support for policy routing rules including a new local table for routes with a local destination. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 2 + include/net/ip6_fib.h | 9 +- include/net/ip6_route.h | 5 + net/ipv6/Kconfig | 1 + net/ipv6/Makefile | 1 + net/ipv6/addrconf.c | 1 + net/ipv6/fib6_rules.c | 251 ++++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/ip6_fib.c | 21 ++-- net/ipv6/route.c | 50 +++++++++ 9 files changed, 329 insertions(+), 12 deletions(-) create mode 100644 net/ipv6/fib6_rules.c (limited to 'include/net') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index facd9ee37b76..bf353538ae93 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -889,6 +889,8 @@ enum rtnetlink_groups { RTNLGRP_NOP4, RTNLGRP_IPV6_PREFIX, #define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX + RTNLGRP_IPV6_RULE, +#define RTNLGRP_IPV6_RULE RTNLGRP_IPV6_RULE __RTNLGRP_MAX }; #define RTNLGRP_MAX (__RTNLGRP_MAX - 1) diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 818411519c89..7b47e8d5a765 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -155,7 +155,6 @@ struct fib6_table { #define RT6_TABLE_UNSPEC RT_TABLE_UNSPEC #define RT6_TABLE_MAIN RT_TABLE_MAIN -#define RT6_TABLE_LOCAL RT6_TABLE_MAIN #define RT6_TABLE_DFLT RT6_TABLE_MAIN #define RT6_TABLE_INFO RT6_TABLE_MAIN #define RT6_TABLE_PREFIX RT6_TABLE_MAIN @@ -163,9 +162,11 @@ struct fib6_table { #ifdef CONFIG_IPV6_MULTIPLE_TABLES #define FIB6_TABLE_MIN 1 #define FIB6_TABLE_MAX RT_TABLE_MAX +#define RT6_TABLE_LOCAL RT_TABLE_LOCAL #else #define FIB6_TABLE_MIN RT_TABLE_MAIN #define FIB6_TABLE_MAX FIB6_TABLE_MIN +#define RT6_TABLE_LOCAL RT6_TABLE_MAIN #endif #define RT6_F_STRICT 1 @@ -221,5 +222,11 @@ extern void fib6_run_gc(unsigned long dummy); extern void fib6_gc_cleanup(void); extern void fib6_init(void); + +extern void fib6_rules_init(void); +extern void fib6_rules_cleanup(void); +extern int fib6_rules_dump(struct sk_buff *, + struct netlink_callback *); + #endif #endif diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index d49c8c90eb68..9bfa3cc6cedb 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -41,6 +41,11 @@ struct pol_chain { extern struct rt6_info ip6_null_entry; +#ifdef CONFIG_IPV6_MULTIPLE_TABLES +extern struct rt6_info ip6_prohibit_entry; +extern struct rt6_info ip6_blk_hole_entry; +#endif + extern int ip6_rt_gc_interval; extern void ip6_route_input(struct sk_buff *skb); diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 159c63d99c81..36a6c2b79889 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -139,6 +139,7 @@ config IPV6_TUNNEL config IPV6_MULTIPLE_TABLES bool "IPv6: Multiple Routing Tables" depends on IPV6 && EXPERIMENTAL + select FIB_RULES ---help--- Support multiple routing tables. diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 386e0a626948..9eebf6091279 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -13,6 +13,7 @@ ipv6-objs := af_inet6.o anycast.o ip6_output.o ip6_input.o addrconf.o sit.o \ ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ xfrm6_output.o ipv6-$(CONFIG_NETFILTER) += netfilter.o +ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o ipv6-objs += $(ipv6-y) obj-$(CONFIG_INET6_AH) += ah6.o diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 318767fcefdc..ed766eebc022 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3528,6 +3528,7 @@ static struct rtnetlink_link inet6_rtnetlink_table[RTM_NR_MSGTYPES] = { [RTM_DELROUTE - RTM_BASE] = { .doit = inet6_rtm_delroute, }, [RTM_GETROUTE - RTM_BASE] = { .doit = inet6_rtm_getroute, .dumpit = inet6_dump_fib, }, + [RTM_GETRULE - RTM_BASE] = { .dumpit = fib6_rules_dump, }, }; static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c new file mode 100644 index 000000000000..c3c8195744ee --- /dev/null +++ b/net/ipv6/fib6_rules.c @@ -0,0 +1,251 @@ +/* + * net/ipv6/fib6_rules.c IPv6 Routing Policy Rules + * + * Copyright (C)2003-2006 Helsinki University of Technology + * Copyright (C)2003-2006 USAGI/WIDE Project + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License as + * published by the Free Software Foundation, version 2. + * + * Authors + * Thomas Graf + * Ville Nuorvala + */ + +#include +#include + +#include +#include +#include +#include + +struct fib6_rule +{ + struct fib_rule common; + struct rt6key src; + struct rt6key dst; + u8 tclass; +}; + +static struct fib_rules_ops fib6_rules_ops; + +static struct fib6_rule main_rule = { + .common = { + .refcnt = ATOMIC_INIT(2), + .pref = 0x7FFE, + .action = FR_ACT_TO_TBL, + .table = RT6_TABLE_MAIN, + }, +}; + +static struct fib6_rule local_rule = { + .common = { + .refcnt = ATOMIC_INIT(2), + .pref = 0, + .action = FR_ACT_TO_TBL, + .table = RT6_TABLE_LOCAL, + .flags = FIB_RULE_PERMANENT, + }, +}; + +static LIST_HEAD(fib6_rules); + +struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, + pol_lookup_t lookup) +{ + struct fib_lookup_arg arg = { + .lookup_ptr = lookup, + }; + + fib_rules_lookup(&fib6_rules_ops, fl, flags, &arg); + if (arg.rule) + fib_rule_put(arg.rule); + + return (struct dst_entry *) arg.result; +} + +int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, + int flags, struct fib_lookup_arg *arg) +{ + struct rt6_info *rt = NULL; + struct fib6_table *table; + pol_lookup_t lookup = arg->lookup_ptr; + + switch (rule->action) { + case FR_ACT_TO_TBL: + break; + case FR_ACT_UNREACHABLE: + rt = &ip6_null_entry; + goto discard_pkt; + default: + case FR_ACT_BLACKHOLE: + rt = &ip6_blk_hole_entry; + goto discard_pkt; + case FR_ACT_PROHIBIT: + rt = &ip6_prohibit_entry; + goto discard_pkt; + } + + table = fib6_get_table(rule->table); + if (table) + rt = lookup(table, flp, flags); + + if (rt != &ip6_null_entry) + goto out; + + dst_release(&rt->u.dst); +discard_pkt: + dst_hold(&rt->u.dst); +out: + arg->result = rt; + return rt == NULL ? -EAGAIN : 0; +} + + +static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) +{ + struct fib6_rule *r = (struct fib6_rule *) rule; + + if (!ipv6_prefix_equal(&fl->fl6_dst, &r->dst.addr, r->dst.plen)) + return 0; + + if ((flags & RT6_F_HAS_SADDR) && + !ipv6_prefix_equal(&fl->fl6_src, &r->src.addr, r->src.plen)) + return 0; + + return 1; +} + +static struct nla_policy fib6_rule_policy[RTA_MAX+1] __read_mostly = { + [FRA_IFNAME] = { .type = NLA_STRING }, + [FRA_PRIORITY] = { .type = NLA_U32 }, + [FRA_SRC] = { .minlen = sizeof(struct in6_addr) }, + [FRA_DST] = { .minlen = sizeof(struct in6_addr) }, +}; + +static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, + struct nlmsghdr *nlh, struct fib_rule_hdr *frh, + struct nlattr **tb) +{ + int err = -EINVAL; + struct fib6_rule *rule6 = (struct fib6_rule *) rule; + + if (frh->src_len > 128 || frh->dst_len > 128 || + (frh->tos & ~IPV6_FLOWINFO_MASK)) + goto errout; + + if (rule->action == FR_ACT_TO_TBL) { + if (rule->table == RT6_TABLE_UNSPEC) + goto errout; + + if (fib6_new_table(rule->table) == NULL) { + err = -ENOBUFS; + goto errout; + } + } + + if (tb[FRA_SRC]) + nla_memcpy(&rule6->src.addr, tb[FRA_SRC], + sizeof(struct in6_addr)); + + if (tb[FRA_DST]) + nla_memcpy(&rule6->dst.addr, tb[FRA_DST], + sizeof(struct in6_addr)); + + rule6->src.plen = frh->src_len; + rule6->dst.plen = frh->dst_len; + rule6->tclass = frh->tos; + + err = 0; +errout: + return err; +} + +static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, + struct nlattr **tb) +{ + struct fib6_rule *rule6 = (struct fib6_rule *) rule; + + if (frh->src_len && (rule6->src.plen != frh->src_len)) + return 0; + + if (frh->dst_len && (rule6->dst.plen != frh->dst_len)) + return 0; + + if (frh->tos && (rule6->tclass != frh->tos)) + return 0; + + if (tb[FRA_SRC] && + nla_memcmp(tb[FRA_SRC], &rule6->src.addr, sizeof(struct in6_addr))) + return 0; + + if (tb[FRA_DST] && + nla_memcmp(tb[FRA_DST], &rule6->dst.addr, sizeof(struct in6_addr))) + return 0; + + return 1; +} + +static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, + struct nlmsghdr *nlh, struct fib_rule_hdr *frh) +{ + struct fib6_rule *rule6 = (struct fib6_rule *) rule; + + frh->family = AF_INET6; + frh->dst_len = rule6->dst.plen; + frh->src_len = rule6->src.plen; + frh->tos = rule6->tclass; + + if (rule6->dst.plen) + NLA_PUT(skb, FRA_DST, sizeof(struct in6_addr), + &rule6->dst.addr); + + if (rule6->src.plen) + NLA_PUT(skb, FRA_SRC, sizeof(struct in6_addr), + &rule6->src.addr); + + return 0; + +nla_put_failure: + return -ENOBUFS; +} + +int fib6_rules_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + return fib_rules_dump(skb, cb, AF_INET6); +} + +static u32 fib6_rule_default_pref(void) +{ + return 0x3FFF; +} + +static struct fib_rules_ops fib6_rules_ops = { + .family = AF_INET6, + .rule_size = sizeof(struct fib6_rule), + .action = fib6_rule_action, + .match = fib6_rule_match, + .configure = fib6_rule_configure, + .compare = fib6_rule_compare, + .fill = fib6_rule_fill, + .default_pref = fib6_rule_default_pref, + .nlgroup = RTNLGRP_IPV6_RULE, + .policy = fib6_rule_policy, + .rules_list = &fib6_rules, + .owner = THIS_MODULE, +}; + +void __init fib6_rules_init(void) +{ + list_add_tail(&local_rule.common.list, &fib6_rules); + list_add_tail(&main_rule.common.list, &fib6_rules); + + fib_rules_register(&fib6_rules_ops); +} + +void fib6_rules_cleanup(void) +{ + fib_rules_unregister(&fib6_rules_ops); +} diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index fcd7da830aca..ce226c14bef5 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -159,6 +159,15 @@ static struct fib6_table fib6_main_tbl = { #ifdef CONFIG_IPV6_MULTIPLE_TABLES +static struct fib6_table fib6_local_tbl = { + .tb6_id = RT6_TABLE_LOCAL, + .tb6_lock = RW_LOCK_UNLOCKED, + .tb6_root = { + .leaf = &ip6_null_entry, + .fn_flags = RTN_ROOT | RTN_TL_ROOT | RTN_RTINFO, + }, +}; + #define FIB_TABLE_HASHSZ 256 static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; @@ -228,20 +237,10 @@ struct fib6_table *fib6_get_table(u32 id) return NULL; } -struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, - pol_lookup_t lookup) -{ - /* - * TODO: Add rule lookup - */ - struct fib6_table *table = fib6_get_table(RT6_TABLE_MAIN); - - return (struct dst_entry *) lookup(table, fl, flags); -} - static void __init fib6_tables_init(void) { fib6_link_table(&fib6_main_tbl); + fib6_link_table(&fib6_local_tbl); } #else diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 73efdadb9ab8..438977e2085d 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -140,6 +140,50 @@ struct rt6_info ip6_null_entry = { .rt6i_ref = ATOMIC_INIT(1), }; +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + +struct rt6_info ip6_prohibit_entry = { + .u = { + .dst = { + .__refcnt = ATOMIC_INIT(1), + .__use = 1, + .dev = &loopback_dev, + .obsolete = -1, + .error = -EACCES, + .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, + .input = ip6_pkt_discard, + .output = ip6_pkt_discard_out, + .ops = &ip6_dst_ops, + .path = (struct dst_entry*)&ip6_prohibit_entry, + } + }, + .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_metric = ~(u32) 0, + .rt6i_ref = ATOMIC_INIT(1), +}; + +struct rt6_info ip6_blk_hole_entry = { + .u = { + .dst = { + .__refcnt = ATOMIC_INIT(1), + .__use = 1, + .dev = &loopback_dev, + .obsolete = -1, + .error = -EINVAL, + .metrics = { [RTAX_HOPLIMIT - 1] = 255, }, + .input = ip6_pkt_discard, + .output = ip6_pkt_discard_out, + .ops = &ip6_dst_ops, + .path = (struct dst_entry*)&ip6_blk_hole_entry, + } + }, + .rt6i_flags = (RTF_REJECT | RTF_NONEXTHOP), + .rt6i_metric = ~(u32) 0, + .rt6i_ref = ATOMIC_INIT(1), +}; + +#endif + /* allocate dst with ip6_dst_ops */ static __inline__ struct rt6_info *ip6_dst_alloc(void) { @@ -2408,10 +2452,16 @@ void __init ip6_route_init(void) #ifdef CONFIG_XFRM xfrm6_init(); #endif +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + fib6_rules_init(); +#endif } void ip6_route_cleanup(void) { +#ifdef CONFIG_IPV6_MULTIPLE_TABLES + fib6_rules_cleanup(); +#endif #ifdef CONFIG_PROC_FS proc_net_remove("ipv6_route"); proc_net_remove("rt6_stats"); -- cgit v1.3-8-gc7d7 From e1ef4bf23b1ced0bf78a1c98289f746486e5c912 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 4 Aug 2006 03:39:22 -0700 Subject: [IPV4]: Use Protocol Independant Policy Routing Rules Framework Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/ip_fib.h | 14 +- net/ipv4/Kconfig | 1 + net/ipv4/devinet.c | 4 +- net/ipv4/fib_frontend.c | 2 +- net/ipv4/fib_rules.c | 605 +++++++++++++++++++----------------------------- 5 files changed, 249 insertions(+), 377 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index a095d1dec7a4..14c82e611c95 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -18,6 +18,7 @@ #include #include +#include /* WARNING: The ordering of these elements must match ordering * of RTA_* rtnetlink attribute numbers. @@ -203,9 +204,8 @@ static inline void fib_select_default(const struct flowi *flp, struct fib_result #define ip_fib_main_table (fib_tables[RT_TABLE_MAIN]) extern struct fib_table * fib_tables[RT_TABLE_MAX+1]; -extern int fib_lookup(const struct flowi *flp, struct fib_result *res); +extern int fib_lookup(struct flowi *flp, struct fib_result *res); extern struct fib_table *__fib_new_table(int id); -extern void fib_rule_put(struct fib_rule *r); static inline struct fib_table *fib_get_table(int id) { @@ -251,15 +251,15 @@ extern u32 __fib_res_prefsrc(struct fib_result *res); extern struct fib_table *fib_hash_init(int id); #ifdef CONFIG_IP_MULTIPLE_TABLES -/* Exported by fib_rules.c */ +extern int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb); + +extern void __init fib4_rules_init(void); +extern void __exit fib4_rules_cleanup(void); -extern int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); -extern int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); -extern int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb); #ifdef CONFIG_NET_CLS_ROUTE extern u32 fib_rules_tclass(struct fib_result *res); #endif -extern void fib_rules_init(void); + #endif static inline void fib_combine_itag(u32 *itag, struct fib_result *res) diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig index 3b5d504a74be..1650b64415aa 100644 --- a/net/ipv4/Kconfig +++ b/net/ipv4/Kconfig @@ -88,6 +88,7 @@ config IP_FIB_HASH config IP_MULTIPLE_TABLES bool "IP: policy routing" depends on IP_ADVANCED_ROUTER + select FIB_RULES ---help--- Normally, a router decides what to do with a received packet based solely on the packet's final destination address. If you say Y here, diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index a6cc31d911eb..9f3ffbec3296 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1151,9 +1151,7 @@ static struct rtnetlink_link inet_rtnetlink_table[RTM_NR_MSGTYPES] = { [RTM_GETROUTE - RTM_BASE] = { .doit = inet_rtm_getroute, .dumpit = inet_dump_fib, }, #ifdef CONFIG_IP_MULTIPLE_TABLES - [RTM_NEWRULE - RTM_BASE] = { .doit = inet_rtm_newrule, }, - [RTM_DELRULE - RTM_BASE] = { .doit = inet_rtm_delrule, }, - [RTM_GETRULE - RTM_BASE] = { .dumpit = inet_dump_rules, }, + [RTM_GETRULE - RTM_BASE] = { .dumpit = fib4_rules_dump, }, #endif }; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index ba2a70745a63..fe4a53d4d10d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -656,7 +656,7 @@ void __init ip_fib_init(void) ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); #else - fib_rules_init(); + fib4_rules_init(); #endif register_netdevice_notifier(&fib_netdev_notifier); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 79b04718bdfd..23ec6ae1a0f6 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -5,9 +5,8 @@ * * IPv4 Forwarding Information Base: policy rules. * - * Version: $Id: fib_rules.c,v 1.17 2001/10/31 21:55:54 davem Exp $ - * * Authors: Alexey Kuznetsov, + * Thomas Graf * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -19,129 +18,154 @@ * Marc Boucher : routing by fwmark */ -#include -#include -#include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include #include -#include -#include -#include #include +#include #include #include #include - #include -#include #include #include -#include #include +#include -#define FRprintk(a...) +static struct fib_rules_ops fib4_rules_ops; -struct fib_rule +struct fib4_rule { - struct hlist_node hlist; - atomic_t r_clntref; - u32 r_preference; - unsigned char r_table; - unsigned char r_action; - unsigned char r_dst_len; - unsigned char r_src_len; - u32 r_src; - u32 r_srcmask; - u32 r_dst; - u32 r_dstmask; - u32 r_srcmap; - u8 r_flags; - u8 r_tos; + struct fib_rule common; + u8 dst_len; + u8 src_len; + u8 tos; + u32 src; + u32 srcmask; + u32 dst; + u32 dstmask; #ifdef CONFIG_IP_ROUTE_FWMARK - u32 r_fwmark; + u32 fwmark; #endif - int r_ifindex; #ifdef CONFIG_NET_CLS_ROUTE - __u32 r_tclassid; + u32 tclassid; #endif - char r_ifname[IFNAMSIZ]; - int r_dead; - struct rcu_head rcu; }; -static struct fib_rule default_rule = { - .r_clntref = ATOMIC_INIT(2), - .r_preference = 0x7FFF, - .r_table = RT_TABLE_DEFAULT, - .r_action = RTN_UNICAST, +static struct fib4_rule default_rule = { + .common = { + .refcnt = ATOMIC_INIT(2), + .pref = 0x7FFF, + .table = RT_TABLE_DEFAULT, + .action = FR_ACT_TO_TBL, + }, }; -static struct fib_rule main_rule = { - .r_clntref = ATOMIC_INIT(2), - .r_preference = 0x7FFE, - .r_table = RT_TABLE_MAIN, - .r_action = RTN_UNICAST, +static struct fib4_rule main_rule = { + .common = { + .refcnt = ATOMIC_INIT(2), + .pref = 0x7FFE, + .table = RT_TABLE_MAIN, + .action = FR_ACT_TO_TBL, + }, }; -static struct fib_rule local_rule = { - .r_clntref = ATOMIC_INIT(2), - .r_table = RT_TABLE_LOCAL, - .r_action = RTN_UNICAST, +static struct fib4_rule local_rule = { + .common = { + .refcnt = ATOMIC_INIT(2), + .table = RT_TABLE_LOCAL, + .action = FR_ACT_TO_TBL, + .flags = FIB_RULE_PERMANENT, + }, }; -static struct hlist_head fib_rules; +static LIST_HEAD(fib4_rules); + +#ifdef CONFIG_NET_CLS_ROUTE +u32 fib_rules_tclass(struct fib_result *res) +{ + return res->r ? ((struct fib4_rule *) res->r)->tclassid : 0; +} +#endif -/* writer func called from netlink -- rtnl_sem hold*/ +int fib_lookup(struct flowi *flp, struct fib_result *res) +{ + struct fib_lookup_arg arg = { + .result = res, + }; + int err; -static void rtmsg_rule(int, struct fib_rule *); + err = fib_rules_lookup(&fib4_rules_ops, flp, 0, &arg); + res->r = arg.rule; -int inet_rtm_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) + return err; +} + +int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, + struct fib_lookup_arg *arg) { - struct rtattr **rta = arg; - struct rtmsg *rtm = NLMSG_DATA(nlh); - struct fib_rule *r; - struct hlist_node *node; - int err = -ESRCH; - - hlist_for_each_entry(r, node, &fib_rules, hlist) { - if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 4) == 0) && - rtm->rtm_src_len == r->r_src_len && - rtm->rtm_dst_len == r->r_dst_len && - (!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 4) == 0) && - rtm->rtm_tos == r->r_tos && -#ifdef CONFIG_IP_ROUTE_FWMARK - (!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) && -#endif - (!rtm->rtm_type || rtm->rtm_type == r->r_action) && - (!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) && - (!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) && - (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) { - err = -EPERM; - if (r == &local_rule) - break; - - hlist_del_rcu(&r->hlist); - r->r_dead = 1; - rtmsg_rule(RTM_DELRULE, r); - fib_rule_put(r); - err = 0; - break; - } + int err = -EAGAIN; + struct fib_table *tbl; + + switch (rule->action) { + case FR_ACT_TO_TBL: + break; + + case FR_ACT_UNREACHABLE: + err = -ENETUNREACH; + goto errout; + + case FR_ACT_PROHIBIT: + err = -EACCES; + goto errout; + + case FR_ACT_BLACKHOLE: + default: + err = -EINVAL; + goto errout; } + + if ((tbl = fib_get_table(rule->table)) == NULL) + goto errout; + + err = tbl->tb_lookup(tbl, flp, (struct fib_result *) arg->result); + if (err > 0) + err = -EAGAIN; +errout: return err; } -/* Allocate new unique table id */ + +void fib_select_default(const struct flowi *flp, struct fib_result *res) +{ + if (res->r && res->r->action == FR_ACT_TO_TBL && + FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) { + struct fib_table *tb; + if ((tb = fib_get_table(res->r->table)) != NULL) + tb->tb_select_default(tb, flp, res); + } +} + +static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) +{ + struct fib4_rule *r = (struct fib4_rule *) rule; + u32 daddr = fl->fl4_dst; + u32 saddr = fl->fl4_src; + + if (((saddr ^ r->src) & r->srcmask) || + ((daddr ^ r->dst) & r->dstmask)) + return 0; + + if (r->tos && (r->tos != fl->fl4_tos)) + return 0; + +#ifdef CONFIG_IP_ROUTE_FWMARK + if (r->fwmark && (r->fwmark != fl->fl4_fwmark)) + return 0; +#endif + + return 1; +} static struct fib_table *fib_empty_table(void) { @@ -153,329 +177,178 @@ static struct fib_table *fib_empty_table(void) return NULL; } -static inline void fib_rule_put_rcu(struct rcu_head *head) -{ - struct fib_rule *r = container_of(head, struct fib_rule, rcu); - kfree(r); -} +static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = { + [FRA_IFNAME] = { .type = NLA_STRING }, + [FRA_PRIORITY] = { .type = NLA_U32 }, + [FRA_SRC] = { .type = NLA_U32 }, + [FRA_DST] = { .type = NLA_U32 }, + [FRA_FWMARK] = { .type = NLA_U32 }, + [FRA_FLOW] = { .type = NLA_U32 }, +}; -void fib_rule_put(struct fib_rule *r) +static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, + struct nlmsghdr *nlh, struct fib_rule_hdr *frh, + struct nlattr **tb) { - if (atomic_dec_and_test(&r->r_clntref)) { - if (r->r_dead) - call_rcu(&r->rcu, fib_rule_put_rcu); - else - printk("Freeing alive rule %p\n", r); - } -} + int err = -EINVAL; + struct fib4_rule *rule4 = (struct fib4_rule *) rule; -/* writer func called from netlink -- rtnl_sem hold*/ + if (frh->src_len > 32 || frh->dst_len > 32 || + (frh->tos & ~IPTOS_TOS_MASK)) + goto errout; -int inet_rtm_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) -{ - struct rtattr **rta = arg; - struct rtmsg *rtm = NLMSG_DATA(nlh); - struct fib_rule *r, *new_r, *last = NULL; - struct hlist_node *node = NULL; - unsigned char table_id; - - if (rtm->rtm_src_len > 32 || rtm->rtm_dst_len > 32 || - (rtm->rtm_tos & ~IPTOS_TOS_MASK)) - return -EINVAL; - - if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ) - return -EINVAL; - - table_id = rtm->rtm_table; - if (table_id == RT_TABLE_UNSPEC) { - struct fib_table *table; - if (rtm->rtm_type == RTN_UNICAST) { - if ((table = fib_empty_table()) == NULL) - return -ENOBUFS; - table_id = table->tb_id; - } - } + if (rule->table == RT_TABLE_UNSPEC) { + if (rule->action == FR_ACT_TO_TBL) { + struct fib_table *table; - new_r = kzalloc(sizeof(*new_r), GFP_KERNEL); - if (!new_r) - return -ENOMEM; - - if (rta[RTA_SRC-1]) - memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 4); - if (rta[RTA_DST-1]) - memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 4); - if (rta[RTA_GATEWAY-1]) - memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 4); - new_r->r_src_len = rtm->rtm_src_len; - new_r->r_dst_len = rtm->rtm_dst_len; - new_r->r_srcmask = inet_make_mask(rtm->rtm_src_len); - new_r->r_dstmask = inet_make_mask(rtm->rtm_dst_len); - new_r->r_tos = rtm->rtm_tos; -#ifdef CONFIG_IP_ROUTE_FWMARK - if (rta[RTA_PROTOINFO-1]) - memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4); -#endif - new_r->r_action = rtm->rtm_type; - new_r->r_flags = rtm->rtm_flags; - if (rta[RTA_PRIORITY-1]) - memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4); - new_r->r_table = table_id; - if (rta[RTA_IIF-1]) { - struct net_device *dev; - rtattr_strlcpy(new_r->r_ifname, rta[RTA_IIF-1], IFNAMSIZ); - new_r->r_ifindex = -1; - dev = __dev_get_by_name(new_r->r_ifname); - if (dev) - new_r->r_ifindex = dev->ifindex; - } -#ifdef CONFIG_NET_CLS_ROUTE - if (rta[RTA_FLOW-1]) - memcpy(&new_r->r_tclassid, RTA_DATA(rta[RTA_FLOW-1]), 4); -#endif - r = container_of(fib_rules.first, struct fib_rule, hlist); + table = fib_empty_table(); + if (table == NULL) { + err = -ENOBUFS; + goto errout; + } - if (!new_r->r_preference) { - if (r && r->hlist.next != NULL) { - r = container_of(r->hlist.next, struct fib_rule, hlist); - if (r->r_preference) - new_r->r_preference = r->r_preference - 1; + rule->table = table->tb_id; } } - hlist_for_each_entry(r, node, &fib_rules, hlist) { - if (r->r_preference > new_r->r_preference) - break; - last = r; - } - atomic_inc(&new_r->r_clntref); + if (tb[FRA_SRC]) + rule4->src = nla_get_u32(tb[FRA_SRC]); - if (last) - hlist_add_after_rcu(&last->hlist, &new_r->hlist); - else - hlist_add_before_rcu(&new_r->hlist, &r->hlist); + if (tb[FRA_DST]) + rule4->dst = nla_get_u32(tb[FRA_DST]); - rtmsg_rule(RTM_NEWRULE, new_r); - return 0; -} +#ifdef CONFIG_IP_ROUTE_FWMARK + if (tb[FRA_FWMARK]) + rule4->fwmark = nla_get_u32(tb[FRA_FWMARK]); +#endif #ifdef CONFIG_NET_CLS_ROUTE -u32 fib_rules_tclass(struct fib_result *res) -{ - if (res->r) - return res->r->r_tclassid; - return 0; -} + if (tb[FRA_FLOW]) + rule4->tclassid = nla_get_u32(tb[FRA_FLOW]); #endif -/* callers should hold rtnl semaphore */ - -static void fib_rules_detach(struct net_device *dev) -{ - struct hlist_node *node; - struct fib_rule *r; - - hlist_for_each_entry(r, node, &fib_rules, hlist) { - if (r->r_ifindex == dev->ifindex) - r->r_ifindex = -1; + rule4->src_len = frh->src_len; + rule4->srcmask = inet_make_mask(rule4->src_len); + rule4->dst_len = frh->dst_len; + rule4->dstmask = inet_make_mask(rule4->dst_len); + rule4->tos = frh->tos; - } -} - -/* callers should hold rtnl semaphore */ - -static void fib_rules_attach(struct net_device *dev) -{ - struct hlist_node *node; - struct fib_rule *r; - - hlist_for_each_entry(r, node, &fib_rules, hlist) { - if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0) - r->r_ifindex = dev->ifindex; - } + err = 0; +errout: + return err; } -int fib_lookup(const struct flowi *flp, struct fib_result *res) +static int fib4_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, + struct nlattr **tb) { - int err; - struct fib_rule *r, *policy; - struct fib_table *tb; - struct hlist_node *node; + struct fib4_rule *rule4 = (struct fib4_rule *) rule; - u32 daddr = flp->fl4_dst; - u32 saddr = flp->fl4_src; + if (frh->src_len && (rule4->src_len != frh->src_len)) + return 0; -FRprintk("Lookup: %u.%u.%u.%u <- %u.%u.%u.%u ", - NIPQUAD(flp->fl4_dst), NIPQUAD(flp->fl4_src)); + if (frh->dst_len && (rule4->dst_len != frh->dst_len)) + return 0; - rcu_read_lock(); + if (frh->tos && (rule4->tos != frh->tos)) + return 0; - hlist_for_each_entry_rcu(r, node, &fib_rules, hlist) { - if (((saddr^r->r_src) & r->r_srcmask) || - ((daddr^r->r_dst) & r->r_dstmask) || - (r->r_tos && r->r_tos != flp->fl4_tos) || #ifdef CONFIG_IP_ROUTE_FWMARK - (r->r_fwmark && r->r_fwmark != flp->fl4_fwmark) || + if (tb[FRA_FWMARK] && (rule4->fwmark != nla_get_u32(tb[FRA_FWMARK]))) + return 0; #endif - (r->r_ifindex && r->r_ifindex != flp->iif)) - continue; - -FRprintk("tb %d r %d ", r->r_table, r->r_action); - switch (r->r_action) { - case RTN_UNICAST: - policy = r; - break; - case RTN_UNREACHABLE: - rcu_read_unlock(); - return -ENETUNREACH; - default: - case RTN_BLACKHOLE: - rcu_read_unlock(); - return -EINVAL; - case RTN_PROHIBIT: - rcu_read_unlock(); - return -EACCES; - } - if ((tb = fib_get_table(r->r_table)) == NULL) - continue; - err = tb->tb_lookup(tb, flp, res); - if (err == 0) { - res->r = policy; - if (policy) - atomic_inc(&policy->r_clntref); - rcu_read_unlock(); - return 0; - } - if (err < 0 && err != -EAGAIN) { - rcu_read_unlock(); - return err; - } - } -FRprintk("FAILURE\n"); - rcu_read_unlock(); - return -ENETUNREACH; -} +#ifdef CONFIG_NET_CLS_ROUTE + if (tb[FRA_FLOW] && (rule4->tclassid != nla_get_u32(tb[FRA_FLOW]))) + return 0; +#endif -void fib_select_default(const struct flowi *flp, struct fib_result *res) -{ - if (res->r && res->r->r_action == RTN_UNICAST && - FIB_RES_GW(*res) && FIB_RES_NH(*res).nh_scope == RT_SCOPE_LINK) { - struct fib_table *tb; - if ((tb = fib_get_table(res->r->r_table)) != NULL) - tb->tb_select_default(tb, flp, res); - } -} + if (tb[FRA_SRC] && (rule4->src != nla_get_u32(tb[FRA_SRC]))) + return 0; -static int fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr) -{ - struct net_device *dev = ptr; + if (tb[FRA_DST] && (rule4->dst != nla_get_u32(tb[FRA_DST]))) + return 0; - if (event == NETDEV_UNREGISTER) - fib_rules_detach(dev); - else if (event == NETDEV_REGISTER) - fib_rules_attach(dev); - return NOTIFY_DONE; + return 1; } +static int fib4_rule_fill(struct fib_rule *rule, struct sk_buff *skb, + struct nlmsghdr *nlh, struct fib_rule_hdr *frh) +{ + struct fib4_rule *rule4 = (struct fib4_rule *) rule; -static struct notifier_block fib_rules_notifier = { - .notifier_call =fib_rules_event, -}; + frh->family = AF_INET; + frh->dst_len = rule4->dst_len; + frh->src_len = rule4->src_len; + frh->tos = rule4->tos; -static __inline__ int inet_fill_rule(struct sk_buff *skb, - struct fib_rule *r, - u32 pid, u32 seq, int event, - unsigned int flags) -{ - struct rtmsg *rtm; - struct nlmsghdr *nlh; - unsigned char *b = skb->tail; - - nlh = NLMSG_NEW(skb, pid, seq, event, sizeof(*rtm), flags); - rtm = NLMSG_DATA(nlh); - rtm->rtm_family = AF_INET; - rtm->rtm_dst_len = r->r_dst_len; - rtm->rtm_src_len = r->r_src_len; - rtm->rtm_tos = r->r_tos; #ifdef CONFIG_IP_ROUTE_FWMARK - if (r->r_fwmark) - RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark); + if (rule4->fwmark) + NLA_PUT_U32(skb, FRA_FWMARK, rule4->fwmark); #endif - rtm->rtm_table = r->r_table; - rtm->rtm_protocol = 0; - rtm->rtm_scope = 0; - rtm->rtm_type = r->r_action; - rtm->rtm_flags = r->r_flags; - - if (r->r_dst_len) - RTA_PUT(skb, RTA_DST, 4, &r->r_dst); - if (r->r_src_len) - RTA_PUT(skb, RTA_SRC, 4, &r->r_src); - if (r->r_ifname[0]) - RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname); - if (r->r_preference) - RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference); - if (r->r_srcmap) - RTA_PUT(skb, RTA_GATEWAY, 4, &r->r_srcmap); + + if (rule4->dst_len) + NLA_PUT_U32(skb, FRA_DST, rule4->dst); + + if (rule4->src_len) + NLA_PUT_U32(skb, FRA_SRC, rule4->src); + #ifdef CONFIG_NET_CLS_ROUTE - if (r->r_tclassid) - RTA_PUT(skb, RTA_FLOW, 4, &r->r_tclassid); + if (rule4->tclassid) + NLA_PUT_U32(skb, FRA_FLOW, rule4->tclassid); #endif - nlh->nlmsg_len = skb->tail - b; - return skb->len; + return 0; -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; +nla_put_failure: + return -ENOBUFS; } -/* callers should hold rtnl semaphore */ +int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + return fib_rules_dump(skb, cb, AF_INET); +} -static void rtmsg_rule(int event, struct fib_rule *r) +static u32 fib4_rule_default_pref(void) { - int size = NLMSG_SPACE(sizeof(struct rtmsg) + 128); - struct sk_buff *skb = alloc_skb(size, GFP_KERNEL); - - if (!skb) - netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, ENOBUFS); - else if (inet_fill_rule(skb, r, 0, 0, event, 0) < 0) { - kfree_skb(skb); - netlink_set_err(rtnl, 0, RTNLGRP_IPV4_RULE, EINVAL); - } else { - netlink_broadcast(rtnl, skb, 0, RTNLGRP_IPV4_RULE, GFP_KERNEL); + struct list_head *pos; + struct fib_rule *rule; + + if (!list_empty(&fib4_rules)) { + pos = fib4_rules.next; + if (pos->next != &fib4_rules) { + rule = list_entry(pos->next, struct fib_rule, list); + if (rule->pref) + return rule->pref - 1; + } } + + return 0; } -int inet_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) +static struct fib_rules_ops fib4_rules_ops = { + .family = AF_INET, + .rule_size = sizeof(struct fib4_rule), + .action = fib4_rule_action, + .match = fib4_rule_match, + .configure = fib4_rule_configure, + .compare = fib4_rule_compare, + .fill = fib4_rule_fill, + .default_pref = fib4_rule_default_pref, + .nlgroup = RTNLGRP_IPV4_RULE, + .policy = fib4_rule_policy, + .rules_list = &fib4_rules, + .owner = THIS_MODULE, +}; + +void __init fib4_rules_init(void) { - int idx = 0; - int s_idx = cb->args[0]; - struct fib_rule *r; - struct hlist_node *node; - - rcu_read_lock(); - hlist_for_each_entry(r, node, &fib_rules, hlist) { - if (idx < s_idx) - goto next; - if (inet_fill_rule(skb, r, NETLINK_CB(cb->skb).pid, - cb->nlh->nlmsg_seq, - RTM_NEWRULE, NLM_F_MULTI) < 0) - break; -next: - idx++; - } - rcu_read_unlock(); - cb->args[0] = idx; + list_add_tail(&local_rule.common.list, &fib4_rules); + list_add_tail(&main_rule.common.list, &fib4_rules); + list_add_tail(&default_rule.common.list, &fib4_rules); - return skb->len; + fib_rules_register(&fib4_rules_ops); } -void __init fib_rules_init(void) +void __exit fib4_rules_cleanup(void) { - INIT_HLIST_HEAD(&fib_rules); - hlist_add_head(&local_rule.hlist, &fib_rules); - hlist_add_after(&local_rule.hlist, &main_rule.hlist); - hlist_add_after(&main_rule.hlist, &default_rule.hlist); - register_netdevice_notifier(&fib_rules_notifier); + fib_rules_unregister(&fib4_rules_ops); } -- cgit v1.3-8-gc7d7 From fe4944e59c357f945f81bc67edb7ed1392e875ad Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Fri, 4 Aug 2006 23:03:05 -0700 Subject: [NETLINK]: Extend netlink messaging interface Adds: nlmsg_get_pos() return current position in message nlmsg_trim() trim part of message nla_reserve_nohdr(skb, len) reserve room for an attribute w/o hdr nla_put_nohdr(skb, len, data) add attribute w/o hdr nla_find_nested() find attribute in nested attributes Fixes nlmsg_new() to take allocation flags and consider size. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/netlink.h | 74 +++++++++++++++++++++++++++++++++++++++++------- kernel/taskstats.c | 2 +- net/netlink/attr.c | 75 +++++++++++++++++++++++++++++++++++++++++++++++++ net/netlink/genetlink.c | 2 +- 4 files changed, 141 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/netlink.h b/include/net/netlink.h index 640c26a90cf1..3a5e40b1e045 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -35,6 +35,8 @@ * nlmsg_put() add a netlink message to an skb * nlmsg_put_answer() callback based nlmsg_put() * nlmsg_end() finanlize netlink message + * nlmsg_get_pos() return current position in message + * nlmsg_trim() trim part of message * nlmsg_cancel() cancel message construction * nlmsg_free() free a netlink message * @@ -80,8 +82,10 @@ * struct nlattr netlink attribtue header * * Attribute Construction: - * nla_reserve(skb, type, len) reserve skb tailroom for an attribute + * nla_reserve(skb, type, len) reserve room for an attribute + * nla_reserve_nohdr(skb, len) reserve room for an attribute w/o hdr * nla_put(skb, type, len, data) add attribute to skb + * nla_put_nohdr(skb, len, data) add attribute w/o hdr * * Attribute Construction for Basic Types: * nla_put_u8(skb, type, value) add u8 attribute to skb @@ -139,6 +143,7 @@ * nla_next(nla, remaining) get next netlink attribute * nla_validate() validate a stream of attributes * nla_find() find attribute in stream of attributes + * nla_find_nested() find attribute in nested attributes * nla_parse() parse and validate stream of attrs * nla_parse_nested() parse nested attribuets * nla_for_each_attr() loop over all attributes @@ -203,12 +208,18 @@ extern int nla_memcmp(const struct nlattr *nla, const void *data, extern int nla_strcmp(const struct nlattr *nla, const char *str); extern struct nlattr * __nla_reserve(struct sk_buff *skb, int attrtype, int attrlen); +extern void * __nla_reserve_nohdr(struct sk_buff *skb, int attrlen); extern struct nlattr * nla_reserve(struct sk_buff *skb, int attrtype, int attrlen); +extern void * nla_reserve_nohdr(struct sk_buff *skb, int attrlen); extern void __nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data); +extern void __nla_put_nohdr(struct sk_buff *skb, int attrlen, + const void *data); extern int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data); +extern int nla_put_nohdr(struct sk_buff *skb, int attrlen, + const void *data); /************************************************************************** * Netlink Messages @@ -453,12 +464,13 @@ static inline struct nlmsghdr *nlmsg_put_answer(struct sk_buff *skb, /** * nlmsg_new - Allocate a new netlink message * @size: maximum size of message + * @flags: the type of memory to allocate. * * Use NLMSG_GOODSIZE if size isn't know and you need a good default size. */ -static inline struct sk_buff *nlmsg_new(int size) +static inline struct sk_buff *nlmsg_new(int size, gfp_t flags) { - return alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); + return alloc_skb(size, flags); } /** @@ -479,6 +491,32 @@ static inline int nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh) return skb->len; } +/** + * nlmsg_get_pos - return current position in netlink message + * @skb: socket buffer the message is stored in + * + * Returns a pointer to the current tail of the message. + */ +static inline void *nlmsg_get_pos(struct sk_buff *skb) +{ + return skb->tail; +} + +/** + * nlmsg_trim - Trim message to a mark + * @skb: socket buffer the message is stored in + * @mark: mark to trim to + * + * Trims the message to the provided mark. Returns -1. + */ +static inline int nlmsg_trim(struct sk_buff *skb, void *mark) +{ + if (mark) + skb_trim(skb, (unsigned char *) mark - skb->data); + + return -1; +} + /** * nlmsg_cancel - Cancel construction of a netlink message * @skb: socket buffer the message is stored in @@ -489,9 +527,7 @@ static inline int nlmsg_end(struct sk_buff *skb, struct nlmsghdr *nlh) */ static inline int nlmsg_cancel(struct sk_buff *skb, struct nlmsghdr *nlh) { - skb_trim(skb, (unsigned char *) nlh - skb->data); - - return -1; + return nlmsg_trim(skb, nlh); } /** @@ -630,6 +666,18 @@ static inline struct nlattr *nla_next(const struct nlattr *nla, int *remaining) return (struct nlattr *) ((char *) nla + totlen); } +/** + * nla_find_nested - find attribute in a set of nested attributes + * @nla: attribute containing the nested attributes + * @attrtype: type of attribute to look for + * + * Returns the first attribute which matches the specified type. + */ +static inline struct nlattr *nla_find_nested(struct nlattr *nla, int attrtype) +{ + return nla_find(nla_data(nla), nla_len(nla), attrtype); +} + /** * nla_parse_nested - parse nested attributes * @tb: destination array with maxtype+1 elements @@ -862,10 +910,7 @@ static inline int nla_nest_end(struct sk_buff *skb, struct nlattr *start) */ static inline int nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) { - if (start) - skb_trim(skb, (unsigned char *) start - skb->data); - - return -1; + return nlmsg_trim(skb, start); } /** @@ -880,4 +925,13 @@ static inline int nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) nla_ok(pos, rem); \ pos = nla_next(pos, &(rem))) +/** + * nla_for_each_nested - iterate over nested attributes + * @pos: loop counter, set to current attribute + * @nla: attribute containing the nested attributes + * @rem: initialized to len, holds bytes currently remaining in stream + */ +#define nla_for_each_nested(pos, nla, rem) \ + nla_for_each_attr(pos, nla_data(nla), nla_len(nla), rem) + #endif diff --git a/kernel/taskstats.c b/kernel/taskstats.c index e78187657330..2ed4040d0dc5 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -75,7 +75,7 @@ static int prepare_reply(struct genl_info *info, u8 cmd, struct sk_buff **skbp, /* * If new attributes are added, please revisit this allocation */ - skb = nlmsg_new(size); + skb = nlmsg_new(size, GFP_KERNEL); if (!skb) return -ENOMEM; diff --git a/net/netlink/attr.c b/net/netlink/attr.c index dddbd15135a8..136e529e5780 100644 --- a/net/netlink/attr.c +++ b/net/netlink/attr.c @@ -254,6 +254,26 @@ struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) return nla; } +/** + * __nla_reserve_nohdr - reserve room for attribute without header + * @skb: socket buffer to reserve room on + * @attrlen: length of attribute payload + * + * Reserves room for attribute payload without a header. + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the payload. + */ +void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen) +{ + void *start; + + start = skb_put(skb, NLA_ALIGN(attrlen)); + memset(start, 0, NLA_ALIGN(attrlen)); + + return start; +} + /** * nla_reserve - reserve room for attribute on the skb * @skb: socket buffer to reserve room on @@ -274,6 +294,24 @@ struct nlattr *nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) return __nla_reserve(skb, attrtype, attrlen); } +/** + * nla_reserve - reserve room for attribute without header + * @skb: socket buffer to reserve room on + * @len: length of attribute payload + * + * Reserves room for attribute payload without a header. + * + * Returns NULL if the tailroom of the skb is insufficient to store + * the attribute payload. + */ +void *nla_reserve_nohdr(struct sk_buff *skb, int attrlen) +{ + if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) + return NULL; + + return __nla_reserve_nohdr(skb, attrlen); +} + /** * __nla_put - Add a netlink attribute to a socket buffer * @skb: socket buffer to add attribute to @@ -293,6 +331,22 @@ void __nla_put(struct sk_buff *skb, int attrtype, int attrlen, memcpy(nla_data(nla), data, attrlen); } +/** + * __nla_put_nohdr - Add a netlink attribute without header + * @skb: socket buffer to add attribute to + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * The caller is responsible to ensure that the skb provides enough + * tailroom for the attribute payload. + */ +void __nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) +{ + void *start; + + start = __nla_reserve_nohdr(skb, attrlen); + memcpy(start, data, attrlen); +} /** * nla_put - Add a netlink attribute to a socket buffer @@ -313,15 +367,36 @@ int nla_put(struct sk_buff *skb, int attrtype, int attrlen, const void *data) return 0; } +/** + * nla_put_nohdr - Add a netlink attribute without header + * @skb: socket buffer to add attribute to + * @attrlen: length of attribute payload + * @data: head of attribute payload + * + * Returns -1 if the tailroom of the skb is insufficient to store + * the attribute payload. + */ +int nla_put_nohdr(struct sk_buff *skb, int attrlen, const void *data) +{ + if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) + return -1; + + __nla_put_nohdr(skb, attrlen, data); + return 0; +} EXPORT_SYMBOL(nla_validate); EXPORT_SYMBOL(nla_parse); EXPORT_SYMBOL(nla_find); EXPORT_SYMBOL(nla_strlcpy); EXPORT_SYMBOL(__nla_reserve); +EXPORT_SYMBOL(__nla_reserve_nohdr); EXPORT_SYMBOL(nla_reserve); +EXPORT_SYMBOL(nla_reserve_nohdr); EXPORT_SYMBOL(__nla_put); +EXPORT_SYMBOL(__nla_put_nohdr); EXPORT_SYMBOL(nla_put); +EXPORT_SYMBOL(nla_put_nohdr); EXPORT_SYMBOL(nla_memcpy); EXPORT_SYMBOL(nla_memcmp); EXPORT_SYMBOL(nla_strcmp); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index a298f77cc3e3..75bb47a898dd 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -440,7 +440,7 @@ static struct sk_buff *ctrl_build_msg(struct genl_family *family, u32 pid, struct sk_buff *skb; int err; - skb = nlmsg_new(NLMSG_GOODSIZE); + skb = nlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL); if (skb == NULL) return ERR_PTR(-ENOBUFS); -- cgit v1.3-8-gc7d7 From 9067c722cf6930adf1df2d169de9094dd90b0c33 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 7 Aug 2006 17:57:44 -0700 Subject: [NEIGH]: Move netlink neighbour bits to linux/neighbour.h Moves netlink neighbour bits to linux/neighbour.h. Also moves bits to be exported to userspace from net/neighbour.h to linux/neighbour.h and removes __KERNEL__ guards, userspace is not supposed to be using it. rtnetlink_rcv_msg() is not longer required to parse attributes for the neighbour layer, remove dependency on obsolete and buggy rta_buf. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/neighbour.h | 65 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/rtnetlink.h | 63 --------------------------------------------- include/net/neighbour.h | 39 ++-------------------------- net/core/rtnetlink.c | 2 -- 4 files changed, 67 insertions(+), 102 deletions(-) create mode 100644 include/linux/neighbour.h (limited to 'include/net') diff --git a/include/linux/neighbour.h b/include/linux/neighbour.h new file mode 100644 index 000000000000..8e8293d86fbb --- /dev/null +++ b/include/linux/neighbour.h @@ -0,0 +1,65 @@ +#ifndef __LINUX_NEIGHBOUR_H +#define __LINUX_NEIGHBOUR_H + +#include + +struct ndmsg +{ + __u8 ndm_family; + __u8 ndm_pad1; + __u16 ndm_pad2; + __s32 ndm_ifindex; + __u16 ndm_state; + __u8 ndm_flags; + __u8 ndm_type; +}; + +enum +{ + NDA_UNSPEC, + NDA_DST, + NDA_LLADDR, + NDA_CACHEINFO, + NDA_PROBES, + __NDA_MAX +}; + +#define NDA_MAX (__NDA_MAX - 1) + +/* + * Neighbor Cache Entry Flags + */ + +#define NTF_PROXY 0x08 /* == ATF_PUBL */ +#define NTF_ROUTER 0x80 + +/* + * Neighbor Cache Entry States. + */ + +#define NUD_INCOMPLETE 0x01 +#define NUD_REACHABLE 0x02 +#define NUD_STALE 0x04 +#define NUD_DELAY 0x08 +#define NUD_PROBE 0x10 +#define NUD_FAILED 0x20 + +/* Dummy states */ +#define NUD_NOARP 0x40 +#define NUD_PERMANENT 0x80 +#define NUD_NONE 0x00 + +/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change + and make no address resolution or NUD. + NUD_PERMANENT is also cannot be deleted by garbage collectors. + */ + +struct nda_cacheinfo +{ + __u32 ndm_confirmed; + __u32 ndm_used; + __u32 ndm_updated; + __u32 ndm_refcnt; +}; + +#endif diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 84f3eb426da2..9750f0214c22 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -386,69 +386,6 @@ struct rta_session -/************************************************************** - * Neighbour discovery. - ****/ - -struct ndmsg -{ - unsigned char ndm_family; - unsigned char ndm_pad1; - unsigned short ndm_pad2; - int ndm_ifindex; /* Link index */ - __u16 ndm_state; - __u8 ndm_flags; - __u8 ndm_type; -}; - -enum -{ - NDA_UNSPEC, - NDA_DST, - NDA_LLADDR, - NDA_CACHEINFO, - NDA_PROBES, - __NDA_MAX -}; - -#define NDA_MAX (__NDA_MAX - 1) - -#define NDA_RTA(r) ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct ndmsg)))) -#define NDA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct ndmsg)) - -/* - * Neighbor Cache Entry Flags - */ - -#define NTF_PROXY 0x08 /* == ATF_PUBL */ -#define NTF_ROUTER 0x80 - -/* - * Neighbor Cache Entry States. - */ - -#define NUD_INCOMPLETE 0x01 -#define NUD_REACHABLE 0x02 -#define NUD_STALE 0x04 -#define NUD_DELAY 0x08 -#define NUD_PROBE 0x10 -#define NUD_FAILED 0x20 - -/* Dummy states */ -#define NUD_NOARP 0x40 -#define NUD_PERMANENT 0x80 -#define NUD_NONE 0x00 - - -struct nda_cacheinfo -{ - __u32 ndm_confirmed; - __u32 ndm_used; - __u32 ndm_updated; - __u32 ndm_refcnt; -}; - - /***************************************************************** * Neighbour tables specific messages. * diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 4901ee446879..74c4b6ff8a5c 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -1,6 +1,8 @@ #ifndef _NET_NEIGHBOUR_H #define _NET_NEIGHBOUR_H +#include + /* * Generic neighbour manipulation * @@ -14,40 +16,6 @@ * - Add neighbour cache statistics like rtstat */ -/* The following flags & states are exported to user space, - so that they should be moved to include/linux/ directory. - */ - -/* - * Neighbor Cache Entry Flags - */ - -#define NTF_PROXY 0x08 /* == ATF_PUBL */ -#define NTF_ROUTER 0x80 - -/* - * Neighbor Cache Entry States. - */ - -#define NUD_INCOMPLETE 0x01 -#define NUD_REACHABLE 0x02 -#define NUD_STALE 0x04 -#define NUD_DELAY 0x08 -#define NUD_PROBE 0x10 -#define NUD_FAILED 0x20 - -/* Dummy states */ -#define NUD_NOARP 0x40 -#define NUD_PERMANENT 0x80 -#define NUD_NONE 0x00 - -/* NUD_NOARP & NUD_PERMANENT are pseudostates, they never change - and make no address resolution or NUD. - NUD_PERMANENT is also cannot be deleted by garbage collectors. - */ - -#ifdef __KERNEL__ - #include #include #include @@ -374,6 +342,3 @@ struct neighbour_cb { #define NEIGH_CB(skb) ((struct neighbour_cb *)(skb)->cb) #endif -#endif - - diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 93ba04fb8444..78ccbd4c4e37 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -104,7 +104,6 @@ static const int rtm_min[RTM_NR_FAMILIES] = [RTM_FAM(RTM_NEWLINK)] = NLMSG_LENGTH(sizeof(struct ifinfomsg)), [RTM_FAM(RTM_NEWADDR)] = NLMSG_LENGTH(sizeof(struct ifaddrmsg)), [RTM_FAM(RTM_NEWROUTE)] = NLMSG_LENGTH(sizeof(struct rtmsg)), - [RTM_FAM(RTM_NEWNEIGH)] = NLMSG_LENGTH(sizeof(struct ndmsg)), [RTM_FAM(RTM_NEWRULE)] = NLMSG_LENGTH(sizeof(struct fib_rule_hdr)), [RTM_FAM(RTM_NEWQDISC)] = NLMSG_LENGTH(sizeof(struct tcmsg)), [RTM_FAM(RTM_NEWTCLASS)] = NLMSG_LENGTH(sizeof(struct tcmsg)), @@ -121,7 +120,6 @@ static const int rta_max[RTM_NR_FAMILIES] = [RTM_FAM(RTM_NEWLINK)] = IFLA_MAX, [RTM_FAM(RTM_NEWADDR)] = IFA_MAX, [RTM_FAM(RTM_NEWROUTE)] = RTA_MAX, - [RTM_FAM(RTM_NEWNEIGH)] = NDA_MAX, [RTM_FAM(RTM_NEWRULE)] = FRA_MAX, [RTM_FAM(RTM_NEWQDISC)] = TCA_MAX, [RTM_FAM(RTM_NEWTCLASS)] = TCA_MAX, -- cgit v1.3-8-gc7d7 From 8ce11e6a9faf1f1c849b77104adc1642c46aee95 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 7 Aug 2006 21:50:48 -0700 Subject: [NET]: Make code static. This patch makes needlessly global code static. Signed-off-by: Adrian Bunk Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 4 ---- net/ipv4/cipso_ipv4.c | 2 +- net/ipv4/fib_rules.c | 4 ++-- net/ipv6/fib6_rules.c | 4 ++-- net/ipv6/ip6_fib.c | 6 +++--- net/ipv6/route.c | 6 +++--- net/netlabel/netlabel_domainhash.c | 4 ++-- 7 files changed, 13 insertions(+), 17 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 7b47e8d5a765..c0660cea9a2f 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -192,10 +192,6 @@ struct fib6_node *fib6_locate(struct fib6_node *root, struct in6_addr *daddr, int dst_len, struct in6_addr *saddr, int src_len); -extern void fib6_clean_tree(struct fib6_node *root, - int (*func)(struct rt6_info *, void *arg), - int prune, void *arg); - extern void fib6_clean_all(int (*func)(struct rt6_info *, void *arg), int prune, void *arg); diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index b82a101c95c5..80a2a0911b49 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -60,7 +60,7 @@ struct cipso_v4_domhsh_entry { * if in practice there are a lot of different DOIs this list should * probably be turned into a hash table or something similar so we * can do quick lookups. */ -DEFINE_SPINLOCK(cipso_v4_doi_list_lock); +static DEFINE_SPINLOCK(cipso_v4_doi_list_lock); static struct list_head cipso_v4_doi_list = LIST_HEAD_INIT(cipso_v4_doi_list); /* Label mapping cache */ diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 23ec6ae1a0f6..03d1e8a43a48 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -101,8 +101,8 @@ int fib_lookup(struct flowi *flp, struct fib_result *res) return err; } -int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, - struct fib_lookup_arg *arg) +static int fib4_rule_action(struct fib_rule *rule, struct flowi *flp, + int flags, struct fib_lookup_arg *arg) { int err = -EAGAIN; struct fib_table *tbl; diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 94a46ec967a4..bf9bba83b852 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -66,8 +66,8 @@ struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, return (struct dst_entry *) arg.result; } -int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, - int flags, struct fib_lookup_arg *arg) +static int fib6_rule_action(struct fib_rule *rule, struct flowi *flp, + int flags, struct fib_lookup_arg *arg) { struct rt6_info *rt = NULL; struct fib6_table *table; diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index ce226c14bef5..1f2316187ca4 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -1169,9 +1169,9 @@ static int fib6_clean_node(struct fib6_walker_t *w) * ignoring pure split nodes) will be scanned. */ -void fib6_clean_tree(struct fib6_node *root, - int (*func)(struct rt6_info *, void *arg), - int prune, void *arg) +static void fib6_clean_tree(struct fib6_node *root, + int (*func)(struct rt6_info *, void *arg), + int prune, void *arg) { struct fib6_cleaner_t c; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 41c5905d3191..e08d84063c1f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -613,8 +613,8 @@ static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, struct in6_addr *d return rt; } -struct rt6_info *ip6_pol_route_input(struct fib6_table *table, struct flowi *fl, - int flags) +static struct rt6_info *ip6_pol_route_input(struct fib6_table *table, + struct flowi *fl, int flags) { struct fib6_node *fn; struct rt6_info *rt, *nrt; @@ -872,7 +872,7 @@ static inline unsigned int ipv6_advmss(unsigned int mtu) } static struct dst_entry *ndisc_dst_gc_list; -DEFINE_SPINLOCK(ndisc_lock); +static DEFINE_SPINLOCK(ndisc_lock); struct dst_entry *ndisc_dst_alloc(struct net_device *dev, struct neighbour *neigh, diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 5bb3fad4a115..0489a1378101 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -50,11 +50,11 @@ struct netlbl_domhsh_tbl { /* Domain hash table */ /* XXX - updates should be so rare that having one spinlock for the entire * hash table should be okay */ -DEFINE_SPINLOCK(netlbl_domhsh_lock); +static DEFINE_SPINLOCK(netlbl_domhsh_lock); static struct netlbl_domhsh_tbl *netlbl_domhsh = NULL; /* Default domain mapping */ -DEFINE_SPINLOCK(netlbl_domhsh_def_lock); +static DEFINE_SPINLOCK(netlbl_domhsh_def_lock); static struct netlbl_dom_map *netlbl_domhsh_def = NULL; /* -- cgit v1.3-8-gc7d7 From 0298f36a579b5bd7f10f6f6d57e5929977a865a1 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 7 Aug 2006 21:56:52 -0700 Subject: [IPV4]: Kill fib4_rules_clean(). As noted by Adrian Bunk this function is totally unused. Signed-off-by: David S. Miller --- include/net/ip_fib.h | 1 - net/ipv4/fib_rules.c | 5 ----- 2 files changed, 6 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 14c82e611c95..adf73586bc05 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -254,7 +254,6 @@ extern struct fib_table *fib_hash_init(int id); extern int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb); extern void __init fib4_rules_init(void); -extern void __exit fib4_rules_cleanup(void); #ifdef CONFIG_NET_CLS_ROUTE extern u32 fib_rules_tclass(struct fib_result *res); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 03d1e8a43a48..d242e5291fcc 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -347,8 +347,3 @@ void __init fib4_rules_init(void) fib_rules_register(&fib4_rules_ops); } - -void __exit fib4_rules_cleanup(void) -{ - fib_rules_unregister(&fib4_rules_ops); -} -- cgit v1.3-8-gc7d7 From 1a01912ae0a5666c4c24eaae2b4821711e2ad79a Mon Sep 17 00:00:00 2001 From: Louis Nyffenegger Date: Tue, 8 Aug 2006 00:56:11 -0700 Subject: [INET]: Remove is_setbyuser patch The value is_setbyuser from struct ip_options is never used and set only one time (http://linux-net.osdl.org/index.php/TODO#IPV4). This little patch removes it from the kernel source. Signed-off-by: Louis Nyffenegger Signed-off-by: David S. Miller --- include/net/inet_sock.h | 4 +--- net/ipv4/ip_options.c | 1 - 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index f4caad56cd03..f6242710f2ff 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -27,7 +27,6 @@ /** struct ip_options - IP Options * * @faddr - Saved first hop address - * @is_setbyuser - Set by setsockopt? * @is_data - Options in __data, rather than skb * @is_strictroute - Strict source route * @srr_is_hit - Packet destination addr was our one @@ -42,8 +41,7 @@ struct ip_options { unsigned char srr; unsigned char rr; unsigned char ts; - unsigned char is_setbyuser:1, - is_data:1, + unsigned char is_data:1, is_strictroute:1, srr_is_hit:1, is_changed:1, diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index e0a93b4fa8cc..e7437c091326 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -525,7 +525,6 @@ static int ip_options_get_finish(struct ip_options **optp, opt->__data[optlen++] = IPOPT_END; opt->optlen = optlen; opt->is_data = 1; - opt->is_setbyuser = 1; if (optlen && ip_options_compile(opt, NULL)) { kfree(opt); return -EINVAL; -- cgit v1.3-8-gc7d7 From 99a92ff50424146ba01a222248fd47a1cd55b78f Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Tue, 8 Aug 2006 02:18:10 -0700 Subject: [IPV4]: Uninline inet_lookup_listener By modern standards this function is way too big to be inlined. It's even bigger than __inet_lookup_listener :) Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 33 ++------------------------------- net/ipv4/inet_hashtables.c | 35 ++++++++++++++++++++++++++++++++--- 2 files changed, 34 insertions(+), 34 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index 98e0bb3014fe..bd513f3b9c7e 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -271,39 +271,10 @@ static inline int inet_iif(const struct sk_buff *skb) return ((struct rtable *)skb->dst)->rt_iif; } -extern struct sock *__inet_lookup_listener(const struct hlist_head *head, - const u32 daddr, - const unsigned short hnum, - const int dif); - -/* Optimize the common listener case. */ -static inline struct sock * +extern struct sock * inet_lookup_listener(struct inet_hashinfo *hashinfo, const u32 daddr, - const unsigned short hnum, const int dif) -{ - struct sock *sk = NULL; - const struct hlist_head *head; - - read_lock(&hashinfo->lhash_lock); - head = &hashinfo->listening_hash[inet_lhashfn(hnum)]; - if (!hlist_empty(head)) { - const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); - - if (inet->num == hnum && !sk->sk_node.next && - (!inet->rcv_saddr || inet->rcv_saddr == daddr) && - (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && - !sk->sk_bound_dev_if) - goto sherry_cache; - sk = __inet_lookup_listener(head, daddr, hnum, dif); - } - if (sk) { -sherry_cache: - sock_hold(sk); - } - read_unlock(&hashinfo->lhash_lock); - return sk; -} + const unsigned short hnum, const int dif); /* Socket demux engine toys. */ #ifdef __BIG_ENDIAN diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 95fac5532994..bfc39066e730 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -124,8 +124,10 @@ EXPORT_SYMBOL(inet_listen_wlock); * remote address for the connection. So always assume those are both * wildcarded during the search since they can never be otherwise. */ -struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 daddr, - const unsigned short hnum, const int dif) +static struct sock *__inet_lookup_listener(const struct hlist_head *head, + const u32 daddr, + const unsigned short hnum, + const int dif) { struct sock *result = NULL, *sk; const struct hlist_node *node; @@ -159,7 +161,34 @@ struct sock *__inet_lookup_listener(const struct hlist_head *head, const u32 dad return result; } -EXPORT_SYMBOL_GPL(__inet_lookup_listener); +/* Optimize the common listener case. */ +struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo, + const u32 daddr, const unsigned short hnum, + const int dif) +{ + struct sock *sk = NULL; + const struct hlist_head *head; + + read_lock(&hashinfo->lhash_lock); + head = &hashinfo->listening_hash[inet_lhashfn(hnum)]; + if (!hlist_empty(head)) { + const struct inet_sock *inet = inet_sk((sk = __sk_head(head))); + + if (inet->num == hnum && !sk->sk_node.next && + (!inet->rcv_saddr || inet->rcv_saddr == daddr) && + (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && + !sk->sk_bound_dev_if) + goto sherry_cache; + sk = __inet_lookup_listener(head, daddr, hnum, dif); + } + if (sk) { +sherry_cache: + sock_hold(sk); + } + read_unlock(&hashinfo->lhash_lock); + return sk; +} +EXPORT_SYMBOL_GPL(inet_lookup_listener); /* called with local bh disabled */ static int __inet_check_established(struct inet_timewait_death_row *death_row, -- cgit v1.3-8-gc7d7 From 8f491069b40be5d627007a343f99759e9da6a178 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 9 Aug 2006 15:47:12 -0700 Subject: [IPV4]: Use network-order dport for all visible inet_lookup_* Right now most inet_lookup_* functions take a host-order hnum instead of a network-order dport because that's how it is represented internally. This means that users of these functions have to be careful about using the right byte-order. To add more confusion, inet_lookup takes a network-order dport unlike all other functions. So this patch changes all visible inet_lookup functions to take a dport and move all dport->hnum conversion inside them. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 31 ++++++++++++++++++++++++------- net/dccp/ipv4.c | 10 +++++----- net/ipv4/inet_hashtables.c | 18 +++++++++--------- net/ipv4/tcp_ipv4.c | 10 +++++----- 4 files changed, 43 insertions(+), 26 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index bd513f3b9c7e..b4491c9e2a5a 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -271,10 +271,16 @@ static inline int inet_iif(const struct sk_buff *skb) return ((struct rtable *)skb->dst)->rt_iif; } -extern struct sock * - inet_lookup_listener(struct inet_hashinfo *hashinfo, - const u32 daddr, - const unsigned short hnum, const int dif); +extern struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo, + const u32 daddr, + const unsigned short hnum, + const int dif); + +static inline struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo, + u32 daddr, u16 dport, int dif) +{ + return __inet_lookup_listener(hashinfo, daddr, ntohs(dport), dif); +} /* Socket demux engine toys. */ #ifdef __BIG_ENDIAN @@ -362,14 +368,25 @@ hit: goto out; } +static inline struct sock * + inet_lookup_established(struct inet_hashinfo *hashinfo, + const u32 saddr, const u16 sport, + const u32 daddr, const u16 dport, + const int dif) +{ + return __inet_lookup_established(hashinfo, saddr, sport, daddr, + ntohs(dport), dif); +} + static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo, const u32 saddr, const u16 sport, - const u32 daddr, const u16 hnum, + const u32 daddr, const u16 dport, const int dif) { + u16 hnum = ntohs(dport); struct sock *sk = __inet_lookup_established(hashinfo, saddr, sport, daddr, hnum, dif); - return sk ? : inet_lookup_listener(hashinfo, daddr, hnum, dif); + return sk ? : __inet_lookup_listener(hashinfo, daddr, hnum, dif); } static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo, @@ -380,7 +397,7 @@ static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo, struct sock *sk; local_bh_disable(); - sk = __inet_lookup(hashinfo, saddr, sport, daddr, ntohs(dport), dif); + sk = __inet_lookup(hashinfo, saddr, sport, daddr, dport, dif); local_bh_enable(); return sk; diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 171d363876ee..9a1a76a7dc41 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -608,10 +608,10 @@ static struct sock *dccp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) if (req != NULL) return dccp_check_req(sk, skb, req, prev); - nsk = __inet_lookup_established(&dccp_hashinfo, - iph->saddr, dh->dccph_sport, - iph->daddr, ntohs(dh->dccph_dport), - inet_iif(skb)); + nsk = inet_lookup_established(&dccp_hashinfo, + iph->saddr, dh->dccph_sport, + iph->daddr, dh->dccph_dport, + inet_iif(skb)); if (nsk != NULL) { if (nsk->sk_state != DCCP_TIME_WAIT) { bh_lock_sock(nsk); @@ -925,7 +925,7 @@ static int dccp_v4_rcv(struct sk_buff *skb) * Look up flow ID in table and get corresponding socket */ sk = __inet_lookup(&dccp_hashinfo, skb->nh.iph->saddr, dh->dccph_sport, - skb->nh.iph->daddr, ntohs(dh->dccph_dport), + skb->nh.iph->daddr, dh->dccph_dport, inet_iif(skb)); /* diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index bfc39066e730..fb296c9a7f3f 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -124,10 +124,10 @@ EXPORT_SYMBOL(inet_listen_wlock); * remote address for the connection. So always assume those are both * wildcarded during the search since they can never be otherwise. */ -static struct sock *__inet_lookup_listener(const struct hlist_head *head, - const u32 daddr, - const unsigned short hnum, - const int dif) +static struct sock *inet_lookup_listener_slow(const struct hlist_head *head, + const u32 daddr, + const unsigned short hnum, + const int dif) { struct sock *result = NULL, *sk; const struct hlist_node *node; @@ -162,9 +162,9 @@ static struct sock *__inet_lookup_listener(const struct hlist_head *head, } /* Optimize the common listener case. */ -struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo, - const u32 daddr, const unsigned short hnum, - const int dif) +struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo, + const u32 daddr, const unsigned short hnum, + const int dif) { struct sock *sk = NULL; const struct hlist_head *head; @@ -179,7 +179,7 @@ struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo, (sk->sk_family == PF_INET || !ipv6_only_sock(sk)) && !sk->sk_bound_dev_if) goto sherry_cache; - sk = __inet_lookup_listener(head, daddr, hnum, dif); + sk = inet_lookup_listener_slow(head, daddr, hnum, dif); } if (sk) { sherry_cache: @@ -188,7 +188,7 @@ sherry_cache: read_unlock(&hashinfo->lhash_lock); return sk; } -EXPORT_SYMBOL_GPL(inet_lookup_listener); +EXPORT_SYMBOL_GPL(__inet_lookup_listener); /* called with local bh disabled */ static int __inet_check_established(struct inet_timewait_death_row *death_row, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index b2aa512a30e9..2973dee0a489 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -951,9 +951,9 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) if (req) return tcp_check_req(sk, skb, req, prev); - nsk = __inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr, - th->source, skb->nh.iph->daddr, - ntohs(th->dest), inet_iif(skb)); + nsk = inet_lookup_established(&tcp_hashinfo, skb->nh.iph->saddr, + th->source, skb->nh.iph->daddr, + th->dest, inet_iif(skb)); if (nsk) { if (nsk->sk_state != TCP_TIME_WAIT) { @@ -1090,7 +1090,7 @@ int tcp_v4_rcv(struct sk_buff *skb) TCP_SKB_CB(skb)->sacked = 0; sk = __inet_lookup(&tcp_hashinfo, skb->nh.iph->saddr, th->source, - skb->nh.iph->daddr, ntohs(th->dest), + skb->nh.iph->daddr, th->dest, inet_iif(skb)); if (!sk) @@ -1168,7 +1168,7 @@ do_time_wait: case TCP_TW_SYN: { struct sock *sk2 = inet_lookup_listener(&tcp_hashinfo, skb->nh.iph->daddr, - ntohs(th->dest), + th->dest, inet_iif(skb)); if (sk2) { inet_twsk_deschedule((struct inet_timewait_sock *)sk, -- cgit v1.3-8-gc7d7 From a8731cbf61c8768ea129780b70dc7dfc6795aad4 Mon Sep 17 00:00:00 2001 From: Steven Whitehouse Date: Wed, 9 Aug 2006 15:56:46 -0700 Subject: [DECNET]: Covert rules to use generic code This patch converts the DECnet rules code to use the generic rules system created by Thomas Graf . Signed-off-by: Steven Whitehouse Acked-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 3 +- include/net/dn_fib.h | 8 +- net/decnet/Kconfig | 1 + net/decnet/af_decnet.c | 1 + net/decnet/dn_dev.c | 3 +- net/decnet/dn_fib.c | 1 + net/decnet/dn_route.c | 3 +- net/decnet/dn_rules.c | 494 +++++++++++++++++----------------------------- net/decnet/dn_table.c | 1 + 9 files changed, 196 insertions(+), 319 deletions(-) (limited to 'include/net') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 784a1a29490e..0aaffa2ae666 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -534,7 +534,8 @@ enum rtnetlink_groups { RTNLGRP_NOP2, RTNLGRP_DECnet_ROUTE, #define RTNLGRP_DECnet_ROUTE RTNLGRP_DECnet_ROUTE - RTNLGRP_NOP3, + RTNLGRP_DECnet_RULE, +#define RTNLGRP_DECnet_RULE RTNLGRP_DECnet_RULE RTNLGRP_NOP4, RTNLGRP_IPV6_PREFIX, #define RTNLGRP_IPV6_PREFIX RTNLGRP_IPV6_PREFIX diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h index a15dcf0d5c1e..32bc8ce5c5ce 100644 --- a/include/net/dn_fib.h +++ b/include/net/dn_fib.h @@ -22,7 +22,7 @@ struct dn_kern_rta }; struct dn_fib_res { - struct dn_fib_rule *r; + struct fib_rule *r; struct dn_fib_info *fi; unsigned char prefixlen; unsigned char nh_sel; @@ -147,10 +147,8 @@ extern void dn_fib_table_cleanup(void); */ extern void dn_fib_rules_init(void); extern void dn_fib_rules_cleanup(void); -extern void dn_fib_rule_put(struct dn_fib_rule *); -extern __le16 dn_fib_rules_policy(__le16 saddr, struct dn_fib_res *res, unsigned *flags); extern unsigned dnet_addr_type(__le16 addr); -extern int dn_fib_lookup(const struct flowi *fl, struct dn_fib_res *res); +extern int dn_fib_lookup(struct flowi *fl, struct dn_fib_res *res); /* * rtnetlink interface @@ -176,7 +174,7 @@ static inline void dn_fib_res_put(struct dn_fib_res *res) if (res->fi) dn_fib_info_put(res->fi); if (res->r) - dn_fib_rule_put(res->r); + fib_rule_put(res->r); } extern struct dn_fib_table *dn_fib_tables[]; diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig index 92f2ec46fd22..36e72cb145b0 100644 --- a/net/decnet/Kconfig +++ b/net/decnet/Kconfig @@ -27,6 +27,7 @@ config DECNET config DECNET_ROUTER bool "DECnet: router support (EXPERIMENTAL)" depends on DECNET && EXPERIMENTAL + select FIB_RULES ---help--- Add support for turning your DECnet Endnode into a level 1 or 2 router. This is an experimental, but functional option. If you diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index 5486247735f6..70e027375682 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -130,6 +130,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat #include #include #include +#include #include #include #include diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index 632c5a90b589..88ea7a13bb24 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include @@ -1418,8 +1419,6 @@ static struct rtnetlink_link dnet_rtnetlink_table[RTM_NR_MSGTYPES] = [RTM_DELROUTE - RTM_BASE] = { .doit = dn_fib_rtm_delroute, }, [RTM_GETROUTE - RTM_BASE] = { .doit = dn_cache_getroute, .dumpit = dn_fib_dump, }, - [RTM_NEWRULE - RTM_BASE] = { .doit = dn_fib_rtm_newrule, }, - [RTM_DELRULE - RTM_BASE] = { .doit = dn_fib_rtm_delrule, }, [RTM_GETRULE - RTM_BASE] = { .dumpit = dn_fib_dump_rules, }, #else [RTM_GETROUTE - RTM_BASE] = { .doit = dn_cache_getroute, diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index fa20e2efcfc1..846df3954a63 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 743e9fcf7c5a..5e6f4616ca10 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -80,6 +80,7 @@ #include #include #include +#include #include #include #include @@ -1284,7 +1285,7 @@ static int dn_route_input_slow(struct sk_buff *skb) dev_hold(out_dev); if (res.r) - src_map = dn_fib_rules_policy(fl.fld_src, &res, &flags); + src_map = fl.fld_src; /* no NAT support for now */ gateway = DN_FIB_RES_GW(res); if (res.type == RTN_NAT) { diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 6986be754ef2..096f1273e714 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -11,259 +11,198 @@ * * * Changes: + * Steve Whitehouse + * Updated for Thomas Graf's generic rules * */ -#include #include -#include -#include #include -#include #include #include -#include #include -#include #include -#include #include #include -#include -#include #include #include #include +#include #include #include #include #include +static struct fib_rules_ops dn_fib_rules_ops; + struct dn_fib_rule { - struct hlist_node r_hlist; - atomic_t r_clntref; - u32 r_preference; - unsigned char r_table; - unsigned char r_action; - unsigned char r_dst_len; - unsigned char r_src_len; - __le16 r_src; - __le16 r_srcmask; - __le16 r_dst; - __le16 r_dstmask; - __le16 r_srcmap; - u8 r_flags; + struct fib_rule common; + unsigned char dst_len; + unsigned char src_len; + __le16 src; + __le16 srcmask; + __le16 dst; + __le16 dstmask; + __le16 srcmap; + u8 flags; #ifdef CONFIG_DECNET_ROUTE_FWMARK - u32 r_fwmark; + u32 fwmark; #endif - int r_ifindex; - char r_ifname[IFNAMSIZ]; - int r_dead; - struct rcu_head rcu; }; static struct dn_fib_rule default_rule = { - .r_clntref = ATOMIC_INIT(2), - .r_preference = 0x7fff, - .r_table = RT_TABLE_MAIN, - .r_action = RTN_UNICAST + .common = { + .refcnt = ATOMIC_INIT(2), + .pref = 0x7fff, + .table = RT_TABLE_MAIN, + .action = FR_ACT_TO_TBL, + }, }; -static struct hlist_head dn_fib_rules; +static LIST_HEAD(dn_fib_rules); + -int dn_fib_rtm_delrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res) { - struct rtattr **rta = arg; - struct rtmsg *rtm = NLMSG_DATA(nlh); - struct dn_fib_rule *r; - struct hlist_node *node; - int err = -ESRCH; - - hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { - if ((!rta[RTA_SRC-1] || memcmp(RTA_DATA(rta[RTA_SRC-1]), &r->r_src, 2) == 0) && - rtm->rtm_src_len == r->r_src_len && - rtm->rtm_dst_len == r->r_dst_len && - (!rta[RTA_DST-1] || memcmp(RTA_DATA(rta[RTA_DST-1]), &r->r_dst, 2) == 0) && -#ifdef CONFIG_DECNET_ROUTE_FWMARK - (!rta[RTA_PROTOINFO-1] || memcmp(RTA_DATA(rta[RTA_PROTOINFO-1]), &r->r_fwmark, 4) == 0) && -#endif - (!rtm->rtm_type || rtm->rtm_type == r->r_action) && - (!rta[RTA_PRIORITY-1] || memcmp(RTA_DATA(rta[RTA_PRIORITY-1]), &r->r_preference, 4) == 0) && - (!rta[RTA_IIF-1] || rtattr_strcmp(rta[RTA_IIF-1], r->r_ifname) == 0) && - (!rtm->rtm_table || (r && rtm->rtm_table == r->r_table))) { - - err = -EPERM; - if (r == &default_rule) - break; - - hlist_del_rcu(&r->r_hlist); - r->r_dead = 1; - dn_fib_rule_put(r); - err = 0; - break; - } - } + struct fib_lookup_arg arg = { + .result = res, + }; + int err; + + err = fib_rules_lookup(&dn_fib_rules_ops, flp, 0, &arg); + res->r = arg.rule; return err; } -static inline void dn_fib_rule_put_rcu(struct rcu_head *head) +int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, + struct fib_lookup_arg *arg) { - struct dn_fib_rule *r = container_of(head, struct dn_fib_rule, rcu); - kfree(r); -} + int err = -EAGAIN; + struct dn_fib_table *tbl; -void dn_fib_rule_put(struct dn_fib_rule *r) -{ - if (atomic_dec_and_test(&r->r_clntref)) { - if (r->r_dead) - call_rcu(&r->rcu, dn_fib_rule_put_rcu); - else - printk(KERN_DEBUG "Attempt to free alive dn_fib_rule\n"); + switch(rule->action) { + case FR_ACT_TO_TBL: + break; + + case FR_ACT_UNREACHABLE: + err = -ENETUNREACH; + goto errout; + + case FR_ACT_PROHIBIT: + err = -EACCES; + goto errout; + + case FR_ACT_BLACKHOLE: + default: + err = -EINVAL; + goto errout; } + + tbl = dn_fib_get_table(rule->table, 0); + if (tbl == NULL) + goto errout; + + err = tbl->lookup(tbl, flp, (struct dn_fib_res *)arg->result); + if (err > 0) + err = -EAGAIN; +errout: + return err; } +static struct nla_policy dn_fib_rule_policy[FRA_MAX+1] __read_mostly = { + [FRA_IFNAME] = { .type = NLA_STRING }, + [FRA_PRIORITY] = { .type = NLA_U32 }, + [FRA_SRC] = { .type = NLA_U16 }, + [FRA_DST] = { .type = NLA_U16 }, + [FRA_FWMARK] = { .type = NLA_U32 }, +}; -int dn_fib_rtm_newrule(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) +static int dn_fib_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) { - struct rtattr **rta = arg; - struct rtmsg *rtm = NLMSG_DATA(nlh); - struct dn_fib_rule *r, *new_r, *last = NULL; - struct hlist_node *node = NULL; - unsigned char table_id; - - if (rtm->rtm_src_len > 16 || rtm->rtm_dst_len > 16) - return -EINVAL; - - if (rta[RTA_IIF-1] && RTA_PAYLOAD(rta[RTA_IIF-1]) > IFNAMSIZ) - return -EINVAL; - - if (rtm->rtm_type == RTN_NAT) - return -EINVAL; - - table_id = rtm->rtm_table; - if (table_id == RT_TABLE_UNSPEC) { - struct dn_fib_table *tb; - if (rtm->rtm_type == RTN_UNICAST) { - if ((tb = dn_fib_empty_table()) == NULL) - return -ENOBUFS; - table_id = tb->n; - } - } + struct dn_fib_rule *r = (struct dn_fib_rule *)rule; + u16 daddr = fl->fld_dst; + u16 saddr = fl->fld_src; + + if (((saddr ^ r->src) & r->srcmask) || + ((daddr ^ r->dst) & r->dstmask)) + return 0; - new_r = kzalloc(sizeof(*new_r), GFP_KERNEL); - if (!new_r) - return -ENOMEM; - - if (rta[RTA_SRC-1]) - memcpy(&new_r->r_src, RTA_DATA(rta[RTA_SRC-1]), 2); - if (rta[RTA_DST-1]) - memcpy(&new_r->r_dst, RTA_DATA(rta[RTA_DST-1]), 2); - if (rta[RTA_GATEWAY-1]) - memcpy(&new_r->r_srcmap, RTA_DATA(rta[RTA_GATEWAY-1]), 2); - new_r->r_src_len = rtm->rtm_src_len; - new_r->r_dst_len = rtm->rtm_dst_len; - new_r->r_srcmask = dnet_make_mask(rtm->rtm_src_len); - new_r->r_dstmask = dnet_make_mask(rtm->rtm_dst_len); #ifdef CONFIG_DECNET_ROUTE_FWMARK - if (rta[RTA_PROTOINFO-1]) - memcpy(&new_r->r_fwmark, RTA_DATA(rta[RTA_PROTOINFO-1]), 4); + if (r->fwmark && (r->fwmark != fl->fld_fwmark)) + return 0; #endif - new_r->r_action = rtm->rtm_type; - new_r->r_flags = rtm->rtm_flags; - if (rta[RTA_PRIORITY-1]) - memcpy(&new_r->r_preference, RTA_DATA(rta[RTA_PRIORITY-1]), 4); - new_r->r_table = table_id; - if (rta[RTA_IIF-1]) { - struct net_device *dev; - rtattr_strlcpy(new_r->r_ifname, rta[RTA_IIF-1], IFNAMSIZ); - new_r->r_ifindex = -1; - dev = dev_get_by_name(new_r->r_ifname); - if (dev) { - new_r->r_ifindex = dev->ifindex; - dev_put(dev); - } - } - r = container_of(dn_fib_rules.first, struct dn_fib_rule, r_hlist); - if (!new_r->r_preference) { - if (r && r->r_hlist.next != NULL) { - r = container_of(r->r_hlist.next, struct dn_fib_rule, r_hlist); - if (r->r_preference) - new_r->r_preference = r->r_preference - 1; + return 1; +} + +static int dn_fib_rule_configure(struct fib_rule *rule, struct sk_buff *skb, + struct nlmsghdr *nlh, struct fib_rule_hdr *frh, + struct nlattr **tb) +{ + int err = -EINVAL; + struct dn_fib_rule *r = (struct dn_fib_rule *)rule; + + if (frh->src_len > 16 || frh->dst_len > 16 || frh->tos) + goto errout; + + if (rule->table == RT_TABLE_UNSPEC) { + if (rule->action == FR_ACT_TO_TBL) { + struct dn_fib_table *table; + + table = dn_fib_empty_table(); + if (table == NULL) { + err = -ENOBUFS; + goto errout; + } + + rule->table = table->n; } } - hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { - if (r->r_preference > new_r->r_preference) - break; - last = r; - } - atomic_inc(&new_r->r_clntref); + if (tb[FRA_SRC]) + r->src = nla_get_u16(tb[FRA_SRC]); - if (last) - hlist_add_after_rcu(&last->r_hlist, &new_r->r_hlist); - else - hlist_add_before_rcu(&new_r->r_hlist, &r->r_hlist); - return 0; -} + if (tb[FRA_DST]) + r->dst = nla_get_u16(tb[FRA_DST]); +#ifdef CONFIG_DECNET_ROUTE_FWMARK + if (tb[FRA_FWMARK]) + r->fwmark = nla_get_u32(tb[FRA_FWMARK]); +#endif + + r->src_len = frh->src_len; + r->srcmask = dnet_make_mask(r->src_len); + r->dst_len = frh->dst_len; + r->dstmask = dnet_make_mask(r->dst_len); + err = 0; +errout: + return err; +} -int dn_fib_lookup(const struct flowi *flp, struct dn_fib_res *res) +static int dn_fib_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, + struct nlattr **tb) { - struct dn_fib_rule *r, *policy; - struct dn_fib_table *tb; - __le16 saddr = flp->fld_src; - __le16 daddr = flp->fld_dst; - struct hlist_node *node; - int err; + struct dn_fib_rule *r = (struct dn_fib_rule *)rule; + + if (frh->src_len && (r->src_len != frh->src_len)) + return 0; - rcu_read_lock(); + if (frh->dst_len && (r->dst_len != frh->dst_len)) + return 0; - hlist_for_each_entry_rcu(r, node, &dn_fib_rules, r_hlist) { - if (((saddr^r->r_src) & r->r_srcmask) || - ((daddr^r->r_dst) & r->r_dstmask) || #ifdef CONFIG_DECNET_ROUTE_FWMARK - (r->r_fwmark && r->r_fwmark != flp->fld_fwmark) || + if (tb[FRA_FWMARK] && (r->fwmark != nla_get_u32(tb[FRA_FWMARK]))) + return 0; #endif - (r->r_ifindex && r->r_ifindex != flp->iif)) - continue; - - switch(r->r_action) { - case RTN_UNICAST: - case RTN_NAT: - policy = r; - break; - case RTN_UNREACHABLE: - rcu_read_unlock(); - return -ENETUNREACH; - default: - case RTN_BLACKHOLE: - rcu_read_unlock(); - return -EINVAL; - case RTN_PROHIBIT: - rcu_read_unlock(); - return -EACCES; - } - if ((tb = dn_fib_get_table(r->r_table, 0)) == NULL) - continue; - err = tb->lookup(tb, flp, res); - if (err == 0) { - res->r = policy; - if (policy) - atomic_inc(&policy->r_clntref); - rcu_read_unlock(); - return 0; - } - if (err < 0 && err != -EAGAIN) { - rcu_read_unlock(); - return err; - } - } + if (tb[FRA_SRC] && (r->src != nla_get_u32(tb[FRA_SRC]))) + return 0; + + if (tb[FRA_DST] && (r->dst != nla_get_u32(tb[FRA_DST]))) + return 0; - rcu_read_unlock(); - return -ESRCH; + return 1; } unsigned dnet_addr_type(__le16 addr) @@ -284,142 +223,77 @@ unsigned dnet_addr_type(__le16 addr) return ret; } -__le16 dn_fib_rules_policy(__le16 saddr, struct dn_fib_res *res, unsigned *flags) +static int dn_fib_rule_fill(struct fib_rule *rule, struct sk_buff *skb, + struct nlmsghdr *nlh, struct fib_rule_hdr *frh) { - struct dn_fib_rule *r = res->r; - - if (r->r_action == RTN_NAT) { - int addrtype = dnet_addr_type(r->r_srcmap); - - if (addrtype == RTN_NAT) { - saddr = (saddr&~r->r_srcmask)|r->r_srcmap; - *flags |= RTCF_SNAT; - } else if (addrtype == RTN_LOCAL || r->r_srcmap == 0) { - saddr = r->r_srcmap; - *flags |= RTCF_MASQ; - } - } - return saddr; -} + struct dn_fib_rule *r = (struct dn_fib_rule *)rule; -static void dn_fib_rules_detach(struct net_device *dev) -{ - struct hlist_node *node; - struct dn_fib_rule *r; + frh->family = AF_DECnet; + frh->dst_len = r->dst_len; + frh->src_len = r->src_len; + frh->tos = 0; - hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { - if (r->r_ifindex == dev->ifindex) - r->r_ifindex = -1; - } -} +#ifdef CONFIG_DECNET_ROUTE_FWMARK + if (r->fwmark) + NLA_PUT_U32(skb, FRA_FWMARK, r->fwmark); +#endif + if (r->dst_len) + NLA_PUT_U16(skb, FRA_DST, r->dst); + if (r->src_len) + NLA_PUT_U16(skb, FRA_SRC, r->src); -static void dn_fib_rules_attach(struct net_device *dev) -{ - struct hlist_node *node; - struct dn_fib_rule *r; + return 0; - hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { - if (r->r_ifindex == -1 && strcmp(dev->name, r->r_ifname) == 0) - r->r_ifindex = dev->ifindex; - } +nla_put_failure: + return -ENOBUFS; } -static int dn_fib_rules_event(struct notifier_block *this, unsigned long event, void *ptr) +static u32 dn_fib_rule_default_pref(void) { - struct net_device *dev = ptr; - - switch(event) { - case NETDEV_UNREGISTER: - dn_fib_rules_detach(dev); - dn_fib_sync_down(0, dev, 1); - case NETDEV_REGISTER: - dn_fib_rules_attach(dev); - dn_fib_sync_up(dev); + struct list_head *pos; + struct fib_rule *rule; + + if (!list_empty(&dn_fib_rules)) { + pos = dn_fib_rules.next; + if (pos->next != &dn_fib_rules) { + rule = list_entry(pos->next, struct fib_rule, list); + if (rule->pref) + return rule->pref - 1; + } } - return NOTIFY_DONE; -} - - -static struct notifier_block dn_fib_rules_notifier = { - .notifier_call = dn_fib_rules_event, -}; - -static int dn_fib_fill_rule(struct sk_buff *skb, struct dn_fib_rule *r, - struct netlink_callback *cb, unsigned int flags) -{ - struct rtmsg *rtm; - struct nlmsghdr *nlh; - unsigned char *b = skb->tail; - - - nlh = NLMSG_NEW_ANSWER(skb, cb, RTM_NEWRULE, sizeof(*rtm), flags); - rtm = NLMSG_DATA(nlh); - rtm->rtm_family = AF_DECnet; - rtm->rtm_dst_len = r->r_dst_len; - rtm->rtm_src_len = r->r_src_len; - rtm->rtm_tos = 0; -#ifdef CONFIG_DECNET_ROUTE_FWMARK - if (r->r_fwmark) - RTA_PUT(skb, RTA_PROTOINFO, 4, &r->r_fwmark); -#endif - rtm->rtm_table = r->r_table; - rtm->rtm_protocol = 0; - rtm->rtm_scope = 0; - rtm->rtm_type = r->r_action; - rtm->rtm_flags = r->r_flags; - - if (r->r_dst_len) - RTA_PUT(skb, RTA_DST, 2, &r->r_dst); - if (r->r_src_len) - RTA_PUT(skb, RTA_SRC, 2, &r->r_src); - if (r->r_ifname[0]) - RTA_PUT(skb, RTA_IIF, IFNAMSIZ, &r->r_ifname); - if (r->r_preference) - RTA_PUT(skb, RTA_PRIORITY, 4, &r->r_preference); - if (r->r_srcmap) - RTA_PUT(skb, RTA_GATEWAY, 2, &r->r_srcmap); - nlh->nlmsg_len = skb->tail - b; - return skb->len; - -nlmsg_failure: -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; + return 0; } int dn_fib_dump_rules(struct sk_buff *skb, struct netlink_callback *cb) { - int idx = 0; - int s_idx = cb->args[0]; - struct dn_fib_rule *r; - struct hlist_node *node; - - rcu_read_lock(); - hlist_for_each_entry(r, node, &dn_fib_rules, r_hlist) { - if (idx < s_idx) - goto next; - if (dn_fib_fill_rule(skb, r, cb, NLM_F_MULTI) < 0) - break; -next: - idx++; - } - rcu_read_unlock(); - cb->args[0] = idx; - - return skb->len; + return fib_rules_dump(skb, cb, AF_DECnet); } +static struct fib_rules_ops dn_fib_rules_ops = { + .family = AF_DECnet, + .rule_size = sizeof(struct dn_fib_rule), + .action = dn_fib_rule_action, + .match = dn_fib_rule_match, + .configure = dn_fib_rule_configure, + .compare = dn_fib_rule_compare, + .fill = dn_fib_rule_fill, + .default_pref = dn_fib_rule_default_pref, + .nlgroup = RTNLGRP_DECnet_RULE, + .policy = dn_fib_rule_policy, + .rules_list = &dn_fib_rules, + .owner = THIS_MODULE, +}; + void __init dn_fib_rules_init(void) { - INIT_HLIST_HEAD(&dn_fib_rules); - hlist_add_head(&default_rule.r_hlist, &dn_fib_rules); - register_netdevice_notifier(&dn_fib_rules_notifier); + list_add_tail(&default_rule.common.list, &dn_fib_rules); + fib_rules_register(&dn_fib_rules_ops); } void __exit dn_fib_rules_cleanup(void) { - unregister_netdevice_notifier(&dn_fib_rules_notifier); + fib_rules_unregister(&dn_fib_rules_ops); } diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index e926c952e363..2e01b67398c8 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include -- cgit v1.3-8-gc7d7 From d924424aaed116b362c6d0e667d912b77e655085 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 10 Aug 2006 23:03:23 -0700 Subject: [NEIGHBOUR]: Use ALIGN() macro. Rather than opencoding the mask, it looks better to use ALIGN() macro from kernel.h. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/neighbour.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index 74c4b6ff8a5c..bd187daffdb9 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -101,7 +101,7 @@ struct neighbour __u8 dead; atomic_t probes; rwlock_t lock; - unsigned char ha[(MAX_ADDR_LEN+sizeof(unsigned long)-1)&~(sizeof(unsigned long)-1)]; + unsigned char ha[ALIGN(MAX_ADDR_LEN, sizeof(unsigned long))]; struct hh_cache *hh; atomic_t refcnt; int (*output)(struct sk_buff *skb); -- cgit v1.3-8-gc7d7 From 2dfe55b47e3d66ded5a84caf71e0da5710edf48b Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Aug 2006 23:08:33 -0700 Subject: [NET]: Use u32 for routing table IDs Use u32 for routing table IDs in net/ipv4 and net/decnet in preparation of support for a larger number of routing tables. net/ipv6 already uses u32 everywhere and needs no further changes. No functional changes are made by this patch. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/dn_fib.h | 4 ++-- include/net/ip_fib.h | 14 +++++++------- net/decnet/dn_fib.c | 6 +++--- net/decnet/dn_table.c | 10 +++++----- net/ipv4/fib_frontend.c | 8 ++++---- net/ipv4/fib_hash.c | 4 ++-- net/ipv4/fib_lookup.h | 4 ++-- net/ipv4/fib_rules.c | 2 +- net/ipv4/fib_semantics.c | 4 ++-- net/ipv4/fib_trie.c | 6 +++--- 10 files changed, 31 insertions(+), 31 deletions(-) (limited to 'include/net') diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h index 32bc8ce5c5ce..cd9c3782f838 100644 --- a/include/net/dn_fib.h +++ b/include/net/dn_fib.h @@ -94,7 +94,7 @@ struct dn_fib_node { struct dn_fib_table { - int n; + u32 n; int (*insert)(struct dn_fib_table *t, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, @@ -137,7 +137,7 @@ extern int dn_fib_sync_up(struct net_device *dev); /* * dn_tables.c */ -extern struct dn_fib_table *dn_fib_get_table(int n, int creat); +extern struct dn_fib_table *dn_fib_get_table(u32 n, int creat); extern struct dn_fib_table *dn_fib_empty_table(void); extern void dn_fib_table_init(void); extern void dn_fib_table_cleanup(void); diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index adf73586bc05..0dcbf166eb94 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -150,7 +150,7 @@ struct fib_result_nl { #endif /* CONFIG_IP_ROUTE_MULTIPATH_WRANDOM */ struct fib_table { - unsigned char tb_id; + u32 tb_id; unsigned tb_stamp; int (*tb_lookup)(struct fib_table *tb, const struct flowi *flp, struct fib_result *res); int (*tb_insert)(struct fib_table *table, struct rtmsg *r, @@ -173,14 +173,14 @@ struct fib_table { extern struct fib_table *ip_fib_local_table; extern struct fib_table *ip_fib_main_table; -static inline struct fib_table *fib_get_table(int id) +static inline struct fib_table *fib_get_table(u32 id) { if (id != RT_TABLE_LOCAL) return ip_fib_main_table; return ip_fib_local_table; } -static inline struct fib_table *fib_new_table(int id) +static inline struct fib_table *fib_new_table(u32 id) { return fib_get_table(id); } @@ -205,9 +205,9 @@ static inline void fib_select_default(const struct flowi *flp, struct fib_result extern struct fib_table * fib_tables[RT_TABLE_MAX+1]; extern int fib_lookup(struct flowi *flp, struct fib_result *res); -extern struct fib_table *__fib_new_table(int id); +extern struct fib_table *__fib_new_table(u32 id); -static inline struct fib_table *fib_get_table(int id) +static inline struct fib_table *fib_get_table(u32 id) { if (id == 0) id = RT_TABLE_MAIN; @@ -215,7 +215,7 @@ static inline struct fib_table *fib_get_table(int id) return fib_tables[id]; } -static inline struct fib_table *fib_new_table(int id) +static inline struct fib_table *fib_new_table(u32 id) { if (id == 0) id = RT_TABLE_MAIN; @@ -248,7 +248,7 @@ extern int fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, extern u32 __fib_res_prefsrc(struct fib_result *res); /* Exported by fib_hash.c */ -extern struct fib_table *fib_hash_init(int id); +extern struct fib_table *fib_hash_init(u32 id); #ifdef CONFIG_IP_MULTIPLE_TABLES extern int fib4_rules_dump(struct sk_buff *skb, struct netlink_callback *cb); diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index ed5fb5c3eab5..7b3bf5c3d720 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -534,8 +534,8 @@ int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) { - int t; - int s_t; + u32 t; + u32 s_t; struct dn_fib_table *tb; if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && @@ -765,7 +765,7 @@ void dn_fib_flush(void) { int flushed = 0; struct dn_fib_table *tb; - int id; + u32 id; for(id = RT_TABLE_MAX; id > 0; id--) { if ((tb = dn_fib_get_table(id, 0)) == NULL) diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 2e01b67398c8..1601ee5406a8 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -264,7 +264,7 @@ static int dn_fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct dn_kern } static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, - u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, + u32 tb_id, u8 type, u8 scope, void *dst, int dst_len, struct dn_fib_info *fi, unsigned int flags) { struct rtmsg *rtm; @@ -327,7 +327,7 @@ rtattr_failure: } -static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, int tb_id, +static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id, struct nlmsghdr *nlh, struct netlink_skb_parms *req) { struct sk_buff *skb; @@ -740,7 +740,7 @@ out: } -struct dn_fib_table *dn_fib_get_table(int n, int create) +struct dn_fib_table *dn_fib_get_table(u32 n, int create) { struct dn_fib_table *t; @@ -777,7 +777,7 @@ struct dn_fib_table *dn_fib_get_table(int n, int create) return t; } -static void dn_fib_del_tree(int n) +static void dn_fib_del_tree(u32 n) { struct dn_fib_table *t; @@ -791,7 +791,7 @@ static void dn_fib_del_tree(int n) struct dn_fib_table *dn_fib_empty_table(void) { - int id; + u32 id; for(id = RT_TABLE_MIN; id <= RT_TABLE_MAX; id++) if (dn_fib_tables[id] == NULL) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index a83f1aa8034e..06f4b23f6f57 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -62,7 +62,7 @@ struct fib_table *ip_fib_main_table; struct fib_table *fib_tables[RT_TABLE_MAX+1]; -struct fib_table *__fib_new_table(int id) +struct fib_table *__fib_new_table(u32 id) { struct fib_table *tb; @@ -82,7 +82,7 @@ static void fib_flush(void) int flushed = 0; #ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_table *tb; - int id; + u32 id; for (id = RT_TABLE_MAX; id>0; id--) { if ((tb = fib_get_table(id))==NULL) @@ -333,8 +333,8 @@ int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { - int t; - int s_t; + u32 t; + u32 s_t; struct fib_table *tb; if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index 72c633b357cf..f8d5c8024ccb 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -765,9 +765,9 @@ static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin } #ifdef CONFIG_IP_MULTIPLE_TABLES -struct fib_table * fib_hash_init(int id) +struct fib_table * fib_hash_init(u32 id) #else -struct fib_table * __init fib_hash_init(int id) +struct fib_table * __init fib_hash_init(u32 id) #endif { struct fib_table *tb; diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index ef6609ea0eb7..ddd52496b451 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -30,11 +30,11 @@ extern struct fib_info *fib_create_info(const struct rtmsg *r, extern int fib_nh_match(struct rtmsg *r, struct nlmsghdr *, struct kern_rta *rta, struct fib_info *fi); extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, - u8 tb_id, u8 type, u8 scope, void *dst, + u32 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int); extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, - int z, int tb_id, + int z, u32 tb_id, struct nlmsghdr *n, struct netlink_skb_parms *req); extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index d242e5291fcc..58fb91b00fdf 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -169,7 +169,7 @@ static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) static struct fib_table *fib_empty_table(void) { - int id; + u32 id; for (id = 1; id <= RT_TABLE_MAX; id++) if (fib_tables[id] == NULL) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 38bca473c7e2..c7a112b5a185 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -273,7 +273,7 @@ int ip_fib_check_default(u32 gw, struct net_device *dev) } void rtmsg_fib(int event, u32 key, struct fib_alias *fa, - int z, int tb_id, + int z, u32 tb_id, struct nlmsghdr *n, struct netlink_skb_parms *req) { struct sk_buff *skb; @@ -939,7 +939,7 @@ u32 __fib_res_prefsrc(struct fib_result *res) int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, - u8 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos, + u32 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int flags) { struct rtmsg *rtm; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 01801c0f885d..4a27b2d573a3 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1148,7 +1148,7 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, key = ntohl(key); - pr_debug("Insert table=%d %08x/%d\n", tb->tb_id, key, plen); + pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); mask = ntohl(inet_make_mask(plen)); @@ -1943,9 +1943,9 @@ out: /* Fix more generic FIB names for init later */ #ifdef CONFIG_IP_MULTIPLE_TABLES -struct fib_table * fib_hash_init(int id) +struct fib_table * fib_hash_init(u32 id) #else -struct fib_table * __init fib_hash_init(int id) +struct fib_table * __init fib_hash_init(u32 id) #endif { struct fib_table *tb; -- cgit v1.3-8-gc7d7 From 9e762a4a89b302cb3b26a1f9bb33eff459eaeca9 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Aug 2006 23:09:48 -0700 Subject: [NET]: Introduce RTA_TABLE/FRA_TABLE attributes Introduce RTA_TABLE route attribute and FRA_TABLE routing rule attribute to hold 32 bit routing table IDs. Usespace compatibility is provided by continuing to accept and send the rtm_table field, but because of its limited size it can only carry the low 8 bits of the table ID. This implies that if larger IDs are used, _all_ userspace programs using them need to use RTA_TABLE. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/fib_rules.h | 4 ++++ include/linux/rtnetlink.h | 8 ++++++++ include/net/fib_rules.h | 7 +++++++ net/core/fib_rules.c | 5 +++-- net/decnet/dn_fib.c | 7 ++++--- net/decnet/dn_route.c | 1 + net/decnet/dn_table.c | 1 + net/ipv4/fib_frontend.c | 7 ++++--- net/ipv4/fib_rules.c | 1 + net/ipv4/fib_semantics.c | 1 + net/ipv4/route.c | 1 + net/ipv6/fib6_rules.c | 1 + net/ipv6/route.c | 13 +++++++++---- 13 files changed, 45 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h index 5e503f0ca6e4..19a82b6c1c1f 100644 --- a/include/linux/fib_rules.h +++ b/include/linux/fib_rules.h @@ -36,6 +36,10 @@ enum FRA_UNUSED5, FRA_FWMARK, /* netfilter mark (IPv4) */ FRA_FLOW, /* flow/class id */ + FRA_UNUSED6, + FRA_UNUSED7, + FRA_UNUSED8, + FRA_TABLE, /* Extended table id */ __FRA_MAX }; diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 0aaffa2ae666..ea422a539a03 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -264,6 +264,7 @@ enum rtattr_type_t RTA_CACHEINFO, RTA_SESSION, RTA_MP_ALGO, + RTA_TABLE, __RTA_MAX }; @@ -717,6 +718,13 @@ extern void __rtnl_unlock(void); } \ } while(0) +static inline u32 rtm_get_table(struct rtattr **rta, u8 table) +{ + return RTA_GET_U32(rta[RTA_TABLE-1]); +rtattr_failure: + return table; +} + #endif /* __KERNEL__ */ diff --git a/include/net/fib_rules.h b/include/net/fib_rules.h index 61375d9e53f8..8e2f473d3e82 100644 --- a/include/net/fib_rules.h +++ b/include/net/fib_rules.h @@ -74,6 +74,13 @@ static inline void fib_rule_put(struct fib_rule *rule) call_rcu(&rule->rcu, fib_rule_put_rcu); } +static inline u32 frh_get_table(struct fib_rule_hdr *frh, struct nlattr **nla) +{ + if (nla[FRA_TABLE]) + return nla_get_u32(nla[FRA_TABLE]); + return frh->table; +} + extern int fib_rules_register(struct fib_rules_ops *); extern int fib_rules_unregister(struct fib_rules_ops *); diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 6cdad24038e2..873b04d5df81 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -187,7 +187,7 @@ int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) rule->action = frh->action; rule->flags = frh->flags; - rule->table = frh->table; + rule->table = frh_get_table(frh, tb); if (!rule->pref && ops->default_pref) rule->pref = ops->default_pref(); @@ -245,7 +245,7 @@ int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (frh->action && (frh->action != rule->action)) continue; - if (frh->table && (frh->table != rule->table)) + if (frh->table && (frh_get_table(frh, tb) != rule->table)) continue; if (tb[FRA_PRIORITY] && @@ -291,6 +291,7 @@ static int fib_nl_fill_rule(struct sk_buff *skb, struct fib_rule *rule, frh = nlmsg_data(nlh); frh->table = rule->table; + NLA_PUT_U32(skb, FRA_TABLE, rule->table); frh->res1 = 0; frh->res2 = 0; frh->action = rule->action; diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 7b3bf5c3d720..fb596373daa8 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -491,7 +491,8 @@ static int dn_fib_check_attr(struct rtmsg *r, struct rtattr **rta) if (attr) { if (RTA_PAYLOAD(attr) < 4 && RTA_PAYLOAD(attr) != 2) return -EINVAL; - if (i != RTA_MULTIPATH && i != RTA_METRICS) + if (i != RTA_MULTIPATH && i != RTA_METRICS && + i != RTA_TABLE) rta[i-1] = (struct rtattr *)RTA_DATA(attr); } } @@ -508,7 +509,7 @@ int dn_fib_rtm_delroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (dn_fib_check_attr(r, rta)) return -EINVAL; - tb = dn_fib_get_table(r->rtm_table, 0); + tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 0); if (tb) return tb->delete(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); @@ -524,7 +525,7 @@ int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) if (dn_fib_check_attr(r, rta)) return -EINVAL; - tb = dn_fib_get_table(r->rtm_table, 1); + tb = dn_fib_get_table(rtm_get_table(rta, r->rtm_table), 1); if (tb) return tb->insert(tb, r, (struct dn_kern_rta *)rta, nlh, &NETLINK_CB(skb)); diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 5e6f4616ca10..4c963213fba5 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1486,6 +1486,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, r->rtm_src_len = 0; r->rtm_tos = 0; r->rtm_table = RT_TABLE_MAIN; + RTA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); r->rtm_type = rt->rt_type; r->rtm_flags = (rt->rt_flags & ~0xFFFF) | RTM_F_CLONED; r->rtm_scope = RT_SCOPE_UNIVERSE; diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 1601ee5406a8..eca7c1e10c80 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -278,6 +278,7 @@ static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, rtm->rtm_src_len = 0; rtm->rtm_tos = 0; rtm->rtm_table = tb_id; + RTA_PUT_U32(skb, RTA_TABLE, tb_id); rtm->rtm_flags = fi->fib_flags; rtm->rtm_scope = scope; rtm->rtm_type = type; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 06f4b23f6f57..2696ede52de2 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -294,7 +294,8 @@ static int inet_check_attr(struct rtmsg *r, struct rtattr **rta) if (attr) { if (RTA_PAYLOAD(attr) < 4) return -EINVAL; - if (i != RTA_MULTIPATH && i != RTA_METRICS) + if (i != RTA_MULTIPATH && i != RTA_METRICS && + i != RTA_TABLE) *rta = (struct rtattr*)RTA_DATA(attr); } } @@ -310,7 +311,7 @@ int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (inet_check_attr(r, rta)) return -EINVAL; - tb = fib_get_table(r->rtm_table); + tb = fib_get_table(rtm_get_table(rta, r->rtm_table)); if (tb) return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb)); return -ESRCH; @@ -325,7 +326,7 @@ int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (inet_check_attr(r, rta)) return -EINVAL; - tb = fib_new_table(r->rtm_table); + tb = fib_new_table(rtm_get_table(rta, r->rtm_table)); if (tb) return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb)); return -ENOBUFS; diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 58fb91b00fdf..0330b9cc4b58 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -184,6 +184,7 @@ static struct nla_policy fib4_rule_policy[FRA_MAX+1] __read_mostly = { [FRA_DST] = { .type = NLA_U32 }, [FRA_FWMARK] = { .type = NLA_U32 }, [FRA_FLOW] = { .type = NLA_U32 }, + [FRA_TABLE] = { .type = NLA_U32 }, }; static int fib4_rule_configure(struct fib_rule *rule, struct sk_buff *skb, diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index c7a112b5a185..ab753df20a39 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -953,6 +953,7 @@ fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, rtm->rtm_src_len = 0; rtm->rtm_tos = tos; rtm->rtm_table = tb_id; + RTA_PUT_U32(skb, RTA_TABLE, tb_id); rtm->rtm_type = type; rtm->rtm_flags = fi->fib_flags; rtm->rtm_scope = scope; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index b873cbcdd0b8..12128b82c9dc 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2652,6 +2652,7 @@ static int rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int event, r->rtm_src_len = 0; r->rtm_tos = rt->fl.fl4_tos; r->rtm_table = RT_TABLE_MAIN; + RTA_PUT_U32(skb, RTA_TABLE, RT_TABLE_MAIN); r->rtm_type = rt->rt_type; r->rtm_scope = RT_SCOPE_UNIVERSE; r->rtm_protocol = RTPROT_UNSPEC; diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 22a2fdb09831..2c4fbc855e6c 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -129,6 +129,7 @@ static struct nla_policy fib6_rule_policy[RTA_MAX+1] __read_mostly = { [FRA_PRIORITY] = { .type = NLA_U32 }, [FRA_SRC] = { .minlen = sizeof(struct in6_addr) }, [FRA_DST] = { .minlen = sizeof(struct in6_addr) }, + [FRA_TABLE] = { .type = NLA_U32 }, }; static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index e08d84063c1f..843c5509fced 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1859,7 +1859,8 @@ int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) return -EINVAL; - return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb), r->rtm_table); + return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb), + rtm_get_table(arg, r->rtm_table)); } int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) @@ -1869,7 +1870,8 @@ int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) return -EINVAL; - return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb), r->rtm_table); + return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb), + rtm_get_table(arg, r->rtm_table)); } struct rt6_rtnl_dump_arg @@ -1887,6 +1889,7 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, struct nlmsghdr *nlh; unsigned char *b = skb->tail; struct rta_cacheinfo ci; + u32 table; if (prefix) { /* user wants prefix routes only */ if (!(rt->rt6i_flags & RTF_PREFIX_RT)) { @@ -1902,9 +1905,11 @@ static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, rtm->rtm_src_len = rt->rt6i_src.plen; rtm->rtm_tos = 0; if (rt->rt6i_table) - rtm->rtm_table = rt->rt6i_table->tb6_id; + table = rt->rt6i_table->tb6_id; else - rtm->rtm_table = RT6_TABLE_UNSPEC; + table = RT6_TABLE_UNSPEC; + rtm->rtm_table = table; + RTA_PUT_U32(skb, RTA_TABLE, table); if (rt->rt6i_flags&RTF_REJECT) rtm->rtm_type = RTN_UNREACHABLE; else if (rt->rt6i_dev && (rt->rt6i_dev->flags&IFF_LOOPBACK)) -- cgit v1.3-8-gc7d7 From 1af5a8c4a11cfed0c9a7f30fcfb689981750599c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Aug 2006 23:10:46 -0700 Subject: [IPV4]: Increase number of possible routing tables to 2^32 Increase the number of possible routing tables to 2^32 by replacing the fixed sized array of pointers by a hash table and replacing iterations over all possible table IDs by hash table walking. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/ip_fib.h | 25 +++--------- net/ipv4/fib_frontend.c | 102 ++++++++++++++++++++++++++++++++---------------- net/ipv4/fib_hash.c | 26 ++++++------ net/ipv4/fib_rules.c | 4 +- net/ipv4/fib_trie.c | 26 ++++++------ 5 files changed, 101 insertions(+), 82 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 0dcbf166eb94..8e9ba563d342 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -150,6 +150,7 @@ struct fib_result_nl { #endif /* CONFIG_IP_ROUTE_MULTIPATH_WRANDOM */ struct fib_table { + struct hlist_node tb_hlist; u32 tb_id; unsigned tb_stamp; int (*tb_lookup)(struct fib_table *tb, const struct flowi *flp, struct fib_result *res); @@ -200,29 +201,13 @@ static inline void fib_select_default(const struct flowi *flp, struct fib_result } #else /* CONFIG_IP_MULTIPLE_TABLES */ -#define ip_fib_local_table (fib_tables[RT_TABLE_LOCAL]) -#define ip_fib_main_table (fib_tables[RT_TABLE_MAIN]) +#define ip_fib_local_table fib_get_table(RT_TABLE_LOCAL) +#define ip_fib_main_table fib_get_table(RT_TABLE_MAIN) -extern struct fib_table * fib_tables[RT_TABLE_MAX+1]; extern int fib_lookup(struct flowi *flp, struct fib_result *res); -extern struct fib_table *__fib_new_table(u32 id); - -static inline struct fib_table *fib_get_table(u32 id) -{ - if (id == 0) - id = RT_TABLE_MAIN; - - return fib_tables[id]; -} - -static inline struct fib_table *fib_new_table(u32 id) -{ - if (id == 0) - id = RT_TABLE_MAIN; - - return fib_tables[id] ? : __fib_new_table(id); -} +extern struct fib_table *fib_new_table(u32 id); +extern struct fib_table *fib_get_table(u32 id); extern void fib_select_default(const struct flowi *flp, struct fib_result *res); #endif /* CONFIG_IP_MULTIPLE_TABLES */ diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 2696ede52de2..ad4c14f968a1 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -51,48 +52,67 @@ #ifndef CONFIG_IP_MULTIPLE_TABLES -#define RT_TABLE_MIN RT_TABLE_MAIN - struct fib_table *ip_fib_local_table; struct fib_table *ip_fib_main_table; -#else +#define FIB_TABLE_HASHSZ 1 +static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; -#define RT_TABLE_MIN 1 +#else -struct fib_table *fib_tables[RT_TABLE_MAX+1]; +#define FIB_TABLE_HASHSZ 256 +static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; -struct fib_table *__fib_new_table(u32 id) +struct fib_table *fib_new_table(u32 id) { struct fib_table *tb; + unsigned int h; + if (id == 0) + id = RT_TABLE_MAIN; + tb = fib_get_table(id); + if (tb) + return tb; tb = fib_hash_init(id); if (!tb) return NULL; - fib_tables[id] = tb; + h = id & (FIB_TABLE_HASHSZ - 1); + hlist_add_head_rcu(&tb->tb_hlist, &fib_table_hash[h]); return tb; } +struct fib_table *fib_get_table(u32 id) +{ + struct fib_table *tb; + struct hlist_node *node; + unsigned int h; + if (id == 0) + id = RT_TABLE_MAIN; + h = id & (FIB_TABLE_HASHSZ - 1); + rcu_read_lock(); + hlist_for_each_entry_rcu(tb, node, &fib_table_hash[h], tb_hlist) { + if (tb->tb_id == id) { + rcu_read_unlock(); + return tb; + } + } + rcu_read_unlock(); + return NULL; +} #endif /* CONFIG_IP_MULTIPLE_TABLES */ - static void fib_flush(void) { int flushed = 0; -#ifdef CONFIG_IP_MULTIPLE_TABLES struct fib_table *tb; - u32 id; + struct hlist_node *node; + unsigned int h; - for (id = RT_TABLE_MAX; id>0; id--) { - if ((tb = fib_get_table(id))==NULL) - continue; - flushed += tb->tb_flush(tb); + for (h = 0; h < FIB_TABLE_HASHSZ; h++) { + hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) + flushed += tb->tb_flush(tb); } -#else /* CONFIG_IP_MULTIPLE_TABLES */ - flushed += ip_fib_main_table->tb_flush(ip_fib_main_table); - flushed += ip_fib_local_table->tb_flush(ip_fib_local_table); -#endif /* CONFIG_IP_MULTIPLE_TABLES */ if (flushed) rt_cache_flush(-1); @@ -334,29 +354,37 @@ int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) { - u32 t; - u32 s_t; + unsigned int h, s_h; + unsigned int e = 0, s_e; struct fib_table *tb; + struct hlist_node *node; + int dumped = 0; if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && ((struct rtmsg*)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) return ip_rt_dump(skb, cb); - s_t = cb->args[0]; - if (s_t == 0) - s_t = cb->args[0] = RT_TABLE_MIN; - - for (t=s_t; t<=RT_TABLE_MAX; t++) { - if (t < s_t) continue; - if (t > s_t) - memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); - if ((tb = fib_get_table(t))==NULL) - continue; - if (tb->tb_dump(tb, skb, cb) < 0) - break; + s_h = cb->args[0]; + s_e = cb->args[1]; + + for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { + e = 0; + hlist_for_each_entry(tb, node, &fib_table_hash[h], tb_hlist) { + if (e < s_e) + goto next; + if (dumped) + memset(&cb->args[2], 0, sizeof(cb->args) - + 2 * sizeof(cb->args[0])); + if (tb->tb_dump(tb, skb, cb) < 0) + goto out; + dumped = 1; +next: + e++; + } } - - cb->args[0] = t; +out: + cb->args[1] = e; + cb->args[0] = h; return skb->len; } @@ -654,9 +682,15 @@ static struct notifier_block fib_netdev_notifier = { void __init ip_fib_init(void) { + unsigned int i; + + for (i = 0; i < FIB_TABLE_HASHSZ; i++) + INIT_HLIST_HEAD(&fib_table_hash[i]); #ifndef CONFIG_IP_MULTIPLE_TABLES ip_fib_local_table = fib_hash_init(RT_TABLE_LOCAL); + hlist_add_head_rcu(&ip_fib_local_table->tb_hlist, &fib_table_hash[0]); ip_fib_main_table = fib_hash_init(RT_TABLE_MAIN); + hlist_add_head_rcu(&ip_fib_main_table->tb_hlist, &fib_table_hash[0]); #else fib4_rules_init(); #endif diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index f8d5c8024ccb..b5bee1a71e5c 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -684,7 +684,7 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, struct fib_node *f; int i, s_i; - s_i = cb->args[3]; + s_i = cb->args[4]; i = 0; hlist_for_each_entry(f, node, head, fn_hash) { struct fib_alias *fa; @@ -704,14 +704,14 @@ fn_hash_dump_bucket(struct sk_buff *skb, struct netlink_callback *cb, fa->fa_tos, fa->fa_info, NLM_F_MULTI) < 0) { - cb->args[3] = i; + cb->args[4] = i; return -1; } next: i++; } } - cb->args[3] = i; + cb->args[4] = i; return skb->len; } @@ -722,21 +722,21 @@ fn_hash_dump_zone(struct sk_buff *skb, struct netlink_callback *cb, { int h, s_h; - s_h = cb->args[2]; + s_h = cb->args[3]; for (h=0; h < fz->fz_divisor; h++) { if (h < s_h) continue; if (h > s_h) - memset(&cb->args[3], 0, - sizeof(cb->args) - 3*sizeof(cb->args[0])); + memset(&cb->args[4], 0, + sizeof(cb->args) - 4*sizeof(cb->args[0])); if (fz->fz_hash == NULL || hlist_empty(&fz->fz_hash[h])) continue; if (fn_hash_dump_bucket(skb, cb, tb, fz, &fz->fz_hash[h])<0) { - cb->args[2] = h; + cb->args[3] = h; return -1; } } - cb->args[2] = h; + cb->args[3] = h; return skb->len; } @@ -746,21 +746,21 @@ static int fn_hash_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin struct fn_zone *fz; struct fn_hash *table = (struct fn_hash*)tb->tb_data; - s_m = cb->args[1]; + s_m = cb->args[2]; read_lock(&fib_hash_lock); for (fz = table->fn_zone_list, m=0; fz; fz = fz->fz_next, m++) { if (m < s_m) continue; if (m > s_m) - memset(&cb->args[2], 0, - sizeof(cb->args) - 2*sizeof(cb->args[0])); + memset(&cb->args[3], 0, + sizeof(cb->args) - 3*sizeof(cb->args[0])); if (fn_hash_dump_zone(skb, cb, tb, fz) < 0) { - cb->args[1] = m; + cb->args[2] = m; read_unlock(&fib_hash_lock); return -1; } } read_unlock(&fib_hash_lock); - cb->args[1] = m; + cb->args[2] = m; return skb->len; } diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 0330b9cc4b58..ce185ac6f260 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -172,8 +172,8 @@ static struct fib_table *fib_empty_table(void) u32 id; for (id = 1; id <= RT_TABLE_MAX; id++) - if (fib_tables[id] == NULL) - return __fib_new_table(id); + if (fib_get_table(id) == NULL) + return fib_new_table(id); return NULL; } diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 4a27b2d573a3..2a580eb2579b 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1848,7 +1848,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi u32 xkey = htonl(key); - s_i = cb->args[3]; + s_i = cb->args[4]; i = 0; /* rcu_read_lock is hold by caller */ @@ -1870,12 +1870,12 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, struct fi plen, fa->fa_tos, fa->fa_info, 0) < 0) { - cb->args[3] = i; + cb->args[4] = i; return -1; } i++; } - cb->args[3] = i; + cb->args[4] = i; return skb->len; } @@ -1886,14 +1886,14 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str struct list_head *fa_head; struct leaf *l = NULL; - s_h = cb->args[2]; + s_h = cb->args[3]; for (h = 0; (l = nextleaf(t, l)) != NULL; h++) { if (h < s_h) continue; if (h > s_h) - memset(&cb->args[3], 0, - sizeof(cb->args) - 3*sizeof(cb->args[0])); + memset(&cb->args[4], 0, + sizeof(cb->args) - 4*sizeof(cb->args[0])); fa_head = get_fa_head(l, plen); @@ -1904,11 +1904,11 @@ static int fn_trie_dump_plen(struct trie *t, int plen, struct fib_table *tb, str continue; if (fn_trie_dump_fa(l->key, plen, fa_head, tb, skb, cb)<0) { - cb->args[2] = h; + cb->args[3] = h; return -1; } } - cb->args[2] = h; + cb->args[3] = h; return skb->len; } @@ -1917,23 +1917,23 @@ static int fn_trie_dump(struct fib_table *tb, struct sk_buff *skb, struct netlin int m, s_m; struct trie *t = (struct trie *) tb->tb_data; - s_m = cb->args[1]; + s_m = cb->args[2]; rcu_read_lock(); for (m = 0; m <= 32; m++) { if (m < s_m) continue; if (m > s_m) - memset(&cb->args[2], 0, - sizeof(cb->args) - 2*sizeof(cb->args[0])); + memset(&cb->args[3], 0, + sizeof(cb->args) - 3*sizeof(cb->args[0])); if (fn_trie_dump_plen(t, 32-m, tb, skb, cb)<0) { - cb->args[1] = m; + cb->args[2] = m; goto out; } } rcu_read_unlock(); - cb->args[1] = m; + cb->args[2] = m; return skb->len; out: rcu_read_unlock(); -- cgit v1.3-8-gc7d7 From 1b43af5480c351dbcb2eef478bafe179cbeb6e83 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Aug 2006 23:11:17 -0700 Subject: [IPV6]: Increase number of possible routing tables to 2^32 Increase number of possible routing tables to 2^32 by replacing iterations over all possible table IDs by hash table walking. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/ip6_route.h | 7 ++ net/ipv6/ip6_fib.c | 171 ++++++++++++++++++++++++++++++++++++++++++------ net/ipv6/route.c | 128 +----------------------------------- 3 files changed, 159 insertions(+), 147 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 9bfa3cc6cedb..01bfe404784f 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -137,6 +137,13 @@ extern int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *a extern int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); extern int inet6_rtm_getroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); +struct rt6_rtnl_dump_arg +{ + struct sk_buff *skb; + struct netlink_callback *cb; +}; + +extern int rt6_dump_route(struct rt6_info *rt, void *p_arg); extern void rt6_ifdown(struct net_device *dev); extern void rt6_mtu_change(struct net_device *dev, unsigned mtu); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 1f2316187ca4..bececbe9dd2c 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -158,7 +158,26 @@ static struct fib6_table fib6_main_tbl = { }; #ifdef CONFIG_IPV6_MULTIPLE_TABLES +#define FIB_TABLE_HASHSZ 256 +#else +#define FIB_TABLE_HASHSZ 1 +#endif +static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; + +static void fib6_link_table(struct fib6_table *tb) +{ + unsigned int h; + + h = tb->tb6_id & (FIB_TABLE_HASHSZ - 1); + /* + * No protection necessary, this is the only list mutatation + * operation, tables never disappear once they exist. + */ + hlist_add_head_rcu(&tb->tb6_hlist, &fib_table_hash[h]); +} + +#ifdef CONFIG_IPV6_MULTIPLE_TABLES static struct fib6_table fib6_local_tbl = { .tb6_id = RT6_TABLE_LOCAL, .tb6_lock = RW_LOCK_UNLOCKED, @@ -168,9 +187,6 @@ static struct fib6_table fib6_local_tbl = { }, }; -#define FIB_TABLE_HASHSZ 256 -static struct hlist_head fib_table_hash[FIB_TABLE_HASHSZ]; - static struct fib6_table *fib6_alloc_table(u32 id) { struct fib6_table *table; @@ -186,19 +202,6 @@ static struct fib6_table *fib6_alloc_table(u32 id) return table; } -static void fib6_link_table(struct fib6_table *tb) -{ - unsigned int h; - - h = tb->tb6_id & (FIB_TABLE_HASHSZ - 1); - - /* - * No protection necessary, this is the only list mutatation - * operation, tables never disappear once they exist. - */ - hlist_add_head_rcu(&tb->tb6_hlist, &fib_table_hash[h]); -} - struct fib6_table *fib6_new_table(u32 id) { struct fib6_table *tb; @@ -263,10 +266,135 @@ struct dst_entry *fib6_rule_lookup(struct flowi *fl, int flags, static void __init fib6_tables_init(void) { + fib6_link_table(&fib6_main_tbl); } #endif +static int fib6_dump_node(struct fib6_walker_t *w) +{ + int res; + struct rt6_info *rt; + + for (rt = w->leaf; rt; rt = rt->u.next) { + res = rt6_dump_route(rt, w->args); + if (res < 0) { + /* Frame is full, suspend walking */ + w->leaf = rt; + return 1; + } + BUG_TRAP(res!=0); + } + w->leaf = NULL; + return 0; +} + +static void fib6_dump_end(struct netlink_callback *cb) +{ + struct fib6_walker_t *w = (void*)cb->args[2]; + + if (w) { + cb->args[2] = 0; + kfree(w); + } + cb->done = (void*)cb->args[3]; + cb->args[1] = 3; +} + +static int fib6_dump_done(struct netlink_callback *cb) +{ + fib6_dump_end(cb); + return cb->done ? cb->done(cb) : 0; +} + +static int fib6_dump_table(struct fib6_table *table, struct sk_buff *skb, + struct netlink_callback *cb) +{ + struct fib6_walker_t *w; + int res; + + w = (void *)cb->args[2]; + w->root = &table->tb6_root; + + if (cb->args[4] == 0) { + read_lock_bh(&table->tb6_lock); + res = fib6_walk(w); + read_unlock_bh(&table->tb6_lock); + if (res > 0) + cb->args[4] = 1; + } else { + read_lock_bh(&table->tb6_lock); + res = fib6_walk_continue(w); + read_unlock_bh(&table->tb6_lock); + if (res != 0) { + if (res < 0) + fib6_walker_unlink(w); + goto end; + } + fib6_walker_unlink(w); + cb->args[4] = 0; + } +end: + return res; +} + +int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) +{ + unsigned int h, s_h; + unsigned int e = 0, s_e; + struct rt6_rtnl_dump_arg arg; + struct fib6_walker_t *w; + struct fib6_table *tb; + struct hlist_node *node; + int res = 0; + + s_h = cb->args[0]; + s_e = cb->args[1]; + + w = (void *)cb->args[2]; + if (w == NULL) { + /* New dump: + * + * 1. hook callback destructor. + */ + cb->args[3] = (long)cb->done; + cb->done = fib6_dump_done; + + /* + * 2. allocate and initialize walker. + */ + w = kzalloc(sizeof(*w), GFP_ATOMIC); + if (w == NULL) + return -ENOMEM; + w->func = fib6_dump_node; + cb->args[2] = (long)w; + } + + arg.skb = skb; + arg.cb = cb; + w->args = &arg; + + for (h = s_h; h < FIB_TABLE_HASHSZ; h++, s_e = 0) { + e = 0; + hlist_for_each_entry(tb, node, &fib_table_hash[h], tb6_hlist) { + if (e < s_e) + goto next; + res = fib6_dump_table(tb, skb, cb); + if (res != 0) + goto out; +next: + e++; + } + } +out: + cb->args[1] = e; + cb->args[0] = h; + + res = res < 0 ? res : skb->len; + if (res <= 0) + fib6_dump_end(cb); + return res; +} /* * Routing Table @@ -1187,17 +1315,20 @@ static void fib6_clean_tree(struct fib6_node *root, void fib6_clean_all(int (*func)(struct rt6_info *, void *arg), int prune, void *arg) { - int i; struct fib6_table *table; + struct hlist_node *node; + unsigned int h; - for (i = FIB6_TABLE_MIN; i <= FIB6_TABLE_MAX; i++) { - table = fib6_get_table(i); - if (table != NULL) { + rcu_read_lock(); + for (h = 0; h < FIB_TABLE_HASHSZ; h++) { + hlist_for_each_entry_rcu(table, node, &fib_table_hash[h], + tb6_hlist) { write_lock_bh(&table->tb6_lock); fib6_clean_tree(&table->tb6_root, func, prune, arg); write_unlock_bh(&table->tb6_lock); } } + rcu_read_unlock(); } static int fib6_prune_clone(struct rt6_info *rt, void *arg) diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 843c5509fced..9ce28277f47f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1874,12 +1874,6 @@ int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) rtm_get_table(arg, r->rtm_table)); } -struct rt6_rtnl_dump_arg -{ - struct sk_buff *skb; - struct netlink_callback *cb; -}; - static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, struct in6_addr *dst, struct in6_addr *src, int iif, int type, u32 pid, u32 seq, @@ -1976,7 +1970,7 @@ rtattr_failure: return -1; } -static int rt6_dump_route(struct rt6_info *rt, void *p_arg) +int rt6_dump_route(struct rt6_info *rt, void *p_arg) { struct rt6_rtnl_dump_arg *arg = (struct rt6_rtnl_dump_arg *) p_arg; int prefix; @@ -1992,126 +1986,6 @@ static int rt6_dump_route(struct rt6_info *rt, void *p_arg) prefix, NLM_F_MULTI); } -static int fib6_dump_node(struct fib6_walker_t *w) -{ - int res; - struct rt6_info *rt; - - for (rt = w->leaf; rt; rt = rt->u.next) { - res = rt6_dump_route(rt, w->args); - if (res < 0) { - /* Frame is full, suspend walking */ - w->leaf = rt; - return 1; - } - BUG_TRAP(res!=0); - } - w->leaf = NULL; - return 0; -} - -static void fib6_dump_end(struct netlink_callback *cb) -{ - struct fib6_walker_t *w = (void*)cb->args[0]; - - if (w) { - cb->args[0] = 0; - kfree(w); - } - cb->done = (void*)cb->args[1]; - cb->args[1] = 0; -} - -static int fib6_dump_done(struct netlink_callback *cb) -{ - fib6_dump_end(cb); - return cb->done ? cb->done(cb) : 0; -} - -int inet6_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) -{ - struct fib6_table *table; - struct rt6_rtnl_dump_arg arg; - struct fib6_walker_t *w; - int i, res = 0; - - arg.skb = skb; - arg.cb = cb; - - /* - * cb->args[0] = pointer to walker structure - * cb->args[1] = saved cb->done() pointer - * cb->args[2] = current table being dumped - */ - - w = (void*)cb->args[0]; - if (w == NULL) { - /* New dump: - * - * 1. hook callback destructor. - */ - cb->args[1] = (long)cb->done; - cb->done = fib6_dump_done; - - /* - * 2. allocate and initialize walker. - */ - w = kzalloc(sizeof(*w), GFP_ATOMIC); - if (w == NULL) - return -ENOMEM; - w->func = fib6_dump_node; - w->args = &arg; - cb->args[0] = (long)w; - cb->args[2] = FIB6_TABLE_MIN; - } else { - w->args = &arg; - i = cb->args[2]; - if (i > FIB6_TABLE_MAX) - goto end; - - table = fib6_get_table(i); - if (table != NULL) { - read_lock_bh(&table->tb6_lock); - w->root = &table->tb6_root; - res = fib6_walk_continue(w); - read_unlock_bh(&table->tb6_lock); - if (res != 0) { - if (res < 0) - fib6_walker_unlink(w); - goto end; - } - } - - fib6_walker_unlink(w); - cb->args[2] = ++i; - } - - for (i = cb->args[2]; i <= FIB6_TABLE_MAX; i++) { - table = fib6_get_table(i); - if (table == NULL) - continue; - - read_lock_bh(&table->tb6_lock); - w->root = &table->tb6_root; - res = fib6_walk(w); - read_unlock_bh(&table->tb6_lock); - if (res) - break; - } -end: - cb->args[2] = i; - - res = res < 0 ? res : skb->len; - /* res < 0 is an error. (really, impossible) - res == 0 means that dump is complete, but skb still can contain data. - res > 0 dump is not complete, but frame is full. - */ - /* Destroy walker, if dump of this table is complete. */ - if (res <= 0) - fib6_dump_end(cb); - return res; -} - int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { struct rtattr **rta = arg; -- cgit v1.3-8-gc7d7 From abcab268303c22d24fc89fedd35d82271d20f5da Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Thu, 10 Aug 2006 23:11:47 -0700 Subject: [DECNET]: Increase number of possible routing tables to 2^32 Increase the number of possible routing tables to 2^32 by replacing the fixed sized array of pointers by a hash table and replacing iterations over all possible table IDs by hash table walking. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/dn_fib.h | 3 +- net/decnet/dn_fib.c | 49 -------------------- net/decnet/dn_rules.c | 2 +- net/decnet/dn_table.c | 125 ++++++++++++++++++++++++++++++++++++-------------- 4 files changed, 93 insertions(+), 86 deletions(-) (limited to 'include/net') diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h index cd9c3782f838..d97aa10c463f 100644 --- a/include/net/dn_fib.h +++ b/include/net/dn_fib.h @@ -94,6 +94,7 @@ struct dn_fib_node { struct dn_fib_table { + struct hlist_node hlist; u32 n; int (*insert)(struct dn_fib_table *t, struct rtmsg *r, @@ -177,8 +178,6 @@ static inline void dn_fib_res_put(struct dn_fib_res *res) fib_rule_put(res->r); } -extern struct dn_fib_table *dn_fib_tables[]; - #else /* Endnode */ #define dn_fib_init() do { } while(0) diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index fb596373daa8..5ccca3ed53bd 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -532,39 +532,6 @@ int dn_fib_rtm_newroute(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return -ENOBUFS; } - -int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) -{ - u32 t; - u32 s_t; - struct dn_fib_table *tb; - - if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && - ((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) - return dn_cache_dump(skb, cb); - - s_t = cb->args[0]; - if (s_t == 0) - s_t = cb->args[0] = RT_MIN_TABLE; - - for(t = s_t; t <= RT_TABLE_MAX; t++) { - if (t < s_t) - continue; - if (t > s_t) - memset(&cb->args[1], 0, - sizeof(cb->args) - sizeof(cb->args[0])); - tb = dn_fib_get_table(t, 0); - if (tb == NULL) - continue; - if (tb->dump(tb, skb, cb) < 0) - break; - } - - cb->args[0] = t; - - return skb->len; -} - static void fib_magic(int cmd, int type, __le16 dst, int dst_len, struct dn_ifaddr *ifa) { struct dn_fib_table *tb; @@ -762,22 +729,6 @@ int dn_fib_sync_up(struct net_device *dev) return ret; } -void dn_fib_flush(void) -{ - int flushed = 0; - struct dn_fib_table *tb; - u32 id; - - for(id = RT_TABLE_MAX; id > 0; id--) { - if ((tb = dn_fib_get_table(id, 0)) == NULL) - continue; - flushed += tb->flush(tb); - } - - if (flushed) - dn_rt_cache_flush(-1); -} - static struct notifier_block dn_fib_dnaddr_notifier = { .notifier_call = dn_fib_dnaddr_event, }; diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 096f1273e714..878312ff34ec 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -210,7 +210,7 @@ unsigned dnet_addr_type(__le16 addr) struct flowi fl = { .nl_u = { .dn_u = { .daddr = addr } } }; struct dn_fib_res res; unsigned ret = RTN_UNICAST; - struct dn_fib_table *tb = dn_fib_tables[RT_TABLE_LOCAL]; + struct dn_fib_table *tb = dn_fib_get_table(RT_TABLE_LOCAL, 0); res.r = NULL; diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index eca7c1e10c80..10e87262b6fb 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -75,9 +75,9 @@ for( ; ((f) = *(fp)) != NULL; (fp) = &(f)->fn_next) for( ; ((f) = *(fp)) != NULL && dn_key_eq((f)->fn_key, (key)); (fp) = &(f)->fn_next) #define RT_TABLE_MIN 1 - +#define DN_FIB_TABLE_HASHSZ 256 +static struct hlist_head dn_fib_table_hash[DN_FIB_TABLE_HASHSZ]; static DEFINE_RWLOCK(dn_fib_tables_lock); -struct dn_fib_table *dn_fib_tables[RT_TABLE_MAX + 1]; static kmem_cache_t *dn_hash_kmem __read_mostly; static int dn_fib_hash_zombies; @@ -361,7 +361,7 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, { int i, s_i; - s_i = cb->args[3]; + s_i = cb->args[4]; for(i = 0; f; i++, f = f->fn_next) { if (i < s_i) continue; @@ -374,11 +374,11 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, (f->fn_state & DN_S_ZOMBIE) ? 0 : f->fn_type, f->fn_scope, &f->fn_key, dz->dz_order, f->fn_info, NLM_F_MULTI) < 0) { - cb->args[3] = i; + cb->args[4] = i; return -1; } } - cb->args[3] = i; + cb->args[4] = i; return skb->len; } @@ -389,20 +389,20 @@ static __inline__ int dn_hash_dump_zone(struct sk_buff *skb, { int h, s_h; - s_h = cb->args[2]; + s_h = cb->args[3]; for(h = 0; h < dz->dz_divisor; h++) { if (h < s_h) continue; if (h > s_h) - memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0])); + memset(&cb->args[4], 0, sizeof(cb->args) - 4*sizeof(cb->args[0])); if (dz->dz_hash == NULL || dz->dz_hash[h] == NULL) continue; if (dn_hash_dump_bucket(skb, cb, tb, dz, dz->dz_hash[h]) < 0) { - cb->args[2] = h; + cb->args[3] = h; return -1; } } - cb->args[2] = h; + cb->args[3] = h; return skb->len; } @@ -413,26 +413,63 @@ static int dn_fib_table_dump(struct dn_fib_table *tb, struct sk_buff *skb, struct dn_zone *dz; struct dn_hash *table = (struct dn_hash *)tb->data; - s_m = cb->args[1]; + s_m = cb->args[2]; read_lock(&dn_fib_tables_lock); for(dz = table->dh_zone_list, m = 0; dz; dz = dz->dz_next, m++) { if (m < s_m) continue; if (m > s_m) - memset(&cb->args[2], 0, sizeof(cb->args) - 2*sizeof(cb->args[0])); + memset(&cb->args[3], 0, sizeof(cb->args) - 3*sizeof(cb->args[0])); if (dn_hash_dump_zone(skb, cb, tb, dz) < 0) { - cb->args[1] = m; + cb->args[2] = m; read_unlock(&dn_fib_tables_lock); return -1; } } read_unlock(&dn_fib_tables_lock); - cb->args[1] = m; + cb->args[2] = m; return skb->len; } +int dn_fib_dump(struct sk_buff *skb, struct netlink_callback *cb) +{ + unsigned int h, s_h; + unsigned int e = 0, s_e; + struct dn_fib_table *tb; + struct hlist_node *node; + int dumped = 0; + + if (NLMSG_PAYLOAD(cb->nlh, 0) >= sizeof(struct rtmsg) && + ((struct rtmsg *)NLMSG_DATA(cb->nlh))->rtm_flags&RTM_F_CLONED) + return dn_cache_dump(skb, cb); + + s_h = cb->args[0]; + s_e = cb->args[1]; + + for (h = s_h; h < DN_FIB_TABLE_HASHSZ; h++, s_h = 0) { + e = 0; + hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist) { + if (e < s_e) + goto next; + if (dumped) + memset(&cb->args[2], 0, sizeof(cb->args) - + 2 * sizeof(cb->args[0])); + if (tb->dump(tb, skb, cb) < 0) + goto out; + dumped = 1; +next: + e++; + } + } +out: + cb->args[1] = e; + cb->args[0] = h; + + return skb->len; +} + static int dn_fib_table_insert(struct dn_fib_table *tb, struct rtmsg *r, struct dn_kern_rta *rta, struct nlmsghdr *n, struct netlink_skb_parms *req) { struct dn_hash *table = (struct dn_hash *)tb->data; @@ -744,6 +781,8 @@ out: struct dn_fib_table *dn_fib_get_table(u32 n, int create) { struct dn_fib_table *t; + struct hlist_node *node; + unsigned int h; if (n < RT_TABLE_MIN) return NULL; @@ -751,8 +790,15 @@ struct dn_fib_table *dn_fib_get_table(u32 n, int create) if (n > RT_TABLE_MAX) return NULL; - if (dn_fib_tables[n]) - return dn_fib_tables[n]; + h = n & (DN_FIB_TABLE_HASHSZ - 1); + rcu_read_lock(); + hlist_for_each_entry_rcu(t, node, &dn_fib_table_hash[h], hlist) { + if (t->n == n) { + rcu_read_unlock(); + return t; + } + } + rcu_read_unlock(); if (!create) return NULL; @@ -773,33 +819,37 @@ struct dn_fib_table *dn_fib_get_table(u32 n, int create) t->flush = dn_fib_table_flush; t->dump = dn_fib_table_dump; memset(t->data, 0, sizeof(struct dn_hash)); - dn_fib_tables[n] = t; + hlist_add_head_rcu(&t->hlist, &dn_fib_table_hash[h]); return t; } -static void dn_fib_del_tree(u32 n) -{ - struct dn_fib_table *t; - - write_lock(&dn_fib_tables_lock); - t = dn_fib_tables[n]; - dn_fib_tables[n] = NULL; - write_unlock(&dn_fib_tables_lock); - - kfree(t); -} - struct dn_fib_table *dn_fib_empty_table(void) { u32 id; for(id = RT_TABLE_MIN; id <= RT_TABLE_MAX; id++) - if (dn_fib_tables[id] == NULL) + if (dn_fib_get_table(id, 0) == NULL) return dn_fib_get_table(id, 1); return NULL; } +void dn_fib_flush(void) +{ + int flushed = 0; + struct dn_fib_table *tb; + struct hlist_node *node; + unsigned int h; + + for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) { + hlist_for_each_entry(tb, node, &dn_fib_table_hash[h], hlist) + flushed += tb->flush(tb); + } + + if (flushed) + dn_rt_cache_flush(-1); +} + void __init dn_fib_table_init(void) { dn_hash_kmem = kmem_cache_create("dn_fib_info_cache", @@ -810,10 +860,17 @@ void __init dn_fib_table_init(void) void __exit dn_fib_table_cleanup(void) { - int i; - - for (i = RT_TABLE_MIN; i <= RT_TABLE_MAX; ++i) - dn_fib_del_tree(i); + struct dn_fib_table *t; + struct hlist_node *node, *next; + unsigned int h; - return; + write_lock(&dn_fib_tables_lock); + for (h = 0; h < DN_FIB_TABLE_HASHSZ; h++) { + hlist_for_each_entry_safe(t, node, next, &dn_fib_table_hash[h], + hlist) { + hlist_del(&t->hlist); + kfree(t); + } + } + write_unlock(&dn_fib_tables_lock); } -- cgit v1.3-8-gc7d7 From 90d41122f79c8c3687d965dde4c6d30a6e0cac4c Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 14 Aug 2006 23:49:16 -0700 Subject: [IPV6] ip6_fib.c: make code static Make the following needlessly global code static: - fib6_walker_lock - struct fib6_walker_list - fib6_walk_continue() - fib6_walk() Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 25 ------------------------- net/ipv6/ip6_fib.c | 29 ++++++++++++++++++++++++----- 2 files changed, 24 insertions(+), 30 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index c0660cea9a2f..69c444209781 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -92,28 +92,6 @@ struct fib6_walker_t void *args; }; -extern struct fib6_walker_t fib6_walker_list; -extern rwlock_t fib6_walker_lock; - -static inline void fib6_walker_link(struct fib6_walker_t *w) -{ - write_lock_bh(&fib6_walker_lock); - w->next = fib6_walker_list.next; - w->prev = &fib6_walker_list; - w->next->prev = w; - w->prev->next = w; - write_unlock_bh(&fib6_walker_lock); -} - -static inline void fib6_walker_unlink(struct fib6_walker_t *w) -{ - write_lock_bh(&fib6_walker_lock); - w->next->prev = w->prev; - w->prev->next = w->next; - w->prev = w->next = w; - write_unlock_bh(&fib6_walker_lock); -} - struct rt6_statistics { __u32 fib_nodes; __u32 fib_route_nodes; @@ -195,9 +173,6 @@ struct fib6_node *fib6_locate(struct fib6_node *root, extern void fib6_clean_all(int (*func)(struct rt6_info *, void *arg), int prune, void *arg); -extern int fib6_walk(struct fib6_walker_t *w); -extern int fib6_walk_continue(struct fib6_walker_t *w); - extern int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nlmsghdr *nlh, diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index bececbe9dd2c..be36f4acda94 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -69,8 +69,7 @@ struct fib6_cleaner_t void *arg; }; -DEFINE_RWLOCK(fib6_walker_lock); - +static DEFINE_RWLOCK(fib6_walker_lock); #ifdef CONFIG_IPV6_SUBTREES #define FWS_INIT FWS_S @@ -82,6 +81,8 @@ DEFINE_RWLOCK(fib6_walker_lock); static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt); static struct fib6_node * fib6_repair_tree(struct fib6_node *fn); +static int fib6_walk(struct fib6_walker_t *w); +static int fib6_walk_continue(struct fib6_walker_t *w); /* * A routing update causes an increase of the serial number on the @@ -94,13 +95,31 @@ static __u32 rt_sernum; static DEFINE_TIMER(ip6_fib_timer, fib6_run_gc, 0, 0); -struct fib6_walker_t fib6_walker_list = { +static struct fib6_walker_t fib6_walker_list = { .prev = &fib6_walker_list, .next = &fib6_walker_list, }; #define FOR_WALKERS(w) for ((w)=fib6_walker_list.next; (w) != &fib6_walker_list; (w)=(w)->next) +static inline void fib6_walker_link(struct fib6_walker_t *w) +{ + write_lock_bh(&fib6_walker_lock); + w->next = fib6_walker_list.next; + w->prev = &fib6_walker_list; + w->next->prev = w; + w->prev->next = w; + write_unlock_bh(&fib6_walker_lock); +} + +static inline void fib6_walker_unlink(struct fib6_walker_t *w) +{ + write_lock_bh(&fib6_walker_lock); + w->next->prev = w->prev; + w->prev->next = w->next; + w->prev = w->next = w; + write_unlock_bh(&fib6_walker_lock); +} static __inline__ u32 fib6_new_sernum(void) { u32 n = ++rt_sernum; @@ -1173,7 +1192,7 @@ int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct ne * <0 -> walk is terminated by an error. */ -int fib6_walk_continue(struct fib6_walker_t *w) +static int fib6_walk_continue(struct fib6_walker_t *w) { struct fib6_node *fn, *pn; @@ -1247,7 +1266,7 @@ int fib6_walk_continue(struct fib6_walker_t *w) } } -int fib6_walk(struct fib6_walker_t *w) +static int fib6_walk(struct fib6_walker_t *w) { int res; -- cgit v1.3-8-gc7d7 From 2aa7f36cdb332a32849afbf25fcbf35dce5b1940 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 14 Aug 2006 23:55:20 -0700 Subject: [DECNET]: cleanups - make the following needlessly global functions static: - dn_fib.c: dn_fib_sync_down() - dn_fib.c: dn_fib_sync_up() - dn_rules.c: dn_fib_rule_action() - remove the following unneeded prototype: - dn_fib.c: dn_cache_dump() Signed-off-by: Adrian Bunk Signed-off-by: Andrew Morton Signed-off-by: David S. Miller --- include/net/dn_fib.h | 3 --- net/decnet/dn_fib.c | 9 +++++---- net/decnet/dn_rules.c | 4 ++-- 3 files changed, 7 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/include/net/dn_fib.h b/include/net/dn_fib.h index d97aa10c463f..f01626cbbed6 100644 --- a/include/net/dn_fib.h +++ b/include/net/dn_fib.h @@ -131,9 +131,6 @@ extern __le16 dn_fib_get_attr16(struct rtattr *attr, int attrlen, int type); extern void dn_fib_flush(void); extern void dn_fib_select_multipath(const struct flowi *fl, struct dn_fib_res *res); -extern int dn_fib_sync_down(__le16 local, struct net_device *dev, - int force); -extern int dn_fib_sync_up(struct net_device *dev); /* * dn_tables.c diff --git a/net/decnet/dn_fib.c b/net/decnet/dn_fib.c index 5ccca3ed53bd..1cf010124ec5 100644 --- a/net/decnet/dn_fib.c +++ b/net/decnet/dn_fib.c @@ -55,8 +55,6 @@ #define endfor_nexthops(fi) } -extern int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb); - static DEFINE_SPINLOCK(dn_fib_multipath_lock); static struct dn_fib_info *dn_fib_info_list; static DEFINE_SPINLOCK(dn_fib_info_lock); @@ -80,6 +78,9 @@ static struct [RTN_XRESOLVE] = { .error = -EINVAL, .scope = RT_SCOPE_NOWHERE }, }; +static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force); +static int dn_fib_sync_up(struct net_device *dev); + void dn_fib_free_info(struct dn_fib_info *fi) { if (fi->fib_dead == 0) { @@ -651,7 +652,7 @@ static int dn_fib_dnaddr_event(struct notifier_block *this, unsigned long event, return NOTIFY_DONE; } -int dn_fib_sync_down(__le16 local, struct net_device *dev, int force) +static int dn_fib_sync_down(__le16 local, struct net_device *dev, int force) { int ret = 0; int scope = RT_SCOPE_NOWHERE; @@ -695,7 +696,7 @@ int dn_fib_sync_down(__le16 local, struct net_device *dev, int force) } -int dn_fib_sync_up(struct net_device *dev) +static int dn_fib_sync_up(struct net_device *dev) { int ret = 0; diff --git a/net/decnet/dn_rules.c b/net/decnet/dn_rules.c index 977bb56c3ce4..50e819edf8c7 100644 --- a/net/decnet/dn_rules.c +++ b/net/decnet/dn_rules.c @@ -75,8 +75,8 @@ int dn_fib_lookup(struct flowi *flp, struct dn_fib_res *res) return err; } -int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp, int flags, - struct fib_lookup_arg *arg) +static int dn_fib_rule_action(struct fib_rule *rule, struct flowi *flp, + int flags, struct fib_lookup_arg *arg) { int err = -EAGAIN; struct dn_fib_table *tbl; -- cgit v1.3-8-gc7d7 From d387f6ad10764fc2174373b4a1cca443adee36e3 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 15 Aug 2006 00:31:06 -0700 Subject: [NETLINK]: Add notification message sending interface Adds nlmsg_notify() implementing proper notification logic. The message is multicasted to all listeners in the group. The applications the requests orignates from can request a unicast back report in which case said socket will be excluded from the multicast to avoid duplicated notifications. nlmsg_multicast() is extended to take allocation flags to allow notification in atomic contexts. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/genetlink.h | 5 +++-- include/net/netlink.h | 6 ++++-- net/netlabel/netlabel_user.c | 2 +- net/netlink/af_netlink.c | 34 +++++++++++++++++++++++++++++++++- net/netlink/genetlink.c | 2 +- 5 files changed, 42 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 8c2287264266..97d6d3aba9d2 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -133,11 +133,12 @@ static inline int genlmsg_cancel(struct sk_buff *skb, void *hdr) * @skb: netlink message as socket buffer * @pid: own netlink pid to avoid sending to yourself * @group: multicast group id + * @flags: allocation flags */ static inline int genlmsg_multicast(struct sk_buff *skb, u32 pid, - unsigned int group) + unsigned int group, gfp_t flags) { - return nlmsg_multicast(genl_sock, skb, pid, group); + return nlmsg_multicast(genl_sock, skb, pid, group, flags); } /** diff --git a/include/net/netlink.h b/include/net/netlink.h index 3a5e40b1e045..b154b81d9a7a 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -43,6 +43,7 @@ * Message Sending: * nlmsg_multicast() multicast message to several groups * nlmsg_unicast() unicast a message to a single socket + * nlmsg_notify() send notification message * * Message Length Calculations: * nlmsg_msg_size(payload) length of message w/o padding @@ -545,15 +546,16 @@ static inline void nlmsg_free(struct sk_buff *skb) * @skb: netlink message as socket buffer * @pid: own netlink pid to avoid sending to yourself * @group: multicast group id + * @flags: allocation flags */ static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb, - u32 pid, unsigned int group) + u32 pid, unsigned int group, gfp_t flags) { int err; NETLINK_CB(skb).dst_group = group; - err = netlink_broadcast(sk, skb, pid, group, GFP_KERNEL); + err = netlink_broadcast(sk, skb, pid, group, flags); if (err > 0) err = 0; diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index 80022221b0a7..73cbe66e42ff 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -154,5 +154,5 @@ int netlbl_netlink_snd(struct sk_buff *skb, u32 pid) */ int netlbl_netlink_snd_multicast(struct sk_buff *skb, u32 pid, u32 group) { - return genlmsg_multicast(skb, pid, group); + return genlmsg_multicast(skb, pid, group, GFP_KERNEL); } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 0f36ddc0b72d..a80e4456e204 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -1549,6 +1549,38 @@ void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb) skb_pull(skb, msglen); } +/** + * nlmsg_notify - send a notification netlink message + * @sk: netlink socket to use + * @skb: notification message + * @pid: destination netlink pid for reports or 0 + * @group: destination multicast group or 0 + * @report: 1 to report back, 0 to disable + * @flags: allocation flags + */ +int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid, + unsigned int group, int report, gfp_t flags) +{ + int err = 0; + + if (group) { + int exclude_pid = 0; + + if (report) { + atomic_inc(&skb->users); + exclude_pid = pid; + } + + /* errors reported via destination sk->sk_err */ + nlmsg_multicast(sk, skb, exclude_pid, group, flags); + } + + if (report) + err = nlmsg_unicast(sk, skb, pid); + + return err; +} + #ifdef CONFIG_PROC_FS struct nl_seq_iter { int link; @@ -1802,4 +1834,4 @@ EXPORT_SYMBOL(netlink_set_err); EXPORT_SYMBOL(netlink_set_nonroot); EXPORT_SYMBOL(netlink_unicast); EXPORT_SYMBOL(netlink_unregister_notifier); - +EXPORT_SYMBOL(nlmsg_notify); diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 75bb47a898dd..d32599116c56 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -510,7 +510,7 @@ static int genl_ctrl_event(int event, void *data) if (IS_ERR(msg)) return PTR_ERR(msg); - genlmsg_multicast(msg, 0, GENL_ID_CTRL); + genlmsg_multicast(msg, 0, GENL_ID_CTRL, GFP_KERNEL); break; } -- cgit v1.3-8-gc7d7 From 97676b6b5538b3e059d33b8338e7d5cc41c5f1f1 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 15 Aug 2006 00:31:41 -0700 Subject: [RTNETLINK]: Add rtnetlink notification interface Adds rtnl_notify() to send rtnetlink notification messages and rtnl_set_sk_err() to report notification errors as socket errors in order to indicate the need of a resync due to loss of events. nlmsg_report() is added to properly document the meaning of NLM_F_ECHO. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 3 +++ include/net/netlink.h | 17 +++++++++++++++++ net/core/rtnetlink.c | 18 ++++++++++++++++++ 3 files changed, 38 insertions(+) (limited to 'include/net') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 0e4f478e2cb5..ecbe0349060c 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -585,6 +585,9 @@ struct rtnetlink_link extern struct rtnetlink_link * rtnetlink_links[NPROTO]; extern int rtnetlink_send(struct sk_buff *skb, u32 pid, u32 group, int echo); extern int rtnl_unicast(struct sk_buff *skb, u32 pid); +extern int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, + struct nlmsghdr *nlh, gfp_t flags); +extern void rtnl_set_sk_err(u32 group, int error); extern int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics); extern void __rta_fill(struct sk_buff *skb, int attrtype, int attrlen, const void *data); diff --git a/include/net/netlink.h b/include/net/netlink.h index b154b81d9a7a..bf593eb59e1b 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -65,6 +65,9 @@ * nlmsg_validate() validate netlink message incl. attrs * nlmsg_for_each_attr() loop over all attributes * + * Misc: + * nlmsg_report() report back to application? + * * ------------------------------------------------------------------------ * Attributes Interface * ------------------------------------------------------------------------ @@ -194,6 +197,9 @@ extern void netlink_run_queue(struct sock *sk, unsigned int *qlen, struct nlmsghdr *, int *)); extern void netlink_queue_skip(struct nlmsghdr *nlh, struct sk_buff *skb); +extern int nlmsg_notify(struct sock *sk, struct sk_buff *skb, + u32 pid, unsigned int group, int report, + gfp_t flags); extern int nla_validate(struct nlattr *head, int len, int maxtype, struct nla_policy *policy); @@ -375,6 +381,17 @@ static inline int nlmsg_validate(struct nlmsghdr *nlh, int hdrlen, int maxtype, nlmsg_attrlen(nlh, hdrlen), maxtype, policy); } +/** + * nlmsg_report - need to report back to application? + * @nlh: netlink message header + * + * Returns 1 if a report back to the application is requested. + */ +static inline int nlmsg_report(struct nlmsghdr *nlh) +{ + return !!(nlh->nlmsg_flags & NLM_F_ECHO); +} + /** * nlmsg_for_each_attr - iterate over a stream of attributes * @pos: loop counter, set to current attribute diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index e02fa6a33f42..2b1af17e6389 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -171,6 +171,22 @@ int rtnl_unicast(struct sk_buff *skb, u32 pid) return nlmsg_unicast(rtnl, skb, pid); } +int rtnl_notify(struct sk_buff *skb, u32 pid, u32 group, + struct nlmsghdr *nlh, gfp_t flags) +{ + int report = 0; + + if (nlh) + report = nlmsg_report(nlh); + + return nlmsg_notify(rtnl, skb, pid, group, report, flags); +} + +void rtnl_set_sk_err(u32 group, int error) +{ + netlink_set_err(rtnl, 0, group, error); +} + int rtnetlink_put_metrics(struct sk_buff *skb, u32 *metrics) { struct rtattr *mx = (struct rtattr*)skb->tail; @@ -829,3 +845,5 @@ EXPORT_SYMBOL(rtnl_lock); EXPORT_SYMBOL(rtnl_trylock); EXPORT_SYMBOL(rtnl_unlock); EXPORT_SYMBOL(rtnl_unicast); +EXPORT_SYMBOL(rtnl_notify); +EXPORT_SYMBOL(rtnl_set_sk_err); -- cgit v1.3-8-gc7d7 From 4e902c57417c4c285b98ba2722468d1c3ed83d1b Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 17 Aug 2006 18:14:52 -0700 Subject: [IPv4]: FIB configuration using struct fib_config Introduces struct fib_config replacing the ugly struct kern_rta prone to ordering issues. Avoids creating faked netlink messages for auto generated routes or requests via ioctl. A new interface net/nexthop.h is added to help navigate through nexthop configuration arrays. A new struct nl_info will be used to carry the necessary netlink information to be used for notifications later on. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/ip_fib.h | 55 +++---- include/net/netlink.h | 10 ++ include/net/nexthop.h | 33 ++++ net/ipv4/fib_frontend.c | 364 +++++++++++++++++++++++++++++++++++--------- net/ipv4/fib_hash.c | 94 ++++++------ net/ipv4/fib_lookup.h | 11 +- net/ipv4/fib_semantics.c | 385 ++++++++++++++--------------------------------- net/ipv4/fib_trie.c | 76 ++++------ 8 files changed, 560 insertions(+), 468 deletions(-) create mode 100644 include/net/nexthop.h (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 8e9ba563d342..42ed96fab3f5 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -20,25 +20,32 @@ #include #include -/* WARNING: The ordering of these elements must match ordering - * of RTA_* rtnetlink attribute numbers. - */ -struct kern_rta { - void *rta_dst; - void *rta_src; - int *rta_iif; - int *rta_oif; - void *rta_gw; - u32 *rta_priority; - void *rta_prefsrc; - struct rtattr *rta_mx; - struct rtattr *rta_mp; - unsigned char *rta_protoinfo; - u32 *rta_flow; - struct rta_cacheinfo *rta_ci; - struct rta_session *rta_sess; - u32 *rta_mp_alg; -}; +struct fib_config { + u8 fc_family; + u8 fc_dst_len; + u8 fc_src_len; + u8 fc_tos; + u8 fc_protocol; + u8 fc_scope; + u8 fc_type; + /* 1 byte unused */ + u32 fc_table; + u32 fc_dst; + u32 fc_src; + u32 fc_gw; + int fc_oif; + u32 fc_flags; + u32 fc_priority; + u32 fc_prefsrc; + struct nlattr *fc_mx; + struct rtnexthop *fc_mp; + int fc_mx_len; + int fc_mp_len; + u32 fc_flow; + u32 fc_mp_alg; + u32 fc_nlflags; + struct nl_info fc_nlinfo; + }; struct fib_info; @@ -154,12 +161,8 @@ struct fib_table { u32 tb_id; unsigned tb_stamp; int (*tb_lookup)(struct fib_table *tb, const struct flowi *flp, struct fib_result *res); - int (*tb_insert)(struct fib_table *table, struct rtmsg *r, - struct kern_rta *rta, struct nlmsghdr *n, - struct netlink_skb_parms *req); - int (*tb_delete)(struct fib_table *table, struct rtmsg *r, - struct kern_rta *rta, struct nlmsghdr *n, - struct netlink_skb_parms *req); + int (*tb_insert)(struct fib_table *, struct fib_config *); + int (*tb_delete)(struct fib_table *, struct fib_config *); int (*tb_dump)(struct fib_table *table, struct sk_buff *skb, struct netlink_callback *cb); int (*tb_flush)(struct fib_table *table); @@ -228,8 +231,6 @@ struct rtentry; extern int ip_fib_check_default(u32 gw, struct net_device *dev); extern int fib_sync_down(u32 local, struct net_device *dev, int force); extern int fib_sync_up(struct net_device *dev); -extern int fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, - struct kern_rta *rta, struct rtentry *r); extern u32 __fib_res_prefsrc(struct fib_result *res); /* Exported by fib_hash.c */ diff --git a/include/net/netlink.h b/include/net/netlink.h index bf593eb59e1b..47044da167c5 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -192,6 +192,16 @@ struct nla_policy { u16 minlen; }; +/** + * struct nl_info - netlink source information + * @nlh: Netlink message header of original request + * @pid: Netlink PID of requesting application + */ +struct nl_info { + struct nlmsghdr *nlh; + u32 pid; +}; + extern void netlink_run_queue(struct sock *sk, unsigned int *qlen, int (*cb)(struct sk_buff *, struct nlmsghdr *, int *)); diff --git a/include/net/nexthop.h b/include/net/nexthop.h new file mode 100644 index 000000000000..3334dbfa5aa4 --- /dev/null +++ b/include/net/nexthop.h @@ -0,0 +1,33 @@ +#ifndef __NET_NEXTHOP_H +#define __NET_NEXTHOP_H + +#include +#include + +static inline int rtnh_ok(const struct rtnexthop *rtnh, int remaining) +{ + return remaining >= sizeof(*rtnh) && + rtnh->rtnh_len >= sizeof(*rtnh) && + rtnh->rtnh_len <= remaining; +} + +static inline struct rtnexthop *rtnh_next(const struct rtnexthop *rtnh, + int *remaining) +{ + int totlen = NLA_ALIGN(rtnh->rtnh_len); + + *remaining -= totlen; + return (struct rtnexthop *) ((char *) rtnh + totlen); +} + +static inline struct nlattr *rtnh_attrs(const struct rtnexthop *rtnh) +{ + return (struct nlattr *) ((char *) rtnh + NLA_ALIGN(sizeof(*rtnh))); +} + +static inline int rtnh_attrlen(const struct rtnexthop *rtnh) +{ + return rtnh->rtnh_len - NLA_ALIGN(sizeof(*rtnh)); +} + +#endif diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index ad4c14f968a1..acc18bdf2dee 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -253,42 +253,190 @@ e_inval: #ifndef CONFIG_IP_NOSIOCRT +static inline u32 sk_extract_addr(struct sockaddr *addr) +{ + return ((struct sockaddr_in *) addr)->sin_addr.s_addr; +} + +static int put_rtax(struct nlattr *mx, int len, int type, u32 value) +{ + struct nlattr *nla; + + nla = (struct nlattr *) ((char *) mx + len); + nla->nla_type = type; + nla->nla_len = nla_attr_size(4); + *(u32 *) nla_data(nla) = value; + + return len + nla_total_size(4); +} + +static int rtentry_to_fib_config(int cmd, struct rtentry *rt, + struct fib_config *cfg) +{ + u32 addr; + int plen; + + memset(cfg, 0, sizeof(*cfg)); + + if (rt->rt_dst.sa_family != AF_INET) + return -EAFNOSUPPORT; + + /* + * Check mask for validity: + * a) it must be contiguous. + * b) destination must have all host bits clear. + * c) if application forgot to set correct family (AF_INET), + * reject request unless it is absolutely clear i.e. + * both family and mask are zero. + */ + plen = 32; + addr = sk_extract_addr(&rt->rt_dst); + if (!(rt->rt_flags & RTF_HOST)) { + u32 mask = sk_extract_addr(&rt->rt_genmask); + + if (rt->rt_genmask.sa_family != AF_INET) { + if (mask || rt->rt_genmask.sa_family) + return -EAFNOSUPPORT; + } + + if (bad_mask(mask, addr)) + return -EINVAL; + + plen = inet_mask_len(mask); + } + + cfg->fc_dst_len = plen; + cfg->fc_dst = addr; + + if (cmd != SIOCDELRT) { + cfg->fc_nlflags = NLM_F_CREATE; + cfg->fc_protocol = RTPROT_BOOT; + } + + if (rt->rt_metric) + cfg->fc_priority = rt->rt_metric - 1; + + if (rt->rt_flags & RTF_REJECT) { + cfg->fc_scope = RT_SCOPE_HOST; + cfg->fc_type = RTN_UNREACHABLE; + return 0; + } + + cfg->fc_scope = RT_SCOPE_NOWHERE; + cfg->fc_type = RTN_UNICAST; + + if (rt->rt_dev) { + char *colon; + struct net_device *dev; + char devname[IFNAMSIZ]; + + if (copy_from_user(devname, rt->rt_dev, IFNAMSIZ-1)) + return -EFAULT; + + devname[IFNAMSIZ-1] = 0; + colon = strchr(devname, ':'); + if (colon) + *colon = 0; + dev = __dev_get_by_name(devname); + if (!dev) + return -ENODEV; + cfg->fc_oif = dev->ifindex; + if (colon) { + struct in_ifaddr *ifa; + struct in_device *in_dev = __in_dev_get_rtnl(dev); + if (!in_dev) + return -ENODEV; + *colon = ':'; + for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) + if (strcmp(ifa->ifa_label, devname) == 0) + break; + if (ifa == NULL) + return -ENODEV; + cfg->fc_prefsrc = ifa->ifa_local; + } + } + + addr = sk_extract_addr(&rt->rt_gateway); + if (rt->rt_gateway.sa_family == AF_INET && addr) { + cfg->fc_gw = addr; + if (rt->rt_flags & RTF_GATEWAY && + inet_addr_type(addr) == RTN_UNICAST) + cfg->fc_scope = RT_SCOPE_UNIVERSE; + } + + if (cmd == SIOCDELRT) + return 0; + + if (rt->rt_flags & RTF_GATEWAY && !cfg->fc_gw) + return -EINVAL; + + if (cfg->fc_scope == RT_SCOPE_NOWHERE) + cfg->fc_scope = RT_SCOPE_LINK; + + if (rt->rt_flags & (RTF_MTU | RTF_WINDOW | RTF_IRTT)) { + struct nlattr *mx; + int len = 0; + + mx = kzalloc(3 * nla_total_size(4), GFP_KERNEL); + if (mx == NULL) + return -ENOMEM; + + if (rt->rt_flags & RTF_MTU) + len = put_rtax(mx, len, RTAX_ADVMSS, rt->rt_mtu - 40); + + if (rt->rt_flags & RTF_WINDOW) + len = put_rtax(mx, len, RTAX_WINDOW, rt->rt_window); + + if (rt->rt_flags & RTF_IRTT) + len = put_rtax(mx, len, RTAX_RTT, rt->rt_irtt << 3); + + cfg->fc_mx = mx; + cfg->fc_mx_len = len; + } + + return 0; +} + /* * Handle IP routing ioctl calls. These are used to manipulate the routing tables */ int ip_rt_ioctl(unsigned int cmd, void __user *arg) { + struct fib_config cfg; + struct rtentry rt; int err; - struct kern_rta rta; - struct rtentry r; - struct { - struct nlmsghdr nlh; - struct rtmsg rtm; - } req; switch (cmd) { case SIOCADDRT: /* Add a route */ case SIOCDELRT: /* Delete a route */ if (!capable(CAP_NET_ADMIN)) return -EPERM; - if (copy_from_user(&r, arg, sizeof(struct rtentry))) + + if (copy_from_user(&rt, arg, sizeof(rt))) return -EFAULT; + rtnl_lock(); - err = fib_convert_rtentry(cmd, &req.nlh, &req.rtm, &rta, &r); + err = rtentry_to_fib_config(cmd, &rt, &cfg); if (err == 0) { + struct fib_table *tb; + if (cmd == SIOCDELRT) { - struct fib_table *tb = fib_get_table(req.rtm.rtm_table); - err = -ESRCH; + tb = fib_get_table(cfg.fc_table); if (tb) - err = tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL); + err = tb->tb_delete(tb, &cfg); + else + err = -ESRCH; } else { - struct fib_table *tb = fib_new_table(req.rtm.rtm_table); - err = -ENOBUFS; + tb = fib_new_table(cfg.fc_table); if (tb) - err = tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL); + err = tb->tb_insert(tb, &cfg); + else + err = -ENOBUFS; } - kfree(rta.rta_mx); + + /* allocated by rtentry_to_fib_config() */ + kfree(cfg.fc_mx); } rtnl_unlock(); return err; @@ -305,51 +453,134 @@ int ip_rt_ioctl(unsigned int cmd, void *arg) #endif -static int inet_check_attr(struct rtmsg *r, struct rtattr **rta) +static struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = { + [RTA_DST] = { .type = NLA_U32 }, + [RTA_SRC] = { .type = NLA_U32 }, + [RTA_IIF] = { .type = NLA_U32 }, + [RTA_OIF] = { .type = NLA_U32 }, + [RTA_GATEWAY] = { .type = NLA_U32 }, + [RTA_PRIORITY] = { .type = NLA_U32 }, + [RTA_PREFSRC] = { .type = NLA_U32 }, + [RTA_METRICS] = { .type = NLA_NESTED }, + [RTA_MULTIPATH] = { .minlen = sizeof(struct rtnexthop) }, + [RTA_PROTOINFO] = { .type = NLA_U32 }, + [RTA_FLOW] = { .type = NLA_U32 }, + [RTA_MP_ALGO] = { .type = NLA_U32 }, +}; + +static int rtm_to_fib_config(struct sk_buff *skb, struct nlmsghdr *nlh, + struct fib_config *cfg) { - int i; - - for (i=1; i<=RTA_MAX; i++, rta++) { - struct rtattr *attr = *rta; - if (attr) { - if (RTA_PAYLOAD(attr) < 4) - return -EINVAL; - if (i != RTA_MULTIPATH && i != RTA_METRICS && - i != RTA_TABLE) - *rta = (struct rtattr*)RTA_DATA(attr); + struct nlattr *attr; + int err, remaining; + struct rtmsg *rtm; + + err = nlmsg_validate(nlh, sizeof(*rtm), RTA_MAX, rtm_ipv4_policy); + if (err < 0) + goto errout; + + memset(cfg, 0, sizeof(*cfg)); + + rtm = nlmsg_data(nlh); + cfg->fc_family = rtm->rtm_family; + cfg->fc_dst_len = rtm->rtm_dst_len; + cfg->fc_src_len = rtm->rtm_src_len; + cfg->fc_tos = rtm->rtm_tos; + cfg->fc_table = rtm->rtm_table; + cfg->fc_protocol = rtm->rtm_protocol; + cfg->fc_scope = rtm->rtm_scope; + cfg->fc_type = rtm->rtm_type; + cfg->fc_flags = rtm->rtm_flags; + cfg->fc_nlflags = nlh->nlmsg_flags; + + cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; + cfg->fc_nlinfo.nlh = nlh; + + nlmsg_for_each_attr(attr, nlh, sizeof(struct rtmsg), remaining) { + switch (attr->nla_type) { + case RTA_DST: + cfg->fc_dst = nla_get_u32(attr); + break; + case RTA_SRC: + cfg->fc_src = nla_get_u32(attr); + break; + case RTA_OIF: + cfg->fc_oif = nla_get_u32(attr); + break; + case RTA_GATEWAY: + cfg->fc_gw = nla_get_u32(attr); + break; + case RTA_PRIORITY: + cfg->fc_priority = nla_get_u32(attr); + break; + case RTA_PREFSRC: + cfg->fc_prefsrc = nla_get_u32(attr); + break; + case RTA_METRICS: + cfg->fc_mx = nla_data(attr); + cfg->fc_mx_len = nla_len(attr); + break; + case RTA_MULTIPATH: + cfg->fc_mp = nla_data(attr); + cfg->fc_mp_len = nla_len(attr); + break; + case RTA_FLOW: + cfg->fc_flow = nla_get_u32(attr); + break; + case RTA_MP_ALGO: + cfg->fc_mp_alg = nla_get_u32(attr); + break; + case RTA_TABLE: + cfg->fc_table = nla_get_u32(attr); + break; } } + return 0; +errout: + return err; } int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct fib_table * tb; - struct rtattr **rta = arg; - struct rtmsg *r = NLMSG_DATA(nlh); + struct fib_config cfg; + struct fib_table *tb; + int err; - if (inet_check_attr(r, rta)) - return -EINVAL; + err = rtm_to_fib_config(skb, nlh, &cfg); + if (err < 0) + goto errout; - tb = fib_get_table(rtm_get_table(rta, r->rtm_table)); - if (tb) - return tb->tb_delete(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb)); - return -ESRCH; + tb = fib_get_table(cfg.fc_table); + if (tb == NULL) { + err = -ESRCH; + goto errout; + } + + err = tb->tb_delete(tb, &cfg); +errout: + return err; } int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct fib_table * tb; - struct rtattr **rta = arg; - struct rtmsg *r = NLMSG_DATA(nlh); + struct fib_config cfg; + struct fib_table *tb; + int err; - if (inet_check_attr(r, rta)) - return -EINVAL; + err = rtm_to_fib_config(skb, nlh, &cfg); + if (err < 0) + goto errout; - tb = fib_new_table(rtm_get_table(rta, r->rtm_table)); - if (tb) - return tb->tb_insert(tb, r, (struct kern_rta*)rta, nlh, &NETLINK_CB(skb)); - return -ENOBUFS; + tb = fib_new_table(cfg.fc_table); + if (tb == NULL) { + err = -ENOBUFS; + goto errout; + } + + err = tb->tb_insert(tb, &cfg); +errout: + return err; } int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb) @@ -396,17 +627,19 @@ out: only when netlink is already locked. */ -static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr *ifa) +static void fib_magic(int cmd, int type, u32 dst, int dst_len, + struct in_ifaddr *ifa) { - struct fib_table * tb; - struct { - struct nlmsghdr nlh; - struct rtmsg rtm; - } req; - struct kern_rta rta; - - memset(&req.rtm, 0, sizeof(req.rtm)); - memset(&rta, 0, sizeof(rta)); + struct fib_table *tb; + struct fib_config cfg = { + .fc_protocol = RTPROT_KERNEL, + .fc_type = type, + .fc_dst = dst, + .fc_dst_len = dst_len, + .fc_prefsrc = ifa->ifa_local, + .fc_oif = ifa->ifa_dev->dev->ifindex, + .fc_nlflags = NLM_F_CREATE | NLM_F_APPEND, + }; if (type == RTN_UNICAST) tb = fib_new_table(RT_TABLE_MAIN); @@ -416,26 +649,17 @@ static void fib_magic(int cmd, int type, u32 dst, int dst_len, struct in_ifaddr if (tb == NULL) return; - req.nlh.nlmsg_len = sizeof(req); - req.nlh.nlmsg_type = cmd; - req.nlh.nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE|NLM_F_APPEND; - req.nlh.nlmsg_pid = 0; - req.nlh.nlmsg_seq = 0; + cfg.fc_table = tb->tb_id; - req.rtm.rtm_dst_len = dst_len; - req.rtm.rtm_table = tb->tb_id; - req.rtm.rtm_protocol = RTPROT_KERNEL; - req.rtm.rtm_scope = (type != RTN_LOCAL ? RT_SCOPE_LINK : RT_SCOPE_HOST); - req.rtm.rtm_type = type; - - rta.rta_dst = &dst; - rta.rta_prefsrc = &ifa->ifa_local; - rta.rta_oif = &ifa->ifa_dev->dev->ifindex; + if (type != RTN_LOCAL) + cfg.fc_scope = RT_SCOPE_LINK; + else + cfg.fc_scope = RT_SCOPE_HOST; if (cmd == RTM_NEWROUTE) - tb->tb_insert(tb, &req.rtm, &rta, &req.nlh, NULL); + tb->tb_insert(tb, &cfg); else - tb->tb_delete(tb, &req.rtm, &rta, &req.nlh, NULL); + tb->tb_delete(tb, &cfg); } void fib_add_ifaddr(struct in_ifaddr *ifa) diff --git a/net/ipv4/fib_hash.c b/net/ipv4/fib_hash.c index b5bee1a71e5c..357557549ce5 100644 --- a/net/ipv4/fib_hash.c +++ b/net/ipv4/fib_hash.c @@ -379,42 +379,39 @@ static struct fib_node *fib_find_node(struct fn_zone *fz, u32 key) return NULL; } -static int -fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, - struct nlmsghdr *n, struct netlink_skb_parms *req) +static int fn_hash_insert(struct fib_table *tb, struct fib_config *cfg) { struct fn_hash *table = (struct fn_hash *) tb->tb_data; struct fib_node *new_f, *f; struct fib_alias *fa, *new_fa; struct fn_zone *fz; struct fib_info *fi; - int z = r->rtm_dst_len; - int type = r->rtm_type; - u8 tos = r->rtm_tos; + u8 tos = cfg->fc_tos; u32 key; int err; - if (z > 32) + if (cfg->fc_dst_len > 32) return -EINVAL; - fz = table->fn_zones[z]; - if (!fz && !(fz = fn_new_zone(table, z))) + + fz = table->fn_zones[cfg->fc_dst_len]; + if (!fz && !(fz = fn_new_zone(table, cfg->fc_dst_len))) return -ENOBUFS; key = 0; - if (rta->rta_dst) { - u32 dst; - memcpy(&dst, rta->rta_dst, 4); - if (dst & ~FZ_MASK(fz)) + if (cfg->fc_dst) { + if (cfg->fc_dst & ~FZ_MASK(fz)) return -EINVAL; - key = fz_key(dst, fz); + key = fz_key(cfg->fc_dst, fz); } - if ((fi = fib_create_info(r, rta, n, &err)) == NULL) - return err; + fi = fib_create_info(cfg); + if (IS_ERR(fi)) + return PTR_ERR(fi); if (fz->fz_nent > (fz->fz_divisor<<1) && fz->fz_divisor < FZ_MAX_DIVISOR && - (z==32 || (1< fz->fz_divisor)) + (cfg->fc_dst_len == 32 || + (1 << cfg->fc_dst_len) > fz->fz_divisor)) fn_rehash_zone(fz); f = fib_find_node(fz, key); @@ -440,18 +437,18 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, struct fib_alias *fa_orig; err = -EEXIST; - if (n->nlmsg_flags & NLM_F_EXCL) + if (cfg->fc_nlflags & NLM_F_EXCL) goto out; - if (n->nlmsg_flags & NLM_F_REPLACE) { + if (cfg->fc_nlflags & NLM_F_REPLACE) { struct fib_info *fi_drop; u8 state; write_lock_bh(&fib_hash_lock); fi_drop = fa->fa_info; fa->fa_info = fi; - fa->fa_type = type; - fa->fa_scope = r->rtm_scope; + fa->fa_type = cfg->fc_type; + fa->fa_scope = cfg->fc_scope; state = fa->fa_state; fa->fa_state &= ~FA_S_ACCESSED; fib_hash_genid++; @@ -474,17 +471,17 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, break; if (fa->fa_info->fib_priority != fi->fib_priority) break; - if (fa->fa_type == type && - fa->fa_scope == r->rtm_scope && + if (fa->fa_type == cfg->fc_type && + fa->fa_scope == cfg->fc_scope && fa->fa_info == fi) goto out; } - if (!(n->nlmsg_flags & NLM_F_APPEND)) + if (!(cfg->fc_nlflags & NLM_F_APPEND)) fa = fa_orig; } err = -ENOENT; - if (!(n->nlmsg_flags&NLM_F_CREATE)) + if (!(cfg->fc_nlflags & NLM_F_CREATE)) goto out; err = -ENOBUFS; @@ -506,8 +503,8 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, new_fa->fa_info = fi; new_fa->fa_tos = tos; - new_fa->fa_type = type; - new_fa->fa_scope = r->rtm_scope; + new_fa->fa_type = cfg->fc_type; + new_fa->fa_scope = cfg->fc_scope; new_fa->fa_state = 0; /* @@ -526,7 +523,8 @@ fn_hash_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, fz->fz_nent++; rt_cache_flush(-1); - rtmsg_fib(RTM_NEWROUTE, key, new_fa, z, tb->tb_id, n, req); + rtmsg_fib(RTM_NEWROUTE, key, new_fa, cfg->fc_dst_len, tb->tb_id, + &cfg->fc_nlinfo); return 0; out_free_new_fa: @@ -537,30 +535,25 @@ out: } -static int -fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, - struct nlmsghdr *n, struct netlink_skb_parms *req) +static int fn_hash_delete(struct fib_table *tb, struct fib_config *cfg) { struct fn_hash *table = (struct fn_hash*)tb->tb_data; struct fib_node *f; struct fib_alias *fa, *fa_to_delete; - int z = r->rtm_dst_len; struct fn_zone *fz; u32 key; - u8 tos = r->rtm_tos; - if (z > 32) + if (cfg->fc_dst_len > 32) return -EINVAL; - if ((fz = table->fn_zones[z]) == NULL) + + if ((fz = table->fn_zones[cfg->fc_dst_len]) == NULL) return -ESRCH; key = 0; - if (rta->rta_dst) { - u32 dst; - memcpy(&dst, rta->rta_dst, 4); - if (dst & ~FZ_MASK(fz)) + if (cfg->fc_dst) { + if (cfg->fc_dst & ~FZ_MASK(fz)) return -EINVAL; - key = fz_key(dst, fz); + key = fz_key(cfg->fc_dst, fz); } f = fib_find_node(fz, key); @@ -568,7 +561,7 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, if (!f) fa = NULL; else - fa = fib_find_alias(&f->fn_alias, tos, 0); + fa = fib_find_alias(&f->fn_alias, cfg->fc_tos, 0); if (!fa) return -ESRCH; @@ -577,16 +570,16 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, list_for_each_entry_continue(fa, &f->fn_alias, fa_list) { struct fib_info *fi = fa->fa_info; - if (fa->fa_tos != tos) + if (fa->fa_tos != cfg->fc_tos) break; - if ((!r->rtm_type || - fa->fa_type == r->rtm_type) && - (r->rtm_scope == RT_SCOPE_NOWHERE || - fa->fa_scope == r->rtm_scope) && - (!r->rtm_protocol || - fi->fib_protocol == r->rtm_protocol) && - fib_nh_match(r, n, rta, fi) == 0) { + if ((!cfg->fc_type || + fa->fa_type == cfg->fc_type) && + (cfg->fc_scope == RT_SCOPE_NOWHERE || + fa->fa_scope == cfg->fc_scope) && + (!cfg->fc_protocol || + fi->fib_protocol == cfg->fc_protocol) && + fib_nh_match(cfg, fi) == 0) { fa_to_delete = fa; break; } @@ -596,7 +589,8 @@ fn_hash_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, int kill_fn; fa = fa_to_delete; - rtmsg_fib(RTM_DELROUTE, key, fa, z, tb->tb_id, n, req); + rtmsg_fib(RTM_DELROUTE, key, fa, cfg->fc_dst_len, + tb->tb_id, &cfg->fc_nlinfo); kill_fn = 0; write_lock_bh(&fib_hash_lock); diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index ddd52496b451..d6d1a89e4003 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -23,19 +23,14 @@ extern int fib_semantic_match(struct list_head *head, struct fib_result *res, __u32 zone, __u32 mask, int prefixlen); extern void fib_release_info(struct fib_info *); -extern struct fib_info *fib_create_info(const struct rtmsg *r, - struct kern_rta *rta, - const struct nlmsghdr *, - int *err); -extern int fib_nh_match(struct rtmsg *r, struct nlmsghdr *, - struct kern_rta *rta, struct fib_info *fi); +extern struct fib_info *fib_create_info(struct fib_config *cfg); +extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, u32 tb_id, u8 type, u8 scope, void *dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int); extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, - int z, u32 tb_id, - struct nlmsghdr *n, struct netlink_skb_parms *req); + int dst_len, u32 tb_id, struct nl_info *info); extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); extern int fib_detect_death(struct fib_info *fi, int order, diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 5dfdad5cbcd4..340f9db389e5 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -44,6 +44,7 @@ #include #include #include +#include #include "fib_lookup.h" @@ -273,27 +274,27 @@ int ip_fib_check_default(u32 gw, struct net_device *dev) } void rtmsg_fib(int event, u32 key, struct fib_alias *fa, - int z, u32 tb_id, - struct nlmsghdr *n, struct netlink_skb_parms *req) + int dst_len, u32 tb_id, struct nl_info *info) { struct sk_buff *skb; - u32 pid = req ? req->pid : n->nlmsg_pid; int payload = sizeof(struct rtmsg) + 256; + u32 seq = info->nlh ? info->nlh->nlmsg_seq : 0; int err = -ENOBUFS; skb = nlmsg_new(nlmsg_total_size(payload), GFP_KERNEL); if (skb == NULL) goto errout; - err = fib_dump_info(skb, pid, n->nlmsg_seq, event, tb_id, - fa->fa_type, fa->fa_scope, &key, z, fa->fa_tos, - fa->fa_info, 0); + err = fib_dump_info(skb, info->pid, seq, event, tb_id, + fa->fa_type, fa->fa_scope, &key, dst_len, + fa->fa_tos, fa->fa_info, 0); if (err < 0) { kfree_skb(skb); goto errout; } - err = rtnl_notify(skb, pid, RTNLGRP_IPV4_ROUTE, n, GFP_KERNEL); + err = rtnl_notify(skb, info->pid, RTNLGRP_IPV4_ROUTE, + info->nlh, GFP_KERNEL); errout: if (err < 0) rtnl_set_sk_err(RTNLGRP_IPV4_ROUTE, err); @@ -342,102 +343,100 @@ int fib_detect_death(struct fib_info *fi, int order, #ifdef CONFIG_IP_ROUTE_MULTIPATH -static u32 fib_get_attr32(struct rtattr *attr, int attrlen, int type) -{ - while (RTA_OK(attr,attrlen)) { - if (attr->rta_type == type) - return *(u32*)RTA_DATA(attr); - attr = RTA_NEXT(attr, attrlen); - } - return 0; -} - -static int -fib_count_nexthops(struct rtattr *rta) +static int fib_count_nexthops(struct rtnexthop *rtnh, int remaining) { int nhs = 0; - struct rtnexthop *nhp = RTA_DATA(rta); - int nhlen = RTA_PAYLOAD(rta); - while (nhlen >= (int)sizeof(struct rtnexthop)) { - if ((nhlen -= nhp->rtnh_len) < 0) - return 0; + while (rtnh_ok(rtnh, remaining)) { nhs++; - nhp = RTNH_NEXT(nhp); - }; - return nhs; + rtnh = rtnh_next(rtnh, &remaining); + } + + /* leftover implies invalid nexthop configuration, discard it */ + return remaining > 0 ? 0 : nhs; } -static int -fib_get_nhs(struct fib_info *fi, const struct rtattr *rta, const struct rtmsg *r) +static int fib_get_nhs(struct fib_info *fi, struct rtnexthop *rtnh, + int remaining, struct fib_config *cfg) { - struct rtnexthop *nhp = RTA_DATA(rta); - int nhlen = RTA_PAYLOAD(rta); - change_nexthops(fi) { - int attrlen = nhlen - sizeof(struct rtnexthop); - if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) + int attrlen; + + if (!rtnh_ok(rtnh, remaining)) return -EINVAL; - nh->nh_flags = (r->rtm_flags&~0xFF) | nhp->rtnh_flags; - nh->nh_oif = nhp->rtnh_ifindex; - nh->nh_weight = nhp->rtnh_hops + 1; - if (attrlen) { - nh->nh_gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); + + nh->nh_flags = (cfg->fc_flags & ~0xFF) | rtnh->rtnh_flags; + nh->nh_oif = rtnh->rtnh_ifindex; + nh->nh_weight = rtnh->rtnh_hops + 1; + + attrlen = rtnh_attrlen(rtnh); + if (attrlen > 0) { + struct nlattr *nla, *attrs = rtnh_attrs(rtnh); + + nla = nla_find(attrs, attrlen, RTA_GATEWAY); + nh->nh_gw = nla ? nla_get_u32(nla) : 0; #ifdef CONFIG_NET_CLS_ROUTE - nh->nh_tclassid = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW); + nla = nla_find(attrs, attrlen, RTA_FLOW); + nh->nh_tclassid = nla ? nla_get_u32(nla) : 0; #endif } - nhp = RTNH_NEXT(nhp); + + rtnh = rtnh_next(rtnh, &remaining); } endfor_nexthops(fi); + return 0; } #endif -int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta, - struct fib_info *fi) +int fib_nh_match(struct fib_config *cfg, struct fib_info *fi) { #ifdef CONFIG_IP_ROUTE_MULTIPATH - struct rtnexthop *nhp; - int nhlen; + struct rtnexthop *rtnh; + int remaining; #endif - if (rta->rta_priority && - *rta->rta_priority != fi->fib_priority) + if (cfg->fc_priority && cfg->fc_priority != fi->fib_priority) return 1; - if (rta->rta_oif || rta->rta_gw) { - if ((!rta->rta_oif || *rta->rta_oif == fi->fib_nh->nh_oif) && - (!rta->rta_gw || memcmp(rta->rta_gw, &fi->fib_nh->nh_gw, 4) == 0)) + if (cfg->fc_oif || cfg->fc_gw) { + if ((!cfg->fc_oif || cfg->fc_oif == fi->fib_nh->nh_oif) && + (!cfg->fc_gw || cfg->fc_gw == fi->fib_nh->nh_gw)) return 0; return 1; } #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (rta->rta_mp == NULL) + if (cfg->fc_mp == NULL) return 0; - nhp = RTA_DATA(rta->rta_mp); - nhlen = RTA_PAYLOAD(rta->rta_mp); + + rtnh = cfg->fc_mp; + remaining = cfg->fc_mp_len; for_nexthops(fi) { - int attrlen = nhlen - sizeof(struct rtnexthop); - u32 gw; + int attrlen; - if (attrlen < 0 || (nhlen -= nhp->rtnh_len) < 0) + if (!rtnh_ok(rtnh, remaining)) return -EINVAL; - if (nhp->rtnh_ifindex && nhp->rtnh_ifindex != nh->nh_oif) + + if (rtnh->rtnh_ifindex && rtnh->rtnh_ifindex != nh->nh_oif) return 1; - if (attrlen) { - gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_GATEWAY); - if (gw && gw != nh->nh_gw) + + attrlen = rtnh_attrlen(rtnh); + if (attrlen < 0) { + struct nlattr *nla, *attrs = rtnh_attrs(rtnh); + + nla = nla_find(attrs, attrlen, RTA_GATEWAY); + if (nla && nla_get_u32(nla) != nh->nh_gw) return 1; #ifdef CONFIG_NET_CLS_ROUTE - gw = fib_get_attr32(RTNH_DATA(nhp), attrlen, RTA_FLOW); - if (gw && gw != nh->nh_tclassid) + nla = nla_find(attrs, attrlen, RTA_FLOW); + if (nla && nla_get_u32(nla) != nh->nh_tclassid) return 1; #endif } - nhp = RTNH_NEXT(nhp); + + rtnh = rtnh_next(rtnh, &remaining); } endfor_nexthops(fi); #endif return 0; @@ -488,7 +487,8 @@ int fib_nh_match(struct rtmsg *r, struct nlmsghdr *nlh, struct kern_rta *rta, |-> {local prefix} (terminal node) */ -static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_nh *nh) +static int fib_check_nh(struct fib_config *cfg, struct fib_info *fi, + struct fib_nh *nh) { int err; @@ -502,7 +502,7 @@ static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_n if (nh->nh_flags&RTNH_F_ONLINK) { struct net_device *dev; - if (r->rtm_scope >= RT_SCOPE_LINK) + if (cfg->fc_scope >= RT_SCOPE_LINK) return -EINVAL; if (inet_addr_type(nh->nh_gw) != RTN_UNICAST) return -EINVAL; @@ -516,10 +516,15 @@ static int fib_check_nh(const struct rtmsg *r, struct fib_info *fi, struct fib_n return 0; } { - struct flowi fl = { .nl_u = { .ip4_u = - { .daddr = nh->nh_gw, - .scope = r->rtm_scope + 1 } }, - .oif = nh->nh_oif }; + struct flowi fl = { + .nl_u = { + .ip4_u = { + .daddr = nh->nh_gw, + .scope = cfg->fc_scope + 1, + }, + }, + .oif = nh->nh_oif, + }; /* It is not necessary, but requires a bit of thinking */ if (fl.fl4_scope < RT_SCOPE_LINK) @@ -646,39 +651,28 @@ static void fib_hash_move(struct hlist_head *new_info_hash, fib_hash_free(old_laddrhash, bytes); } -struct fib_info * -fib_create_info(const struct rtmsg *r, struct kern_rta *rta, - const struct nlmsghdr *nlh, int *errp) +struct fib_info *fib_create_info(struct fib_config *cfg) { int err; struct fib_info *fi = NULL; struct fib_info *ofi; -#ifdef CONFIG_IP_ROUTE_MULTIPATH int nhs = 1; -#else - const int nhs = 1; -#endif -#ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - u32 mp_alg = IP_MP_ALG_NONE; -#endif /* Fast check to catch the most weird cases */ - if (fib_props[r->rtm_type].scope > r->rtm_scope) + if (fib_props[cfg->fc_type].scope > cfg->fc_scope) goto err_inval; #ifdef CONFIG_IP_ROUTE_MULTIPATH - if (rta->rta_mp) { - nhs = fib_count_nexthops(rta->rta_mp); + if (cfg->fc_mp) { + nhs = fib_count_nexthops(cfg->fc_mp, cfg->fc_mp_len); if (nhs == 0) goto err_inval; } #endif #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - if (rta->rta_mp_alg) { - mp_alg = *rta->rta_mp_alg; - - if (mp_alg < IP_MP_ALG_NONE || - mp_alg > IP_MP_ALG_MAX) + if (cfg->fc_mp_alg) { + if (cfg->fc_mp_alg < IP_MP_ALG_NONE || + cfg->fc_mp_alg > IP_MP_ALG_MAX) goto err_inval; } #endif @@ -714,43 +708,42 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta, goto failure; fib_info_cnt++; - fi->fib_protocol = r->rtm_protocol; + fi->fib_protocol = cfg->fc_protocol; + fi->fib_flags = cfg->fc_flags; + fi->fib_priority = cfg->fc_priority; + fi->fib_prefsrc = cfg->fc_prefsrc; fi->fib_nhs = nhs; change_nexthops(fi) { nh->nh_parent = fi; } endfor_nexthops(fi) - fi->fib_flags = r->rtm_flags; - if (rta->rta_priority) - fi->fib_priority = *rta->rta_priority; - if (rta->rta_mx) { - int attrlen = RTA_PAYLOAD(rta->rta_mx); - struct rtattr *attr = RTA_DATA(rta->rta_mx); - - while (RTA_OK(attr, attrlen)) { - unsigned flavor = attr->rta_type; - if (flavor) { - if (flavor > RTAX_MAX) + if (cfg->fc_mx) { + struct nlattr *nla; + int remaining; + + nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { + int type = nla->nla_type; + + if (type) { + if (type > RTAX_MAX) goto err_inval; - fi->fib_metrics[flavor-1] = *(unsigned*)RTA_DATA(attr); + fi->fib_metrics[type - 1] = nla_get_u32(nla); } - attr = RTA_NEXT(attr, attrlen); } } - if (rta->rta_prefsrc) - memcpy(&fi->fib_prefsrc, rta->rta_prefsrc, 4); - if (rta->rta_mp) { + if (cfg->fc_mp) { #ifdef CONFIG_IP_ROUTE_MULTIPATH - if ((err = fib_get_nhs(fi, rta->rta_mp, r)) != 0) + err = fib_get_nhs(fi, cfg->fc_mp, cfg->fc_mp_len, cfg); + if (err != 0) goto failure; - if (rta->rta_oif && fi->fib_nh->nh_oif != *rta->rta_oif) + if (cfg->fc_oif && fi->fib_nh->nh_oif != cfg->fc_oif) goto err_inval; - if (rta->rta_gw && memcmp(&fi->fib_nh->nh_gw, rta->rta_gw, 4)) + if (cfg->fc_gw && fi->fib_nh->nh_gw != cfg->fc_gw) goto err_inval; #ifdef CONFIG_NET_CLS_ROUTE - if (rta->rta_flow && memcmp(&fi->fib_nh->nh_tclassid, rta->rta_flow, 4)) + if (cfg->fc_flow && fi->fib_nh->nh_tclassid != cfg->fc_flow) goto err_inval; #endif #else @@ -758,34 +751,32 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta, #endif } else { struct fib_nh *nh = fi->fib_nh; - if (rta->rta_oif) - nh->nh_oif = *rta->rta_oif; - if (rta->rta_gw) - memcpy(&nh->nh_gw, rta->rta_gw, 4); + + nh->nh_oif = cfg->fc_oif; + nh->nh_gw = cfg->fc_gw; + nh->nh_flags = cfg->fc_flags; #ifdef CONFIG_NET_CLS_ROUTE - if (rta->rta_flow) - memcpy(&nh->nh_tclassid, rta->rta_flow, 4); + nh->nh_tclassid = cfg->fc_flow; #endif - nh->nh_flags = r->rtm_flags; #ifdef CONFIG_IP_ROUTE_MULTIPATH nh->nh_weight = 1; #endif } #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - fi->fib_mp_alg = mp_alg; + fi->fib_mp_alg = cfg->fc_mp_alg; #endif - if (fib_props[r->rtm_type].error) { - if (rta->rta_gw || rta->rta_oif || rta->rta_mp) + if (fib_props[cfg->fc_type].error) { + if (cfg->fc_gw || cfg->fc_oif || cfg->fc_mp) goto err_inval; goto link_it; } - if (r->rtm_scope > RT_SCOPE_HOST) + if (cfg->fc_scope > RT_SCOPE_HOST) goto err_inval; - if (r->rtm_scope == RT_SCOPE_HOST) { + if (cfg->fc_scope == RT_SCOPE_HOST) { struct fib_nh *nh = fi->fib_nh; /* Local address is added. */ @@ -798,14 +789,14 @@ fib_create_info(const struct rtmsg *r, struct kern_rta *rta, goto failure; } else { change_nexthops(fi) { - if ((err = fib_check_nh(r, fi, nh)) != 0) + if ((err = fib_check_nh(cfg, fi, nh)) != 0) goto failure; } endfor_nexthops(fi) } if (fi->fib_prefsrc) { - if (r->rtm_type != RTN_LOCAL || rta->rta_dst == NULL || - memcmp(&fi->fib_prefsrc, rta->rta_dst, 4)) + if (cfg->fc_type != RTN_LOCAL || !cfg->fc_dst || + fi->fib_prefsrc != cfg->fc_dst) if (inet_addr_type(fi->fib_prefsrc) != RTN_LOCAL) goto err_inval; } @@ -846,12 +837,12 @@ err_inval: err = -EINVAL; failure: - *errp = err; if (fi) { fi->fib_dead = 1; free_fib_info(fi); } - return NULL; + + return ERR_PTR(err); } /* Note! fib_semantic_match intentionally uses RCU list functions. */ @@ -1012,150 +1003,6 @@ rtattr_failure: return -1; } -#ifndef CONFIG_IP_NOSIOCRT - -int -fib_convert_rtentry(int cmd, struct nlmsghdr *nl, struct rtmsg *rtm, - struct kern_rta *rta, struct rtentry *r) -{ - int plen; - u32 *ptr; - - memset(rtm, 0, sizeof(*rtm)); - memset(rta, 0, sizeof(*rta)); - - if (r->rt_dst.sa_family != AF_INET) - return -EAFNOSUPPORT; - - /* Check mask for validity: - a) it must be contiguous. - b) destination must have all host bits clear. - c) if application forgot to set correct family (AF_INET), - reject request unless it is absolutely clear i.e. - both family and mask are zero. - */ - plen = 32; - ptr = &((struct sockaddr_in*)&r->rt_dst)->sin_addr.s_addr; - if (!(r->rt_flags&RTF_HOST)) { - u32 mask = ((struct sockaddr_in*)&r->rt_genmask)->sin_addr.s_addr; - if (r->rt_genmask.sa_family != AF_INET) { - if (mask || r->rt_genmask.sa_family) - return -EAFNOSUPPORT; - } - if (bad_mask(mask, *ptr)) - return -EINVAL; - plen = inet_mask_len(mask); - } - - nl->nlmsg_flags = NLM_F_REQUEST; - nl->nlmsg_pid = 0; - nl->nlmsg_seq = 0; - nl->nlmsg_len = NLMSG_LENGTH(sizeof(*rtm)); - if (cmd == SIOCDELRT) { - nl->nlmsg_type = RTM_DELROUTE; - nl->nlmsg_flags = 0; - } else { - nl->nlmsg_type = RTM_NEWROUTE; - nl->nlmsg_flags = NLM_F_REQUEST|NLM_F_CREATE; - rtm->rtm_protocol = RTPROT_BOOT; - } - - rtm->rtm_dst_len = plen; - rta->rta_dst = ptr; - - if (r->rt_metric) { - *(u32*)&r->rt_pad3 = r->rt_metric - 1; - rta->rta_priority = (u32*)&r->rt_pad3; - } - if (r->rt_flags&RTF_REJECT) { - rtm->rtm_scope = RT_SCOPE_HOST; - rtm->rtm_type = RTN_UNREACHABLE; - return 0; - } - rtm->rtm_scope = RT_SCOPE_NOWHERE; - rtm->rtm_type = RTN_UNICAST; - - if (r->rt_dev) { - char *colon; - struct net_device *dev; - char devname[IFNAMSIZ]; - - if (copy_from_user(devname, r->rt_dev, IFNAMSIZ-1)) - return -EFAULT; - devname[IFNAMSIZ-1] = 0; - colon = strchr(devname, ':'); - if (colon) - *colon = 0; - dev = __dev_get_by_name(devname); - if (!dev) - return -ENODEV; - rta->rta_oif = &dev->ifindex; - if (colon) { - struct in_ifaddr *ifa; - struct in_device *in_dev = __in_dev_get_rtnl(dev); - if (!in_dev) - return -ENODEV; - *colon = ':'; - for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) - if (strcmp(ifa->ifa_label, devname) == 0) - break; - if (ifa == NULL) - return -ENODEV; - rta->rta_prefsrc = &ifa->ifa_local; - } - } - - ptr = &((struct sockaddr_in*)&r->rt_gateway)->sin_addr.s_addr; - if (r->rt_gateway.sa_family == AF_INET && *ptr) { - rta->rta_gw = ptr; - if (r->rt_flags&RTF_GATEWAY && inet_addr_type(*ptr) == RTN_UNICAST) - rtm->rtm_scope = RT_SCOPE_UNIVERSE; - } - - if (cmd == SIOCDELRT) - return 0; - - if (r->rt_flags&RTF_GATEWAY && rta->rta_gw == NULL) - return -EINVAL; - - if (rtm->rtm_scope == RT_SCOPE_NOWHERE) - rtm->rtm_scope = RT_SCOPE_LINK; - - if (r->rt_flags&(RTF_MTU|RTF_WINDOW|RTF_IRTT)) { - struct rtattr *rec; - struct rtattr *mx = kmalloc(RTA_LENGTH(3*RTA_LENGTH(4)), GFP_KERNEL); - if (mx == NULL) - return -ENOMEM; - rta->rta_mx = mx; - mx->rta_type = RTA_METRICS; - mx->rta_len = RTA_LENGTH(0); - if (r->rt_flags&RTF_MTU) { - rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); - rec->rta_type = RTAX_ADVMSS; - rec->rta_len = RTA_LENGTH(4); - mx->rta_len += RTA_LENGTH(4); - *(u32*)RTA_DATA(rec) = r->rt_mtu - 40; - } - if (r->rt_flags&RTF_WINDOW) { - rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); - rec->rta_type = RTAX_WINDOW; - rec->rta_len = RTA_LENGTH(4); - mx->rta_len += RTA_LENGTH(4); - *(u32*)RTA_DATA(rec) = r->rt_window; - } - if (r->rt_flags&RTF_IRTT) { - rec = (void*)((char*)mx + RTA_ALIGN(mx->rta_len)); - rec->rta_type = RTAX_RTT; - rec->rta_len = RTA_LENGTH(4); - mx->rta_len += RTA_LENGTH(4); - *(u32*)RTA_DATA(rec) = r->rt_irtt<<3; - } - } - return 0; -} - -#endif - /* Update FIB if: - local address disappeared -> we must delete all the entries diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 2a580eb2579b..41bef0a88ab6 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1124,17 +1124,14 @@ err: return fa_head; } -static int -fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, - struct nlmsghdr *nlhdr, struct netlink_skb_parms *req) +static int fn_trie_insert(struct fib_table *tb, struct fib_config *cfg) { struct trie *t = (struct trie *) tb->tb_data; struct fib_alias *fa, *new_fa; struct list_head *fa_head = NULL; struct fib_info *fi; - int plen = r->rtm_dst_len; - int type = r->rtm_type; - u8 tos = r->rtm_tos; + int plen = cfg->fc_dst_len; + u8 tos = cfg->fc_tos; u32 key, mask; int err; struct leaf *l; @@ -1142,11 +1139,7 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, if (plen > 32) return -EINVAL; - key = 0; - if (rta->rta_dst) - memcpy(&key, rta->rta_dst, 4); - - key = ntohl(key); + key = ntohl(cfg->fc_dst); pr_debug("Insert table=%u %08x/%d\n", tb->tb_id, key, plen); @@ -1157,10 +1150,11 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, key = key & mask; - fi = fib_create_info(r, rta, nlhdr, &err); - - if (!fi) + fi = fib_create_info(cfg); + if (IS_ERR(fi)) { + err = PTR_ERR(fi); goto err; + } l = fib_find_node(t, key); fa = NULL; @@ -1185,10 +1179,10 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, struct fib_alias *fa_orig; err = -EEXIST; - if (nlhdr->nlmsg_flags & NLM_F_EXCL) + if (cfg->fc_nlflags & NLM_F_EXCL) goto out; - if (nlhdr->nlmsg_flags & NLM_F_REPLACE) { + if (cfg->fc_nlflags & NLM_F_REPLACE) { struct fib_info *fi_drop; u8 state; @@ -1200,8 +1194,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, fi_drop = fa->fa_info; new_fa->fa_tos = fa->fa_tos; new_fa->fa_info = fi; - new_fa->fa_type = type; - new_fa->fa_scope = r->rtm_scope; + new_fa->fa_type = cfg->fc_type; + new_fa->fa_scope = cfg->fc_scope; state = fa->fa_state; new_fa->fa_state &= ~FA_S_ACCESSED; @@ -1224,17 +1218,17 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, break; if (fa->fa_info->fib_priority != fi->fib_priority) break; - if (fa->fa_type == type && - fa->fa_scope == r->rtm_scope && + if (fa->fa_type == cfg->fc_type && + fa->fa_scope == cfg->fc_scope && fa->fa_info == fi) { goto out; } } - if (!(nlhdr->nlmsg_flags & NLM_F_APPEND)) + if (!(cfg->fc_nlflags & NLM_F_APPEND)) fa = fa_orig; } err = -ENOENT; - if (!(nlhdr->nlmsg_flags & NLM_F_CREATE)) + if (!(cfg->fc_nlflags & NLM_F_CREATE)) goto out; err = -ENOBUFS; @@ -1244,8 +1238,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, new_fa->fa_info = fi; new_fa->fa_tos = tos; - new_fa->fa_type = type; - new_fa->fa_scope = r->rtm_scope; + new_fa->fa_type = cfg->fc_type; + new_fa->fa_scope = cfg->fc_scope; new_fa->fa_state = 0; /* * Insert new entry to the list. @@ -1262,7 +1256,8 @@ fn_trie_insert(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, (fa ? &fa->fa_list : fa_head)); rt_cache_flush(-1); - rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, nlhdr, req); + rtmsg_fib(RTM_NEWROUTE, htonl(key), new_fa, plen, tb->tb_id, + &cfg->fc_nlinfo); succeeded: return 0; @@ -1548,28 +1543,21 @@ static int trie_leaf_remove(struct trie *t, t_key key) return 1; } -static int -fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, - struct nlmsghdr *nlhdr, struct netlink_skb_parms *req) +static int fn_trie_delete(struct fib_table *tb, struct fib_config *cfg) { struct trie *t = (struct trie *) tb->tb_data; u32 key, mask; - int plen = r->rtm_dst_len; - u8 tos = r->rtm_tos; + int plen = cfg->fc_dst_len; + u8 tos = cfg->fc_tos; struct fib_alias *fa, *fa_to_delete; struct list_head *fa_head; struct leaf *l; struct leaf_info *li; - if (plen > 32) return -EINVAL; - key = 0; - if (rta->rta_dst) - memcpy(&key, rta->rta_dst, 4); - - key = ntohl(key); + key = ntohl(cfg->fc_dst); mask = ntohl(inet_make_mask(plen)); if (key & ~mask) @@ -1598,13 +1586,12 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, if (fa->fa_tos != tos) break; - if ((!r->rtm_type || - fa->fa_type == r->rtm_type) && - (r->rtm_scope == RT_SCOPE_NOWHERE || - fa->fa_scope == r->rtm_scope) && - (!r->rtm_protocol || - fi->fib_protocol == r->rtm_protocol) && - fib_nh_match(r, nlhdr, rta, fi) == 0) { + if ((!cfg->fc_type || fa->fa_type == cfg->fc_type) && + (cfg->fc_scope == RT_SCOPE_NOWHERE || + fa->fa_scope == cfg->fc_scope) && + (!cfg->fc_protocol || + fi->fib_protocol == cfg->fc_protocol) && + fib_nh_match(cfg, fi) == 0) { fa_to_delete = fa; break; } @@ -1614,7 +1601,8 @@ fn_trie_delete(struct fib_table *tb, struct rtmsg *r, struct kern_rta *rta, return -ESRCH; fa = fa_to_delete; - rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, nlhdr, req); + rtmsg_fib(RTM_DELROUTE, htonl(key), fa, plen, tb->tb_id, + &cfg->fc_nlinfo); l = fib_find_node(t, key); li = find_leaf_info(l, plen); -- cgit v1.3-8-gc7d7 From d889ce3b29e55b91257964b4c9aac70b91fedd91 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Thu, 17 Aug 2006 18:15:44 -0700 Subject: [IPv4]: Convert route get to new netlink api Fixes various unvalidated netlink attributes causing memory corruptions when left empty by userspace applications. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/ip_fib.h | 1 + net/ipv4/fib_frontend.c | 2 +- net/ipv4/route.c | 84 ++++++++++++++++++++++++++----------------------- 3 files changed, 47 insertions(+), 40 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 42ed96fab3f5..fcc159a4ac17 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -216,6 +216,7 @@ extern void fib_select_default(const struct flowi *flp, struct fib_result *res); #endif /* CONFIG_IP_MULTIPLE_TABLES */ /* Exported by fib_frontend.c */ +extern struct nla_policy rtm_ipv4_policy[]; extern void ip_fib_init(void); extern int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); extern int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index d537c933abe3..d0abeab16e66 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -453,7 +453,7 @@ int ip_rt_ioctl(unsigned int cmd, void *arg) #endif -static struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = { +struct nla_policy rtm_ipv4_policy[RTA_MAX+1] __read_mostly = { [RTA_DST] = { .type = NLA_U32 }, [RTA_SRC] = { .type = NLA_U32 }, [RTA_IIF] = { .type = NLA_U32 }, diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 31b67059ac29..a4d4cb85a16c 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2737,18 +2737,24 @@ nla_put_failure: int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) { - struct rtattr **rta = arg; - struct rtmsg *rtm = NLMSG_DATA(nlh); + struct rtmsg *rtm; + struct nlattr *tb[RTA_MAX+1]; struct rtable *rt = NULL; - u32 dst = 0; - u32 src = 0; - int iif = 0; - int err = -ENOBUFS; + u32 dst, src, iif; + int err; struct sk_buff *skb; + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv4_policy); + if (err < 0) + goto errout; + + rtm = nlmsg_data(nlh); + skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); - if (!skb) - goto out; + if (skb == NULL) { + err = -ENOBUFS; + goto errout; + } /* Reserve room for dummy headers, this skb can pass through good chunk of routing engine. @@ -2759,61 +2765,61 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) skb->nh.iph->protocol = IPPROTO_ICMP; skb_reserve(skb, MAX_HEADER + sizeof(struct iphdr)); - if (rta[RTA_SRC - 1]) - memcpy(&src, RTA_DATA(rta[RTA_SRC - 1]), 4); - if (rta[RTA_DST - 1]) - memcpy(&dst, RTA_DATA(rta[RTA_DST - 1]), 4); - if (rta[RTA_IIF - 1]) - memcpy(&iif, RTA_DATA(rta[RTA_IIF - 1]), sizeof(int)); + src = tb[RTA_SRC] ? nla_get_u32(tb[RTA_SRC]) : 0; + dst = tb[RTA_DST] ? nla_get_u32(tb[RTA_DST]) : 0; + iif = tb[RTA_IIF] ? nla_get_u32(tb[RTA_IIF]) : 0; if (iif) { - struct net_device *dev = __dev_get_by_index(iif); - err = -ENODEV; - if (!dev) - goto out_free; + struct net_device *dev; + + dev = __dev_get_by_index(iif); + if (dev == NULL) { + err = -ENODEV; + goto errout_free; + } + skb->protocol = htons(ETH_P_IP); skb->dev = dev; local_bh_disable(); err = ip_route_input(skb, dst, src, rtm->rtm_tos, dev); local_bh_enable(); - rt = (struct rtable*)skb->dst; - if (!err && rt->u.dst.error) + + rt = (struct rtable*) skb->dst; + if (err == 0 && rt->u.dst.error) err = -rt->u.dst.error; } else { - struct flowi fl = { .nl_u = { .ip4_u = { .daddr = dst, - .saddr = src, - .tos = rtm->rtm_tos } } }; - int oif = 0; - if (rta[RTA_OIF - 1]) - memcpy(&oif, RTA_DATA(rta[RTA_OIF - 1]), sizeof(int)); - fl.oif = oif; + struct flowi fl = { + .nl_u = { + .ip4_u = { + .daddr = dst, + .saddr = src, + .tos = rtm->rtm_tos, + }, + }, + .oif = tb[RTA_OIF] ? nla_get_u32(tb[RTA_OIF]) : 0, + }; err = ip_route_output_key(&rt, &fl); } + if (err) - goto out_free; + goto errout_free; skb->dst = &rt->u.dst; if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; - NETLINK_CB(skb).dst_pid = NETLINK_CB(in_skb).pid; - err = rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); - if (!err) - goto out_free; - if (err < 0) { - err = -EMSGSIZE; - goto out_free; - } + if (err <= 0) + goto errout_free; err = rtnl_unicast(skb, NETLINK_CB(in_skb).pid); -out: +errout: return err; -out_free: +errout_free: kfree_skb(skb); - goto out; + goto errout; } int ip_rt_dump(struct sk_buff *skb, struct netlink_callback *cb) -- cgit v1.3-8-gc7d7 From e9ce1cd3cf6cf35b21d0ce990f2e738f35907386 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 21 Aug 2006 23:54:55 -0700 Subject: [PKT_SCHED]: Kill pkt_act.h inlining. This was simply making templates of functions and mostly causing a lot of code duplication in the classifier action modules. We solve this more cleanly by having a common "struct tcf_common" that hash worker functions contained once in act_api.c can work with. Callers work with real action objects that have the common struct plus their module specific struct members. You go from a common object to the higher level one using a "to_foo()" macro which makes use of container_of() to do the dirty work. This also kills off act_generic.h which was only used by act_simple.c and keeping it around was more work than the it's value. Signed-off-by: David S. Miller --- include/net/act_api.h | 136 +++++++---- include/net/act_generic.h | 142 ------------ include/net/pkt_act.h | 273 ---------------------- include/net/tc_act/tc_defact.h | 13 +- include/net/tc_act/tc_gact.h | 18 +- include/net/tc_act/tc_ipt.h | 15 +- include/net/tc_act/tc_mirred.h | 17 +- include/net/tc_act/tc_pedit.h | 15 +- net/sched/act_api.c | 246 ++++++++++++++++++-- net/sched/act_gact.c | 142 ++++++------ net/sched/act_ipt.c | 175 +++++++------- net/sched/act_mirred.c | 159 ++++++------- net/sched/act_pedit.c | 166 ++++++-------- net/sched/act_police.c | 508 +++++++++++++++++++++-------------------- net/sched/act_simple.c | 183 ++++++++++++--- 15 files changed, 1061 insertions(+), 1147 deletions(-) delete mode 100644 include/net/act_generic.h delete mode 100644 include/net/pkt_act.h (limited to 'include/net') diff --git a/include/net/act_api.h b/include/net/act_api.h index 11e9eaf79f5a..8b06c2f3657f 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -8,70 +8,110 @@ #include #include -#define tca_gen(name) \ -struct tcf_##name *next; \ - u32 index; \ - int refcnt; \ - int bindcnt; \ - u32 capab; \ - int action; \ - struct tcf_t tm; \ - struct gnet_stats_basic bstats; \ - struct gnet_stats_queue qstats; \ - struct gnet_stats_rate_est rate_est; \ - spinlock_t *stats_lock; \ - spinlock_t lock - -struct tcf_police -{ - tca_gen(police); - int result; - u32 ewma_rate; - u32 burst; - u32 mtu; - u32 toks; - u32 ptoks; - psched_time_t t_c; - struct qdisc_rate_table *R_tab; - struct qdisc_rate_table *P_tab; +struct tcf_common { + struct tcf_common *tcfc_next; + u32 tcfc_index; + int tcfc_refcnt; + int tcfc_bindcnt; + u32 tcfc_capab; + int tcfc_action; + struct tcf_t tcfc_tm; + struct gnet_stats_basic tcfc_bstats; + struct gnet_stats_queue tcfc_qstats; + struct gnet_stats_rate_est tcfc_rate_est; + spinlock_t *tcfc_stats_lock; + spinlock_t tcfc_lock; +}; +#define tcf_next common.tcfc_next +#define tcf_index common.tcfc_index +#define tcf_refcnt common.tcfc_refcnt +#define tcf_bindcnt common.tcfc_bindcnt +#define tcf_capab common.tcfc_capab +#define tcf_action common.tcfc_action +#define tcf_tm common.tcfc_tm +#define tcf_bstats common.tcfc_bstats +#define tcf_qstats common.tcfc_qstats +#define tcf_rate_est common.tcfc_rate_est +#define tcf_stats_lock common.tcfc_stats_lock +#define tcf_lock common.tcfc_lock + +struct tcf_police { + struct tcf_common common; + int tcfp_result; + u32 tcfp_ewma_rate; + u32 tcfp_burst; + u32 tcfp_mtu; + u32 tcfp_toks; + u32 tcfp_ptoks; + psched_time_t tcfp_t_c; + struct qdisc_rate_table *tcfp_R_tab; + struct qdisc_rate_table *tcfp_P_tab; }; +#define to_police(pc) \ + container_of(pc, struct tcf_police, common) + +struct tcf_hashinfo { + struct tcf_common **htab; + unsigned int hmask; + rwlock_t *lock; +}; + +static inline unsigned int tcf_hash(u32 index, unsigned int hmask) +{ + return index & hmask; +} #ifdef CONFIG_NET_CLS_ACT #define ACT_P_CREATED 1 #define ACT_P_DELETED 1 -struct tcf_act_hdr -{ - tca_gen(act_hdr); +struct tcf_act_hdr { + struct tcf_common common; }; -struct tc_action -{ - void *priv; - struct tc_action_ops *ops; - __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ - __u32 order; - struct tc_action *next; +struct tc_action { + void *priv; + struct tc_action_ops *ops; + __u32 type; /* for backward compat(TCA_OLD_COMPAT) */ + __u32 order; + struct tc_action *next; }; #define TCA_CAP_NONE 0 -struct tc_action_ops -{ +struct tc_action_ops { struct tc_action_ops *next; + struct tcf_hashinfo *hinfo; char kind[IFNAMSIZ]; __u32 type; /* TBD to match kind */ __u32 capab; /* capabilities includes 4 bit version */ struct module *owner; int (*act)(struct sk_buff *, struct tc_action *, struct tcf_result *); int (*get_stats)(struct sk_buff *, struct tc_action *); - int (*dump)(struct sk_buff *, struct tc_action *,int , int); + int (*dump)(struct sk_buff *, struct tc_action *, int, int); int (*cleanup)(struct tc_action *, int bind); - int (*lookup)(struct tc_action *, u32 ); - int (*init)(struct rtattr *,struct rtattr *,struct tc_action *, int , int ); - int (*walk)(struct sk_buff *, struct netlink_callback *, int , struct tc_action *); + int (*lookup)(struct tc_action *, u32); + int (*init)(struct rtattr *, struct rtattr *, struct tc_action *, int , int); + int (*walk)(struct sk_buff *, struct netlink_callback *, int, struct tc_action *); }; +extern struct tcf_common *tcf_hash_lookup(u32 index, + struct tcf_hashinfo *hinfo); +extern void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo); +extern int tcf_hash_release(struct tcf_common *p, int bind, + struct tcf_hashinfo *hinfo); +extern int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, + int type, struct tc_action *a); +extern u32 tcf_hash_new_index(u32 *idx_gen, struct tcf_hashinfo *hinfo); +extern int tcf_hash_search(struct tc_action *a, u32 index); +extern struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, + int bind, struct tcf_hashinfo *hinfo); +extern struct tcf_common *tcf_hash_create(u32 index, struct rtattr *est, + struct tc_action *a, int size, + int bind, u32 *idx_gen, + struct tcf_hashinfo *hinfo); +extern void tcf_hash_insert(struct tcf_common *p, struct tcf_hashinfo *hinfo); + extern int tcf_register_action(struct tc_action_ops *a); extern int tcf_unregister_action(struct tc_action_ops *a); extern void tcf_action_destroy(struct tc_action *a, int bind); @@ -96,17 +136,17 @@ tcf_police_release(struct tcf_police *p, int bind) int ret = 0; #ifdef CONFIG_NET_CLS_ACT if (p) { - if (bind) { - p->bindcnt--; - } - p->refcnt--; - if (p->refcnt <= 0 && !p->bindcnt) { + if (bind) + p->tcf_bindcnt--; + + p->tcf_refcnt--; + if (p->tcf_refcnt <= 0 && !p->tcf_bindcnt) { tcf_police_destroy(p); ret = 1; } } #else - if (p && --p->refcnt == 0) + if (p && --p->tcf_refcnt == 0) tcf_police_destroy(p); #endif /* CONFIG_NET_CLS_ACT */ diff --git a/include/net/act_generic.h b/include/net/act_generic.h deleted file mode 100644 index c9daa7e52300..000000000000 --- a/include/net/act_generic.h +++ /dev/null @@ -1,142 +0,0 @@ -/* - * include/net/act_generic.h - * -*/ -#ifndef _NET_ACT_GENERIC_H -#define _NET_ACT_GENERIC_H -static inline int tcf_defact_release(struct tcf_defact *p, int bind) -{ - int ret = 0; - if (p) { - if (bind) { - p->bindcnt--; - } - p->refcnt--; - if (p->bindcnt <= 0 && p->refcnt <= 0) { - kfree(p->defdata); - tcf_hash_destroy(p); - ret = 1; - } - } - return ret; -} - -static inline int -alloc_defdata(struct tcf_defact *p, u32 datalen, void *defdata) -{ - p->defdata = kmalloc(datalen, GFP_KERNEL); - if (p->defdata == NULL) - return -ENOMEM; - p->datalen = datalen; - memcpy(p->defdata, defdata, datalen); - return 0; -} - -static inline int -realloc_defdata(struct tcf_defact *p, u32 datalen, void *defdata) -{ - /* safer to be just brute force for now */ - kfree(p->defdata); - return alloc_defdata(p, datalen, defdata); -} - -static inline int -tcf_defact_init(struct rtattr *rta, struct rtattr *est, - struct tc_action *a, int ovr, int bind) -{ - struct rtattr *tb[TCA_DEF_MAX]; - struct tc_defact *parm; - struct tcf_defact *p; - void *defdata; - u32 datalen = 0; - int ret = 0; - - if (rta == NULL || rtattr_parse_nested(tb, TCA_DEF_MAX, rta) < 0) - return -EINVAL; - - if (tb[TCA_DEF_PARMS - 1] == NULL || - RTA_PAYLOAD(tb[TCA_DEF_PARMS - 1]) < sizeof(*parm)) - return -EINVAL; - - parm = RTA_DATA(tb[TCA_DEF_PARMS - 1]); - defdata = RTA_DATA(tb[TCA_DEF_DATA - 1]); - if (defdata == NULL) - return -EINVAL; - - datalen = RTA_PAYLOAD(tb[TCA_DEF_DATA - 1]); - if (datalen <= 0) - return -EINVAL; - - p = tcf_hash_check(parm->index, a, ovr, bind); - if (p == NULL) { - p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind); - if (p == NULL) - return -ENOMEM; - - ret = alloc_defdata(p, datalen, defdata); - if (ret < 0) { - kfree(p); - return ret; - } - ret = ACT_P_CREATED; - } else { - if (!ovr) { - tcf_defact_release(p, bind); - return -EEXIST; - } - realloc_defdata(p, datalen, defdata); - } - - spin_lock_bh(&p->lock); - p->action = parm->action; - spin_unlock_bh(&p->lock); - if (ret == ACT_P_CREATED) - tcf_hash_insert(p); - return ret; -} - -static inline int tcf_defact_cleanup(struct tc_action *a, int bind) -{ - struct tcf_defact *p = PRIV(a, defact); - - if (p != NULL) - return tcf_defact_release(p, bind); - return 0; -} - -static inline int -tcf_defact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) -{ - unsigned char *b = skb->tail; - struct tc_defact opt; - struct tcf_defact *p = PRIV(a, defact); - struct tcf_t t; - - opt.index = p->index; - opt.refcnt = p->refcnt - ref; - opt.bindcnt = p->bindcnt - bind; - opt.action = p->action; - RTA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt); - RTA_PUT(skb, TCA_DEF_DATA, p->datalen, p->defdata); - t.install = jiffies_to_clock_t(jiffies - p->tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); - t.expires = jiffies_to_clock_t(p->tm.expires); - RTA_PUT(skb, TCA_DEF_TM, sizeof(t), &t); - return skb->len; - -rtattr_failure: - skb_trim(skb, b - skb->data); - return -1; -} - -#define tca_use_default_ops \ - .dump = tcf_defact_dump, \ - .cleanup = tcf_defact_cleanup, \ - .init = tcf_defact_init, \ - .walk = tcf_generic_walker, \ - -#define tca_use_default_defines(name) \ - static u32 idx_gen; \ - static struct tcf_defact *tcf_##name_ht[MY_TAB_SIZE]; \ - static DEFINE_RWLOCK(##name_lock); -#endif /* _NET_ACT_GENERIC_H */ diff --git a/include/net/pkt_act.h b/include/net/pkt_act.h deleted file mode 100644 index cf5e4d2e4c21..000000000000 --- a/include/net/pkt_act.h +++ /dev/null @@ -1,273 +0,0 @@ -#ifndef __NET_PKT_ACT_H -#define __NET_PKT_ACT_H - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define tca_st(val) (struct tcf_##val *) -#define PRIV(a,name) ( tca_st(name) (a)->priv) - -#if 0 /* control */ -#define DPRINTK(format,args...) printk(KERN_DEBUG format,##args) -#else -#define DPRINTK(format,args...) -#endif - -#if 0 /* data */ -#define D2PRINTK(format,args...) printk(KERN_DEBUG format,##args) -#else -#define D2PRINTK(format,args...) -#endif - -static __inline__ unsigned -tcf_hash(u32 index) -{ - return index & MY_TAB_MASK; -} - -/* probably move this from being inline - * and put into act_generic -*/ -static inline void -tcf_hash_destroy(struct tcf_st *p) -{ - unsigned h = tcf_hash(p->index); - struct tcf_st **p1p; - - for (p1p = &tcf_ht[h]; *p1p; p1p = &(*p1p)->next) { - if (*p1p == p) { - write_lock_bh(&tcf_t_lock); - *p1p = p->next; - write_unlock_bh(&tcf_t_lock); -#ifdef CONFIG_NET_ESTIMATOR - gen_kill_estimator(&p->bstats, &p->rate_est); -#endif - kfree(p); - return; - } - } - BUG_TRAP(0); -} - -static inline int -tcf_hash_release(struct tcf_st *p, int bind ) -{ - int ret = 0; - if (p) { - if (bind) { - p->bindcnt--; - } - p->refcnt--; - if(p->bindcnt <=0 && p->refcnt <= 0) { - tcf_hash_destroy(p); - ret = 1; - } - } - return ret; -} - -static __inline__ int -tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, - struct tc_action *a) -{ - struct tcf_st *p; - int err =0, index = -1,i= 0, s_i = 0, n_i = 0; - struct rtattr *r ; - - read_lock(&tcf_t_lock); - - s_i = cb->args[0]; - - for (i = 0; i < MY_TAB_SIZE; i++) { - p = tcf_ht[tcf_hash(i)]; - - for (; p; p = p->next) { - index++; - if (index < s_i) - continue; - a->priv = p; - a->order = n_i; - r = (struct rtattr*) skb->tail; - RTA_PUT(skb, a->order, 0, NULL); - err = tcf_action_dump_1(skb, a, 0, 0); - if (0 > err) { - index--; - skb_trim(skb, (u8*)r - skb->data); - goto done; - } - r->rta_len = skb->tail - (u8*)r; - n_i++; - if (n_i >= TCA_ACT_MAX_PRIO) { - goto done; - } - } - } -done: - read_unlock(&tcf_t_lock); - if (n_i) - cb->args[0] += n_i; - return n_i; - -rtattr_failure: - skb_trim(skb, (u8*)r - skb->data); - goto done; -} - -static __inline__ int -tcf_del_walker(struct sk_buff *skb, struct tc_action *a) -{ - struct tcf_st *p, *s_p; - struct rtattr *r ; - int i= 0, n_i = 0; - - r = (struct rtattr*) skb->tail; - RTA_PUT(skb, a->order, 0, NULL); - RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind); - for (i = 0; i < MY_TAB_SIZE; i++) { - p = tcf_ht[tcf_hash(i)]; - - while (p != NULL) { - s_p = p->next; - if (ACT_P_DELETED == tcf_hash_release(p, 0)) { - module_put(a->ops->owner); - } - n_i++; - p = s_p; - } - } - RTA_PUT(skb, TCA_FCNT, 4, &n_i); - r->rta_len = skb->tail - (u8*)r; - - return n_i; -rtattr_failure: - skb_trim(skb, (u8*)r - skb->data); - return -EINVAL; -} - -static __inline__ int -tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, int type, - struct tc_action *a) -{ - if (type == RTM_DELACTION) { - return tcf_del_walker(skb,a); - } else if (type == RTM_GETACTION) { - return tcf_dump_walker(skb,cb,a); - } else { - printk("tcf_generic_walker: unknown action %d\n",type); - return -EINVAL; - } -} - -static __inline__ struct tcf_st * -tcf_hash_lookup(u32 index) -{ - struct tcf_st *p; - - read_lock(&tcf_t_lock); - for (p = tcf_ht[tcf_hash(index)]; p; p = p->next) { - if (p->index == index) - break; - } - read_unlock(&tcf_t_lock); - return p; -} - -static __inline__ u32 -tcf_hash_new_index(void) -{ - do { - if (++idx_gen == 0) - idx_gen = 1; - } while (tcf_hash_lookup(idx_gen)); - - return idx_gen; -} - - -static inline int -tcf_hash_search(struct tc_action *a, u32 index) -{ - struct tcf_st *p = tcf_hash_lookup(index); - - if (p != NULL) { - a->priv = p; - return 1; - } - return 0; -} - -#ifdef CONFIG_NET_ACT_INIT -static inline struct tcf_st * -tcf_hash_check(u32 index, struct tc_action *a, int ovr, int bind) -{ - struct tcf_st *p = NULL; - if (index && (p = tcf_hash_lookup(index)) != NULL) { - if (bind) { - p->bindcnt++; - p->refcnt++; - } - a->priv = p; - } - return p; -} - -static inline struct tcf_st * -tcf_hash_create(u32 index, struct rtattr *est, struct tc_action *a, int size, int ovr, int bind) -{ - struct tcf_st *p = NULL; - - p = kmalloc(size, GFP_KERNEL); - if (p == NULL) - return p; - - memset(p, 0, size); - p->refcnt = 1; - - if (bind) { - p->bindcnt = 1; - } - - spin_lock_init(&p->lock); - p->stats_lock = &p->lock; - p->index = index ? : tcf_hash_new_index(); - p->tm.install = jiffies; - p->tm.lastuse = jiffies; -#ifdef CONFIG_NET_ESTIMATOR - if (est) - gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est); -#endif - a->priv = (void *) p; - return p; -} - -static inline void tcf_hash_insert(struct tcf_st *p) -{ - unsigned h = tcf_hash(p->index); - - write_lock_bh(&tcf_t_lock); - p->next = tcf_ht[h]; - tcf_ht[h] = p; - write_unlock_bh(&tcf_t_lock); -} - -#endif - -#endif diff --git a/include/net/tc_act/tc_defact.h b/include/net/tc_act/tc_defact.h index 463aa671f95d..65f024b80958 100644 --- a/include/net/tc_act/tc_defact.h +++ b/include/net/tc_act/tc_defact.h @@ -3,11 +3,12 @@ #include -struct tcf_defact -{ - tca_gen(defact); - u32 datalen; - void *defdata; +struct tcf_defact { + struct tcf_common common; + u32 tcfd_datalen; + void *tcfd_defdata; }; +#define to_defact(pc) \ + container_of(pc, struct tcf_defact, common) -#endif +#endif /* __NET_TC_DEF_H */ diff --git a/include/net/tc_act/tc_gact.h b/include/net/tc_act/tc_gact.h index 59f0d9628ad1..9e3f6767b80e 100644 --- a/include/net/tc_act/tc_gact.h +++ b/include/net/tc_act/tc_gact.h @@ -3,15 +3,15 @@ #include -struct tcf_gact -{ - tca_gen(gact); +struct tcf_gact { + struct tcf_common common; #ifdef CONFIG_GACT_PROB - u16 ptype; - u16 pval; - int paction; + u16 tcfg_ptype; + u16 tcfg_pval; + int tcfg_paction; #endif - }; - -#endif +#define to_gact(pc) \ + container_of(pc, struct tcf_gact, common) + +#endif /* __NET_TC_GACT_H */ diff --git a/include/net/tc_act/tc_ipt.h b/include/net/tc_act/tc_ipt.h index cb37ad08427f..f7d25dfcc4b7 100644 --- a/include/net/tc_act/tc_ipt.h +++ b/include/net/tc_act/tc_ipt.h @@ -5,12 +5,13 @@ struct xt_entry_target; -struct tcf_ipt -{ - tca_gen(ipt); - u32 hook; - char *tname; - struct xt_entry_target *t; +struct tcf_ipt { + struct tcf_common common; + u32 tcfi_hook; + char *tcfi_tname; + struct xt_entry_target *tcfi_t; }; +#define to_ipt(pc) \ + container_of(pc, struct tcf_ipt, common) -#endif +#endif /* __NET_TC_IPT_H */ diff --git a/include/net/tc_act/tc_mirred.h b/include/net/tc_act/tc_mirred.h index b5c32f65c12c..ceac661cdfd5 100644 --- a/include/net/tc_act/tc_mirred.h +++ b/include/net/tc_act/tc_mirred.h @@ -3,13 +3,14 @@ #include -struct tcf_mirred -{ - tca_gen(mirred); - int eaction; - int ifindex; - int ok_push; - struct net_device *dev; +struct tcf_mirred { + struct tcf_common common; + int tcfm_eaction; + int tcfm_ifindex; + int tcfm_ok_push; + struct net_device *tcfm_dev; }; +#define to_mirred(pc) \ + container_of(pc, struct tcf_mirred, common) -#endif +#endif /* __NET_TC_MIR_H */ diff --git a/include/net/tc_act/tc_pedit.h b/include/net/tc_act/tc_pedit.h index eb21689d759d..e6f6e15956f5 100644 --- a/include/net/tc_act/tc_pedit.h +++ b/include/net/tc_act/tc_pedit.h @@ -3,12 +3,13 @@ #include -struct tcf_pedit -{ - tca_gen(pedit); - unsigned char nkeys; - unsigned char flags; - struct tc_pedit_key *keys; +struct tcf_pedit { + struct tcf_common common; + unsigned char tcfp_nkeys; + unsigned char tcfp_flags; + struct tc_pedit_key *tcfp_keys; }; +#define to_pedit(pc) \ + container_of(pc, struct tcf_pedit, common) -#endif +#endif /* __NET_TC_PED_H */ diff --git a/net/sched/act_api.c b/net/sched/act_api.c index 6990747d6d5a..835070e9169c 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -33,16 +33,230 @@ #include #include -#if 0 /* control */ -#define DPRINTK(format, args...) printk(KERN_DEBUG format, ##args) -#else -#define DPRINTK(format, args...) +void tcf_hash_destroy(struct tcf_common *p, struct tcf_hashinfo *hinfo) +{ + unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); + struct tcf_common **p1p; + + for (p1p = &hinfo->htab[h]; *p1p; p1p = &(*p1p)->tcfc_next) { + if (*p1p == p) { + write_lock_bh(hinfo->lock); + *p1p = p->tcfc_next; + write_unlock_bh(hinfo->lock); +#ifdef CONFIG_NET_ESTIMATOR + gen_kill_estimator(&p->tcfc_bstats, + &p->tcfc_rate_est); #endif -#if 0 /* data */ -#define D2PRINTK(format, args...) printk(KERN_DEBUG format, ##args) -#else -#define D2PRINTK(format, args...) + kfree(p); + return; + } + } + BUG_TRAP(0); +} +EXPORT_SYMBOL(tcf_hash_destroy); + +int tcf_hash_release(struct tcf_common *p, int bind, + struct tcf_hashinfo *hinfo) +{ + int ret = 0; + + if (p) { + if (bind) + p->tcfc_bindcnt--; + + p->tcfc_refcnt--; + if (p->tcfc_bindcnt <= 0 && p->tcfc_refcnt <= 0) { + tcf_hash_destroy(p, hinfo); + ret = 1; + } + } + return ret; +} +EXPORT_SYMBOL(tcf_hash_release); + +static int tcf_dump_walker(struct sk_buff *skb, struct netlink_callback *cb, + struct tc_action *a, struct tcf_hashinfo *hinfo) +{ + struct tcf_common *p; + int err = 0, index = -1,i = 0, s_i = 0, n_i = 0; + struct rtattr *r ; + + read_lock(hinfo->lock); + + s_i = cb->args[0]; + + for (i = 0; i < (hinfo->hmask + 1); i++) { + p = hinfo->htab[tcf_hash(i, hinfo->hmask)]; + + for (; p; p = p->tcfc_next) { + index++; + if (index < s_i) + continue; + a->priv = p; + a->order = n_i; + r = (struct rtattr*) skb->tail; + RTA_PUT(skb, a->order, 0, NULL); + err = tcf_action_dump_1(skb, a, 0, 0); + if (err < 0) { + index--; + skb_trim(skb, (u8*)r - skb->data); + goto done; + } + r->rta_len = skb->tail - (u8*)r; + n_i++; + if (n_i >= TCA_ACT_MAX_PRIO) + goto done; + } + } +done: + read_unlock(hinfo->lock); + if (n_i) + cb->args[0] += n_i; + return n_i; + +rtattr_failure: + skb_trim(skb, (u8*)r - skb->data); + goto done; +} + +static int tcf_del_walker(struct sk_buff *skb, struct tc_action *a, + struct tcf_hashinfo *hinfo) +{ + struct tcf_common *p, *s_p; + struct rtattr *r ; + int i= 0, n_i = 0; + + r = (struct rtattr*) skb->tail; + RTA_PUT(skb, a->order, 0, NULL); + RTA_PUT(skb, TCA_KIND, IFNAMSIZ, a->ops->kind); + for (i = 0; i < (hinfo->hmask + 1); i++) { + p = hinfo->htab[tcf_hash(i, hinfo->hmask)]; + + while (p != NULL) { + s_p = p->tcfc_next; + if (ACT_P_DELETED == tcf_hash_release(p, 0, hinfo)) + module_put(a->ops->owner); + n_i++; + p = s_p; + } + } + RTA_PUT(skb, TCA_FCNT, 4, &n_i); + r->rta_len = skb->tail - (u8*)r; + + return n_i; +rtattr_failure: + skb_trim(skb, (u8*)r - skb->data); + return -EINVAL; +} + +int tcf_generic_walker(struct sk_buff *skb, struct netlink_callback *cb, + int type, struct tc_action *a) +{ + struct tcf_hashinfo *hinfo = a->ops->hinfo; + + if (type == RTM_DELACTION) { + return tcf_del_walker(skb, a, hinfo); + } else if (type == RTM_GETACTION) { + return tcf_dump_walker(skb, cb, a, hinfo); + } else { + printk("tcf_generic_walker: unknown action %d\n", type); + return -EINVAL; + } +} +EXPORT_SYMBOL(tcf_generic_walker); + +struct tcf_common *tcf_hash_lookup(u32 index, struct tcf_hashinfo *hinfo) +{ + struct tcf_common *p; + + read_lock(hinfo->lock); + for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p; + p = p->tcfc_next) { + if (p->tcfc_index == index) + break; + } + read_unlock(hinfo->lock); + + return p; +} +EXPORT_SYMBOL(tcf_hash_lookup); + +u32 tcf_hash_new_index(u32 *idx_gen, struct tcf_hashinfo *hinfo) +{ + u32 val = *idx_gen; + + do { + if (++val == 0) + val = 1; + } while (tcf_hash_lookup(val, hinfo)); + + return (*idx_gen = val); +} +EXPORT_SYMBOL(tcf_hash_new_index); + +int tcf_hash_search(struct tc_action *a, u32 index) +{ + struct tcf_hashinfo *hinfo = a->ops->hinfo; + struct tcf_common *p = tcf_hash_lookup(index, hinfo); + + if (p) { + a->priv = p; + return 1; + } + return 0; +} +EXPORT_SYMBOL(tcf_hash_search); + +struct tcf_common *tcf_hash_check(u32 index, struct tc_action *a, int bind, + struct tcf_hashinfo *hinfo) +{ + struct tcf_common *p = NULL; + if (index && (p = tcf_hash_lookup(index, hinfo)) != NULL) { + if (bind) { + p->tcfc_bindcnt++; + p->tcfc_refcnt++; + } + a->priv = p; + } + return p; +} +EXPORT_SYMBOL(tcf_hash_check); + +struct tcf_common *tcf_hash_create(u32 index, struct rtattr *est, struct tc_action *a, int size, int bind, u32 *idx_gen, struct tcf_hashinfo *hinfo) +{ + struct tcf_common *p = kzalloc(size, GFP_KERNEL); + + if (unlikely(!p)) + return p; + p->tcfc_refcnt = 1; + if (bind) + p->tcfc_bindcnt = 1; + + spin_lock_init(&p->tcfc_lock); + p->tcfc_stats_lock = &p->tcfc_lock; + p->tcfc_index = index ? index : tcf_hash_new_index(idx_gen, hinfo); + p->tcfc_tm.install = jiffies; + p->tcfc_tm.lastuse = jiffies; +#ifdef CONFIG_NET_ESTIMATOR + if (est) + gen_new_estimator(&p->tcfc_bstats, &p->tcfc_rate_est, + p->tcfc_stats_lock, est); #endif + a->priv = (void *) p; + return p; +} +EXPORT_SYMBOL(tcf_hash_create); + +void tcf_hash_insert(struct tcf_common *p, struct tcf_hashinfo *hinfo) +{ + unsigned int h = tcf_hash(p->tcfc_index, hinfo->hmask); + + write_lock_bh(hinfo->lock); + p->tcfc_next = hinfo->htab[h]; + hinfo->htab[h] = p; + write_unlock_bh(hinfo->lock); +} +EXPORT_SYMBOL(tcf_hash_insert); static struct tc_action_ops *act_base = NULL; static DEFINE_RWLOCK(act_mod_lock); @@ -155,9 +369,6 @@ int tcf_action_exec(struct sk_buff *skb, struct tc_action *act, if (skb->tc_verd & TC_NCLS) { skb->tc_verd = CLR_TC_NCLS(skb->tc_verd); - D2PRINTK("(%p)tcf_action_exec: cleared TC_NCLS in %s out %s\n", - skb, skb->input_dev ? skb->input_dev->name : "xxx", - skb->dev->name); ret = TC_ACT_OK; goto exec_done; } @@ -187,8 +398,6 @@ void tcf_action_destroy(struct tc_action *act, int bind) for (a = act; a; a = act) { if (a->ops && a->ops->cleanup) { - DPRINTK("tcf_action_destroy destroying %p next %p\n", - a, a->next); if (a->ops->cleanup(a, bind) == ACT_P_DELETED) module_put(a->ops->owner); act = act->next; @@ -331,7 +540,6 @@ struct tc_action *tcf_action_init_1(struct rtattr *rta, struct rtattr *est, if (*err != ACT_P_CREATED) module_put(a_o->owner); a->ops = a_o; - DPRINTK("tcf_action_init_1: successfull %s\n", act_name); *err = 0; return a; @@ -392,12 +600,12 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, if (compat_mode) { if (a->type == TCA_OLD_COMPAT) err = gnet_stats_start_copy_compat(skb, 0, - TCA_STATS, TCA_XSTATS, h->stats_lock, &d); + TCA_STATS, TCA_XSTATS, h->tcf_stats_lock, &d); else return 0; } else err = gnet_stats_start_copy(skb, TCA_ACT_STATS, - h->stats_lock, &d); + h->tcf_stats_lock, &d); if (err < 0) goto errout; @@ -406,11 +614,11 @@ int tcf_action_copy_stats(struct sk_buff *skb, struct tc_action *a, if (a->ops->get_stats(skb, a) < 0) goto errout; - if (gnet_stats_copy_basic(&d, &h->bstats) < 0 || + if (gnet_stats_copy_basic(&d, &h->tcf_bstats) < 0 || #ifdef CONFIG_NET_ESTIMATOR - gnet_stats_copy_rate_est(&d, &h->rate_est) < 0 || + gnet_stats_copy_rate_est(&d, &h->tcf_rate_est) < 0 || #endif - gnet_stats_copy_queue(&d, &h->qstats) < 0) + gnet_stats_copy_queue(&d, &h->tcf_qstats) < 0) goto errout; if (gnet_stats_finish_copy(&d) < 0) diff --git a/net/sched/act_gact.c b/net/sched/act_gact.c index e75a147ad60f..6cff56696a81 100644 --- a/net/sched/act_gact.c +++ b/net/sched/act_gact.c @@ -34,48 +34,43 @@ #include #include -/* use generic hash table */ -#define MY_TAB_SIZE 16 -#define MY_TAB_MASK 15 - -static u32 idx_gen; -static struct tcf_gact *tcf_gact_ht[MY_TAB_SIZE]; +#define GACT_TAB_MASK 15 +static struct tcf_common *tcf_gact_ht[GACT_TAB_MASK + 1]; +static u32 gact_idx_gen; static DEFINE_RWLOCK(gact_lock); -/* ovewrride the defaults */ -#define tcf_st tcf_gact -#define tc_st tc_gact -#define tcf_t_lock gact_lock -#define tcf_ht tcf_gact_ht - -#define CONFIG_NET_ACT_INIT 1 -#include +static struct tcf_hashinfo gact_hash_info = { + .htab = tcf_gact_ht, + .hmask = GACT_TAB_MASK, + .lock = &gact_lock, +}; #ifdef CONFIG_GACT_PROB -static int gact_net_rand(struct tcf_gact *p) +static int gact_net_rand(struct tcf_gact *gact) { - if (net_random()%p->pval) - return p->action; - return p->paction; + if (net_random() % gact->tcfg_pval) + return gact->tcf_action; + return gact->tcfg_paction; } -static int gact_determ(struct tcf_gact *p) +static int gact_determ(struct tcf_gact *gact) { - if (p->bstats.packets%p->pval) - return p->action; - return p->paction; + if (gact->tcf_bstats.packets % gact->tcfg_pval) + return gact->tcf_action; + return gact->tcfg_paction; } -typedef int (*g_rand)(struct tcf_gact *p); +typedef int (*g_rand)(struct tcf_gact *gact); static g_rand gact_rand[MAX_RAND]= { NULL, gact_net_rand, gact_determ }; -#endif +#endif /* CONFIG_GACT_PROB */ static int tcf_gact_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, int ovr, int bind) { struct rtattr *tb[TCA_GACT_MAX]; struct tc_gact *parm; - struct tcf_gact *p; + struct tcf_gact *gact; + struct tcf_common *pc; int ret = 0; if (rta == NULL || rtattr_parse_nested(tb, TCA_GACT_MAX, rta) < 0) @@ -94,105 +89,106 @@ static int tcf_gact_init(struct rtattr *rta, struct rtattr *est, return -EOPNOTSUPP; #endif - p = tcf_hash_check(parm->index, a, ovr, bind); - if (p == NULL) { - p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind); - if (p == NULL) + pc = tcf_hash_check(parm->index, a, bind, &gact_hash_info); + if (!pc) { + pc = tcf_hash_create(parm->index, est, a, sizeof(*gact), + bind, &gact_idx_gen, &gact_hash_info); + if (unlikely(!pc)) return -ENOMEM; ret = ACT_P_CREATED; } else { if (!ovr) { - tcf_hash_release(p, bind); + tcf_hash_release(pc, bind, &gact_hash_info); return -EEXIST; } } - spin_lock_bh(&p->lock); - p->action = parm->action; + gact = to_gact(pc); + + spin_lock_bh(&gact->tcf_lock); + gact->tcf_action = parm->action; #ifdef CONFIG_GACT_PROB if (tb[TCA_GACT_PROB-1] != NULL) { struct tc_gact_p *p_parm = RTA_DATA(tb[TCA_GACT_PROB-1]); - p->paction = p_parm->paction; - p->pval = p_parm->pval; - p->ptype = p_parm->ptype; + gact->tcfg_paction = p_parm->paction; + gact->tcfg_pval = p_parm->pval; + gact->tcfg_ptype = p_parm->ptype; } #endif - spin_unlock_bh(&p->lock); + spin_unlock_bh(&gact->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(p); + tcf_hash_insert(pc, &gact_hash_info); return ret; } -static int -tcf_gact_cleanup(struct tc_action *a, int bind) +static int tcf_gact_cleanup(struct tc_action *a, int bind) { - struct tcf_gact *p = PRIV(a, gact); + struct tcf_gact *gact = a->priv; - if (p != NULL) - return tcf_hash_release(p, bind); + if (gact) + return tcf_hash_release(&gact->common, bind, &gact_hash_info); return 0; } -static int -tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) +static int tcf_gact(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) { - struct tcf_gact *p = PRIV(a, gact); + struct tcf_gact *gact = a->priv; int action = TC_ACT_SHOT; - spin_lock(&p->lock); + spin_lock(&gact->tcf_lock); #ifdef CONFIG_GACT_PROB - if (p->ptype && gact_rand[p->ptype] != NULL) - action = gact_rand[p->ptype](p); + if (gact->tcfg_ptype && gact_rand[gact->tcfg_ptype] != NULL) + action = gact_rand[gact->tcfg_ptype](gact); else - action = p->action; + action = gact->tcf_action; #else - action = p->action; + action = gact->tcf_action; #endif - p->bstats.bytes += skb->len; - p->bstats.packets++; + gact->tcf_bstats.bytes += skb->len; + gact->tcf_bstats.packets++; if (action == TC_ACT_SHOT) - p->qstats.drops++; - p->tm.lastuse = jiffies; - spin_unlock(&p->lock); + gact->tcf_qstats.drops++; + gact->tcf_tm.lastuse = jiffies; + spin_unlock(&gact->tcf_lock); return action; } -static int -tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +static int tcf_gact_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb->tail; struct tc_gact opt; - struct tcf_gact *p = PRIV(a, gact); + struct tcf_gact *gact = a->priv; struct tcf_t t; - opt.index = p->index; - opt.refcnt = p->refcnt - ref; - opt.bindcnt = p->bindcnt - bind; - opt.action = p->action; + opt.index = gact->tcf_index; + opt.refcnt = gact->tcf_refcnt - ref; + opt.bindcnt = gact->tcf_bindcnt - bind; + opt.action = gact->tcf_action; RTA_PUT(skb, TCA_GACT_PARMS, sizeof(opt), &opt); #ifdef CONFIG_GACT_PROB - if (p->ptype) { + if (gact->tcfg_ptype) { struct tc_gact_p p_opt; - p_opt.paction = p->paction; - p_opt.pval = p->pval; - p_opt.ptype = p->ptype; + p_opt.paction = gact->tcfg_paction; + p_opt.pval = gact->tcfg_pval; + p_opt.ptype = gact->tcfg_ptype; RTA_PUT(skb, TCA_GACT_PROB, sizeof(p_opt), &p_opt); } #endif - t.install = jiffies_to_clock_t(jiffies - p->tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); - t.expires = jiffies_to_clock_t(p->tm.expires); + t.install = jiffies_to_clock_t(jiffies - gact->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - gact->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(gact->tcf_tm.expires); RTA_PUT(skb, TCA_GACT_TM, sizeof(t), &t); return skb->len; - rtattr_failure: +rtattr_failure: skb_trim(skb, b - skb->data); return -1; } static struct tc_action_ops act_gact_ops = { .kind = "gact", + .hinfo = &gact_hash_info, .type = TCA_ACT_GACT, .capab = TCA_CAP_NONE, .owner = THIS_MODULE, @@ -208,8 +204,7 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); MODULE_DESCRIPTION("Generic Classifier actions"); MODULE_LICENSE("GPL"); -static int __init -gact_init_module(void) +static int __init gact_init_module(void) { #ifdef CONFIG_GACT_PROB printk("GACT probability on\n"); @@ -219,8 +214,7 @@ gact_init_module(void) return tcf_register_action(&act_gact_ops); } -static void __exit -gact_cleanup_module(void) +static void __exit gact_cleanup_module(void) { tcf_unregister_action(&act_gact_ops); } diff --git a/net/sched/act_ipt.c b/net/sched/act_ipt.c index d799e01248c4..224c078a398e 100644 --- a/net/sched/act_ipt.c +++ b/net/sched/act_ipt.c @@ -38,25 +38,19 @@ #include -/* use generic hash table */ -#define MY_TAB_SIZE 16 -#define MY_TAB_MASK 15 -static u32 idx_gen; -static struct tcf_ipt *tcf_ipt_ht[MY_TAB_SIZE]; -/* ipt hash table lock */ +#define IPT_TAB_MASK 15 +static struct tcf_common *tcf_ipt_ht[IPT_TAB_MASK + 1]; +static u32 ipt_idx_gen; static DEFINE_RWLOCK(ipt_lock); -/* ovewrride the defaults */ -#define tcf_st tcf_ipt -#define tcf_t_lock ipt_lock -#define tcf_ht tcf_ipt_ht - -#define CONFIG_NET_ACT_INIT -#include +static struct tcf_hashinfo ipt_hash_info = { + .htab = tcf_ipt_ht, + .hmask = IPT_TAB_MASK, + .lock = &ipt_lock, +}; -static int -ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) +static int ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) { struct ipt_target *target; int ret = 0; @@ -65,7 +59,6 @@ ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) if (!target) return -ENOENT; - DPRINTK("ipt_init_target: found %s\n", target->name); t->u.kernel.target = target; ret = xt_check_target(target, AF_INET, t->u.target_size - sizeof(*t), @@ -78,8 +71,6 @@ ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) t->u.kernel.target, t->data, t->u.target_size - sizeof(*t), hook)) { - DPRINTK("ipt_init_target: check failed for `%s'.\n", - t->u.kernel.target->name); module_put(t->u.kernel.target->me); ret = -EINVAL; } @@ -87,8 +78,7 @@ ipt_init_target(struct ipt_entry_target *t, char *table, unsigned int hook) return ret; } -static void -ipt_destroy_target(struct ipt_entry_target *t) +static void ipt_destroy_target(struct ipt_entry_target *t) { if (t->u.kernel.target->destroy) t->u.kernel.target->destroy(t->u.kernel.target, t->data, @@ -96,31 +86,30 @@ ipt_destroy_target(struct ipt_entry_target *t) module_put(t->u.kernel.target->me); } -static int -tcf_ipt_release(struct tcf_ipt *p, int bind) +static int tcf_ipt_release(struct tcf_ipt *ipt, int bind) { int ret = 0; - if (p) { + if (ipt) { if (bind) - p->bindcnt--; - p->refcnt--; - if (p->bindcnt <= 0 && p->refcnt <= 0) { - ipt_destroy_target(p->t); - kfree(p->tname); - kfree(p->t); - tcf_hash_destroy(p); + ipt->tcf_bindcnt--; + ipt->tcf_refcnt--; + if (ipt->tcf_bindcnt <= 0 && ipt->tcf_refcnt <= 0) { + ipt_destroy_target(ipt->tcfi_t); + kfree(ipt->tcfi_tname); + kfree(ipt->tcfi_t); + tcf_hash_destroy(&ipt->common, &ipt_hash_info); ret = ACT_P_DELETED; } } return ret; } -static int -tcf_ipt_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, - int ovr, int bind) +static int tcf_ipt_init(struct rtattr *rta, struct rtattr *est, + struct tc_action *a, int ovr, int bind) { struct rtattr *tb[TCA_IPT_MAX]; - struct tcf_ipt *p; + struct tcf_ipt *ipt; + struct tcf_common *pc; struct ipt_entry_target *td, *t; char *tname; int ret = 0, err; @@ -144,49 +133,51 @@ tcf_ipt_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, RTA_PAYLOAD(tb[TCA_IPT_INDEX-1]) >= sizeof(u32)) index = *(u32 *)RTA_DATA(tb[TCA_IPT_INDEX-1]); - p = tcf_hash_check(index, a, ovr, bind); - if (p == NULL) { - p = tcf_hash_create(index, est, a, sizeof(*p), ovr, bind); - if (p == NULL) + pc = tcf_hash_check(index, a, bind, &ipt_hash_info); + if (!pc) { + pc = tcf_hash_create(index, est, a, sizeof(*ipt), bind, + &ipt_idx_gen, &ipt_hash_info); + if (unlikely(!pc)) return -ENOMEM; ret = ACT_P_CREATED; } else { if (!ovr) { - tcf_ipt_release(p, bind); + tcf_ipt_release(to_ipt(pc), bind); return -EEXIST; } } + ipt = to_ipt(pc); hook = *(u32 *)RTA_DATA(tb[TCA_IPT_HOOK-1]); err = -ENOMEM; tname = kmalloc(IFNAMSIZ, GFP_KERNEL); - if (tname == NULL) + if (unlikely(!tname)) goto err1; if (tb[TCA_IPT_TABLE - 1] == NULL || rtattr_strlcpy(tname, tb[TCA_IPT_TABLE-1], IFNAMSIZ) >= IFNAMSIZ) strcpy(tname, "mangle"); t = kmalloc(td->u.target_size, GFP_KERNEL); - if (t == NULL) + if (unlikely(!t)) goto err2; memcpy(t, td, td->u.target_size); if ((err = ipt_init_target(t, tname, hook)) < 0) goto err3; - spin_lock_bh(&p->lock); + spin_lock_bh(&ipt->tcf_lock); if (ret != ACT_P_CREATED) { - ipt_destroy_target(p->t); - kfree(p->tname); - kfree(p->t); + ipt_destroy_target(ipt->tcfi_t); + kfree(ipt->tcfi_tname); + kfree(ipt->tcfi_t); } - p->tname = tname; - p->t = t; - p->hook = hook; - spin_unlock_bh(&p->lock); + ipt->tcfi_tname = tname; + ipt->tcfi_t = t; + ipt->tcfi_hook = hook; + spin_unlock_bh(&ipt->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(p); + tcf_hash_insert(pc, &ipt_hash_info); return ret; err3: @@ -194,33 +185,32 @@ err3: err2: kfree(tname); err1: - kfree(p); + kfree(pc); return err; } -static int -tcf_ipt_cleanup(struct tc_action *a, int bind) +static int tcf_ipt_cleanup(struct tc_action *a, int bind) { - struct tcf_ipt *p = PRIV(a, ipt); - return tcf_ipt_release(p, bind); + struct tcf_ipt *ipt = a->priv; + return tcf_ipt_release(ipt, bind); } -static int -tcf_ipt(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) +static int tcf_ipt(struct sk_buff *skb, struct tc_action *a, + struct tcf_result *res) { int ret = 0, result = 0; - struct tcf_ipt *p = PRIV(a, ipt); + struct tcf_ipt *ipt = a->priv; if (skb_cloned(skb)) { if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) return TC_ACT_UNSPEC; } - spin_lock(&p->lock); + spin_lock(&ipt->tcf_lock); - p->tm.lastuse = jiffies; - p->bstats.bytes += skb->len; - p->bstats.packets++; + ipt->tcf_tm.lastuse = jiffies; + ipt->tcf_bstats.bytes += skb->len; + ipt->tcf_bstats.packets++; /* yes, we have to worry about both in and out dev worry later - danger - this API seems to have changed @@ -229,16 +219,17 @@ tcf_ipt(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) /* iptables targets take a double skb pointer in case the skb * needs to be replaced. We don't own the skb, so this must not * happen. The pskb_expand_head above should make sure of this */ - ret = p->t->u.kernel.target->target(&skb, skb->dev, NULL, p->hook, - p->t->u.kernel.target, p->t->data, - NULL); + ret = ipt->tcfi_t->u.kernel.target->target(&skb, skb->dev, NULL, + ipt->tcfi_hook, + ipt->tcfi_t->u.kernel.target, + ipt->tcfi_t->data, NULL); switch (ret) { case NF_ACCEPT: result = TC_ACT_OK; break; case NF_DROP: result = TC_ACT_SHOT; - p->qstats.drops++; + ipt->tcf_qstats.drops++; break; case IPT_CONTINUE: result = TC_ACT_PIPE; @@ -249,53 +240,46 @@ tcf_ipt(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) result = TC_POLICE_OK; break; } - spin_unlock(&p->lock); + spin_unlock(&ipt->tcf_lock); return result; } -static int -tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +static int tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { + unsigned char *b = skb->tail; + struct tcf_ipt *ipt = a->priv; struct ipt_entry_target *t; struct tcf_t tm; struct tc_cnt c; - unsigned char *b = skb->tail; - struct tcf_ipt *p = PRIV(a, ipt); /* for simple targets kernel size == user size ** user name = target name ** for foolproof you need to not assume this */ - t = kmalloc(p->t->u.user.target_size, GFP_ATOMIC); - if (t == NULL) + t = kmalloc(ipt->tcfi_t->u.user.target_size, GFP_ATOMIC); + if (unlikely(!t)) goto rtattr_failure; - c.bindcnt = p->bindcnt - bind; - c.refcnt = p->refcnt - ref; - memcpy(t, p->t, p->t->u.user.target_size); - strcpy(t->u.user.name, p->t->u.kernel.target->name); - - DPRINTK("\ttcf_ipt_dump tablename %s length %d\n", p->tname, - strlen(p->tname)); - DPRINTK("\tdump target name %s size %d size user %d " - "data[0] %x data[1] %x\n", p->t->u.kernel.target->name, - p->t->u.target_size, p->t->u.user.target_size, - p->t->data[0], p->t->data[1]); - RTA_PUT(skb, TCA_IPT_TARG, p->t->u.user.target_size, t); - RTA_PUT(skb, TCA_IPT_INDEX, 4, &p->index); - RTA_PUT(skb, TCA_IPT_HOOK, 4, &p->hook); + c.bindcnt = ipt->tcf_bindcnt - bind; + c.refcnt = ipt->tcf_refcnt - ref; + memcpy(t, ipt->tcfi_t, ipt->tcfi_t->u.user.target_size); + strcpy(t->u.user.name, ipt->tcfi_t->u.kernel.target->name); + + RTA_PUT(skb, TCA_IPT_TARG, ipt->tcfi_t->u.user.target_size, t); + RTA_PUT(skb, TCA_IPT_INDEX, 4, &ipt->tcf_index); + RTA_PUT(skb, TCA_IPT_HOOK, 4, &ipt->tcfi_hook); RTA_PUT(skb, TCA_IPT_CNT, sizeof(struct tc_cnt), &c); - RTA_PUT(skb, TCA_IPT_TABLE, IFNAMSIZ, p->tname); - tm.install = jiffies_to_clock_t(jiffies - p->tm.install); - tm.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); - tm.expires = jiffies_to_clock_t(p->tm.expires); + RTA_PUT(skb, TCA_IPT_TABLE, IFNAMSIZ, ipt->tcfi_tname); + tm.install = jiffies_to_clock_t(jiffies - ipt->tcf_tm.install); + tm.lastuse = jiffies_to_clock_t(jiffies - ipt->tcf_tm.lastuse); + tm.expires = jiffies_to_clock_t(ipt->tcf_tm.expires); RTA_PUT(skb, TCA_IPT_TM, sizeof (tm), &tm); kfree(t); return skb->len; - rtattr_failure: +rtattr_failure: skb_trim(skb, b - skb->data); kfree(t); return -1; @@ -303,6 +287,7 @@ tcf_ipt_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) static struct tc_action_ops act_ipt_ops = { .kind = "ipt", + .hinfo = &ipt_hash_info, .type = TCA_ACT_IPT, .capab = TCA_CAP_NONE, .owner = THIS_MODULE, @@ -318,14 +303,12 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); MODULE_DESCRIPTION("Iptables target actions"); MODULE_LICENSE("GPL"); -static int __init -ipt_init_module(void) +static int __init ipt_init_module(void) { return tcf_register_action(&act_ipt_ops); } -static void __exit -ipt_cleanup_module(void) +static void __exit ipt_cleanup_module(void) { tcf_unregister_action(&act_ipt_ops); } diff --git a/net/sched/act_mirred.c b/net/sched/act_mirred.c index fc562047ecc5..483897271f15 100644 --- a/net/sched/act_mirred.c +++ b/net/sched/act_mirred.c @@ -39,46 +39,39 @@ #include #include - -/* use generic hash table */ -#define MY_TAB_SIZE 8 -#define MY_TAB_MASK (MY_TAB_SIZE - 1) -static u32 idx_gen; -static struct tcf_mirred *tcf_mirred_ht[MY_TAB_SIZE]; +#define MIRRED_TAB_MASK 7 +static struct tcf_common *tcf_mirred_ht[MIRRED_TAB_MASK + 1]; +static u32 mirred_idx_gen; static DEFINE_RWLOCK(mirred_lock); -/* ovewrride the defaults */ -#define tcf_st tcf_mirred -#define tc_st tc_mirred -#define tcf_t_lock mirred_lock -#define tcf_ht tcf_mirred_ht - -#define CONFIG_NET_ACT_INIT 1 -#include +static struct tcf_hashinfo mirred_hash_info = { + .htab = tcf_mirred_ht, + .hmask = MIRRED_TAB_MASK, + .lock = &mirred_lock, +}; -static inline int -tcf_mirred_release(struct tcf_mirred *p, int bind) +static inline int tcf_mirred_release(struct tcf_mirred *m, int bind) { - if (p) { + if (m) { if (bind) - p->bindcnt--; - p->refcnt--; - if(!p->bindcnt && p->refcnt <= 0) { - dev_put(p->dev); - tcf_hash_destroy(p); + m->tcf_bindcnt--; + m->tcf_refcnt--; + if(!m->tcf_bindcnt && m->tcf_refcnt <= 0) { + dev_put(m->tcfm_dev); + tcf_hash_destroy(&m->common, &mirred_hash_info); return 1; } } return 0; } -static int -tcf_mirred_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, - int ovr, int bind) +static int tcf_mirred_init(struct rtattr *rta, struct rtattr *est, + struct tc_action *a, int ovr, int bind) { struct rtattr *tb[TCA_MIRRED_MAX]; struct tc_mirred *parm; - struct tcf_mirred *p; + struct tcf_mirred *m; + struct tcf_common *pc; struct net_device *dev = NULL; int ret = 0; int ok_push = 0; @@ -110,64 +103,62 @@ tcf_mirred_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, } } - p = tcf_hash_check(parm->index, a, ovr, bind); - if (p == NULL) { + pc = tcf_hash_check(parm->index, a, bind, &mirred_hash_info); + if (!pc) { if (!parm->ifindex) return -EINVAL; - p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind); - if (p == NULL) + pc = tcf_hash_create(parm->index, est, a, sizeof(*m), bind, + &mirred_idx_gen, &mirred_hash_info); + if (unlikely(!pc)) return -ENOMEM; ret = ACT_P_CREATED; } else { if (!ovr) { - tcf_mirred_release(p, bind); + tcf_mirred_release(to_mirred(pc), bind); return -EEXIST; } } + m = to_mirred(pc); - spin_lock_bh(&p->lock); - p->action = parm->action; - p->eaction = parm->eaction; + spin_lock_bh(&m->tcf_lock); + m->tcf_action = parm->action; + m->tcfm_eaction = parm->eaction; if (parm->ifindex) { - p->ifindex = parm->ifindex; + m->tcfm_ifindex = parm->ifindex; if (ret != ACT_P_CREATED) - dev_put(p->dev); - p->dev = dev; + dev_put(m->tcfm_dev); + m->tcfm_dev = dev; dev_hold(dev); - p->ok_push = ok_push; + m->tcfm_ok_push = ok_push; } - spin_unlock_bh(&p->lock); + spin_unlock_bh(&m->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(p); + tcf_hash_insert(pc, &mirred_hash_info); - DPRINTK("tcf_mirred_init index %d action %d eaction %d device %s " - "ifindex %d\n", parm->index, parm->action, parm->eaction, - dev->name, parm->ifindex); return ret; } -static int -tcf_mirred_cleanup(struct tc_action *a, int bind) +static int tcf_mirred_cleanup(struct tc_action *a, int bind) { - struct tcf_mirred *p = PRIV(a, mirred); + struct tcf_mirred *m = a->priv; - if (p != NULL) - return tcf_mirred_release(p, bind); + if (m) + return tcf_mirred_release(m, bind); return 0; } -static int -tcf_mirred(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) +static int tcf_mirred(struct sk_buff *skb, struct tc_action *a, + struct tcf_result *res) { - struct tcf_mirred *p = PRIV(a, mirred); + struct tcf_mirred *m = a->priv; struct net_device *dev; struct sk_buff *skb2 = NULL; u32 at = G_TC_AT(skb->tc_verd); - spin_lock(&p->lock); + spin_lock(&m->tcf_lock); - dev = p->dev; - p->tm.lastuse = jiffies; + dev = m->tcfm_dev; + m->tcf_tm.lastuse = jiffies; if (!(dev->flags&IFF_UP) ) { if (net_ratelimit()) @@ -176,10 +167,10 @@ tcf_mirred(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) bad_mirred: if (skb2 != NULL) kfree_skb(skb2); - p->qstats.overlimits++; - p->bstats.bytes += skb->len; - p->bstats.packets++; - spin_unlock(&p->lock); + m->tcf_qstats.overlimits++; + m->tcf_bstats.bytes += skb->len; + m->tcf_bstats.packets++; + spin_unlock(&m->tcf_lock); /* should we be asking for packet to be dropped? * may make sense for redirect case only */ @@ -189,59 +180,59 @@ bad_mirred: skb2 = skb_clone(skb, GFP_ATOMIC); if (skb2 == NULL) goto bad_mirred; - if (p->eaction != TCA_EGRESS_MIRROR && p->eaction != TCA_EGRESS_REDIR) { + if (m->tcfm_eaction != TCA_EGRESS_MIRROR && + m->tcfm_eaction != TCA_EGRESS_REDIR) { if (net_ratelimit()) - printk("tcf_mirred unknown action %d\n", p->eaction); + printk("tcf_mirred unknown action %d\n", + m->tcfm_eaction); goto bad_mirred; } - p->bstats.bytes += skb2->len; - p->bstats.packets++; + m->tcf_bstats.bytes += skb2->len; + m->tcf_bstats.packets++; if (!(at & AT_EGRESS)) - if (p->ok_push) + if (m->tcfm_ok_push) skb_push(skb2, skb2->dev->hard_header_len); /* mirror is always swallowed */ - if (p->eaction != TCA_EGRESS_MIRROR) + if (m->tcfm_eaction != TCA_EGRESS_MIRROR) skb2->tc_verd = SET_TC_FROM(skb2->tc_verd, at); skb2->dev = dev; skb2->input_dev = skb->dev; dev_queue_xmit(skb2); - spin_unlock(&p->lock); - return p->action; + spin_unlock(&m->tcf_lock); + return m->tcf_action; } -static int -tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) +static int tcf_mirred_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb->tail; + struct tcf_mirred *m = a->priv; struct tc_mirred opt; - struct tcf_mirred *p = PRIV(a, mirred); struct tcf_t t; - opt.index = p->index; - opt.action = p->action; - opt.refcnt = p->refcnt - ref; - opt.bindcnt = p->bindcnt - bind; - opt.eaction = p->eaction; - opt.ifindex = p->ifindex; - DPRINTK("tcf_mirred_dump index %d action %d eaction %d ifindex %d\n", - p->index, p->action, p->eaction, p->ifindex); + opt.index = m->tcf_index; + opt.action = m->tcf_action; + opt.refcnt = m->tcf_refcnt - ref; + opt.bindcnt = m->tcf_bindcnt - bind; + opt.eaction = m->tcfm_eaction; + opt.ifindex = m->tcfm_ifindex; RTA_PUT(skb, TCA_MIRRED_PARMS, sizeof(opt), &opt); - t.install = jiffies_to_clock_t(jiffies - p->tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); - t.expires = jiffies_to_clock_t(p->tm.expires); + t.install = jiffies_to_clock_t(jiffies - m->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - m->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(m->tcf_tm.expires); RTA_PUT(skb, TCA_MIRRED_TM, sizeof(t), &t); return skb->len; - rtattr_failure: +rtattr_failure: skb_trim(skb, b - skb->data); return -1; } static struct tc_action_ops act_mirred_ops = { .kind = "mirred", + .hinfo = &mirred_hash_info, .type = TCA_ACT_MIRRED, .capab = TCA_CAP_NONE, .owner = THIS_MODULE, @@ -257,15 +248,13 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002)"); MODULE_DESCRIPTION("Device Mirror/redirect actions"); MODULE_LICENSE("GPL"); -static int __init -mirred_init_module(void) +static int __init mirred_init_module(void) { printk("Mirror/redirect action on\n"); return tcf_register_action(&act_mirred_ops); } -static void __exit -mirred_cleanup_module(void) +static void __exit mirred_cleanup_module(void) { tcf_unregister_action(&act_mirred_ops); } diff --git a/net/sched/act_pedit.c b/net/sched/act_pedit.c index f257475e0e0c..8ac65c219b98 100644 --- a/net/sched/act_pedit.c +++ b/net/sched/act_pedit.c @@ -33,32 +33,25 @@ #include #include - -#define PEDIT_DEB 1 - -/* use generic hash table */ -#define MY_TAB_SIZE 16 -#define MY_TAB_MASK 15 -static u32 idx_gen; -static struct tcf_pedit *tcf_pedit_ht[MY_TAB_SIZE]; +#define PEDIT_TAB_MASK 15 +static struct tcf_common *tcf_pedit_ht[PEDIT_TAB_MASK + 1]; +static u32 pedit_idx_gen; static DEFINE_RWLOCK(pedit_lock); -#define tcf_st tcf_pedit -#define tc_st tc_pedit -#define tcf_t_lock pedit_lock -#define tcf_ht tcf_pedit_ht - -#define CONFIG_NET_ACT_INIT 1 -#include +static struct tcf_hashinfo pedit_hash_info = { + .htab = tcf_pedit_ht, + .hmask = PEDIT_TAB_MASK, + .lock = &pedit_lock, +}; -static int -tcf_pedit_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, - int ovr, int bind) +static int tcf_pedit_init(struct rtattr *rta, struct rtattr *est, + struct tc_action *a, int ovr, int bind) { struct rtattr *tb[TCA_PEDIT_MAX]; struct tc_pedit *parm; int ret = 0; struct tcf_pedit *p; + struct tcf_common *pc; struct tc_pedit_key *keys = NULL; int ksize; @@ -73,54 +66,56 @@ tcf_pedit_init(struct rtattr *rta, struct rtattr *est, struct tc_action *a, if (RTA_PAYLOAD(tb[TCA_PEDIT_PARMS-1]) < sizeof(*parm) + ksize) return -EINVAL; - p = tcf_hash_check(parm->index, a, ovr, bind); - if (p == NULL) { + pc = tcf_hash_check(parm->index, a, bind, &pedit_hash_info); + if (!pc) { if (!parm->nkeys) return -EINVAL; - p = tcf_hash_create(parm->index, est, a, sizeof(*p), ovr, bind); - if (p == NULL) + pc = tcf_hash_create(parm->index, est, a, sizeof(*p), bind, + &pedit_idx_gen, &pedit_hash_info); + if (unlikely(!pc)) return -ENOMEM; + p = to_pedit(pc); keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) { - kfree(p); + kfree(pc); return -ENOMEM; } ret = ACT_P_CREATED; } else { + p = to_pedit(pc); if (!ovr) { - tcf_hash_release(p, bind); + tcf_hash_release(pc, bind, &pedit_hash_info); return -EEXIST; } - if (p->nkeys && p->nkeys != parm->nkeys) { + if (p->tcfp_nkeys && p->tcfp_nkeys != parm->nkeys) { keys = kmalloc(ksize, GFP_KERNEL); if (keys == NULL) return -ENOMEM; } } - spin_lock_bh(&p->lock); - p->flags = parm->flags; - p->action = parm->action; + spin_lock_bh(&p->tcf_lock); + p->tcfp_flags = parm->flags; + p->tcf_action = parm->action; if (keys) { - kfree(p->keys); - p->keys = keys; - p->nkeys = parm->nkeys; + kfree(p->tcfp_keys); + p->tcfp_keys = keys; + p->tcfp_nkeys = parm->nkeys; } - memcpy(p->keys, parm->keys, ksize); - spin_unlock_bh(&p->lock); + memcpy(p->tcfp_keys, parm->keys, ksize); + spin_unlock_bh(&p->tcf_lock); if (ret == ACT_P_CREATED) - tcf_hash_insert(p); + tcf_hash_insert(pc, &pedit_hash_info); return ret; } -static int -tcf_pedit_cleanup(struct tc_action *a, int bind) +static int tcf_pedit_cleanup(struct tc_action *a, int bind) { - struct tcf_pedit *p = PRIV(a, pedit); + struct tcf_pedit *p = a->priv; - if (p != NULL) { - struct tc_pedit_key *keys = p->keys; - if (tcf_hash_release(p, bind)) { + if (p) { + struct tc_pedit_key *keys = p->tcfp_keys; + if (tcf_hash_release(&p->common, bind, &pedit_hash_info)) { kfree(keys); return 1; } @@ -128,30 +123,30 @@ tcf_pedit_cleanup(struct tc_action *a, int bind) return 0; } -static int -tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) +static int tcf_pedit(struct sk_buff *skb, struct tc_action *a, + struct tcf_result *res) { - struct tcf_pedit *p = PRIV(a, pedit); + struct tcf_pedit *p = a->priv; int i, munged = 0; u8 *pptr; if (!(skb->tc_verd & TC_OK2MUNGE)) { /* should we set skb->cloned? */ if (pskb_expand_head(skb, 0, 0, GFP_ATOMIC)) { - return p->action; + return p->tcf_action; } } pptr = skb->nh.raw; - spin_lock(&p->lock); + spin_lock(&p->tcf_lock); - p->tm.lastuse = jiffies; + p->tcf_tm.lastuse = jiffies; - if (p->nkeys > 0) { - struct tc_pedit_key *tkey = p->keys; + if (p->tcfp_nkeys > 0) { + struct tc_pedit_key *tkey = p->tcfp_keys; - for (i = p->nkeys; i > 0; i--, tkey++) { + for (i = p->tcfp_nkeys; i > 0; i--, tkey++) { u32 *ptr; int offset = tkey->off; @@ -169,7 +164,8 @@ tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) printk("offset must be on 32 bit boundaries\n"); goto bad; } - if (skb->len < 0 || (offset > 0 && offset > skb->len)) { + if (skb->len < 0 || + (offset > 0 && offset > skb->len)) { printk("offset %d cant exceed pkt length %d\n", offset, skb->len); goto bad; @@ -185,63 +181,47 @@ tcf_pedit(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) skb->tc_verd = SET_TC_MUNGED(skb->tc_verd); goto done; } else { - printk("pedit BUG: index %d\n",p->index); + printk("pedit BUG: index %d\n", p->tcf_index); } bad: - p->qstats.overlimits++; + p->tcf_qstats.overlimits++; done: - p->bstats.bytes += skb->len; - p->bstats.packets++; - spin_unlock(&p->lock); - return p->action; + p->tcf_bstats.bytes += skb->len; + p->tcf_bstats.packets++; + spin_unlock(&p->tcf_lock); + return p->tcf_action; } -static int -tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a,int bind, int ref) +static int tcf_pedit_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) { unsigned char *b = skb->tail; + struct tcf_pedit *p = a->priv; struct tc_pedit *opt; - struct tcf_pedit *p = PRIV(a, pedit); struct tcf_t t; int s; - s = sizeof(*opt) + p->nkeys * sizeof(struct tc_pedit_key); + s = sizeof(*opt) + p->tcfp_nkeys * sizeof(struct tc_pedit_key); /* netlink spinlocks held above us - must use ATOMIC */ opt = kzalloc(s, GFP_ATOMIC); - if (opt == NULL) + if (unlikely(!opt)) return -ENOBUFS; - memcpy(opt->keys, p->keys, p->nkeys * sizeof(struct tc_pedit_key)); - opt->index = p->index; - opt->nkeys = p->nkeys; - opt->flags = p->flags; - opt->action = p->action; - opt->refcnt = p->refcnt - ref; - opt->bindcnt = p->bindcnt - bind; - - -#ifdef PEDIT_DEB - { - /* Debug - get rid of later */ - int i; - struct tc_pedit_key *key = opt->keys; - - for (i=0; inkeys; i++, key++) { - printk( "\n key #%d",i); - printk( " at %d: val %08x mask %08x", - (unsigned int)key->off, - (unsigned int)key->val, - (unsigned int)key->mask); - } - } -#endif + memcpy(opt->keys, p->tcfp_keys, + p->tcfp_nkeys * sizeof(struct tc_pedit_key)); + opt->index = p->tcf_index; + opt->nkeys = p->tcfp_nkeys; + opt->flags = p->tcfp_flags; + opt->action = p->tcf_action; + opt->refcnt = p->tcf_refcnt - ref; + opt->bindcnt = p->tcf_bindcnt - bind; RTA_PUT(skb, TCA_PEDIT_PARMS, s, opt); - t.install = jiffies_to_clock_t(jiffies - p->tm.install); - t.lastuse = jiffies_to_clock_t(jiffies - p->tm.lastuse); - t.expires = jiffies_to_clock_t(p->tm.expires); + t.install = jiffies_to_clock_t(jiffies - p->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - p->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(p->tcf_tm.expires); RTA_PUT(skb, TCA_PEDIT_TM, sizeof(t), &t); kfree(opt); return skb->len; @@ -252,9 +232,9 @@ rtattr_failure: return -1; } -static -struct tc_action_ops act_pedit_ops = { +static struct tc_action_ops act_pedit_ops = { .kind = "pedit", + .hinfo = &pedit_hash_info, .type = TCA_ACT_PEDIT, .capab = TCA_CAP_NONE, .owner = THIS_MODULE, @@ -270,14 +250,12 @@ MODULE_AUTHOR("Jamal Hadi Salim(2002-4)"); MODULE_DESCRIPTION("Generic Packet Editor actions"); MODULE_LICENSE("GPL"); -static int __init -pedit_init_module(void) +static int __init pedit_init_module(void) { return tcf_register_action(&act_pedit_ops); } -static void __exit -pedit_cleanup_module(void) +static void __exit pedit_cleanup_module(void) { tcf_unregister_action(&act_pedit_ops); } diff --git a/net/sched/act_police.c b/net/sched/act_police.c index da905d7b4b40..fed47b658837 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -32,43 +32,27 @@ #include #include -#define L2T(p,L) ((p)->R_tab->data[(L)>>(p)->R_tab->rate.cell_log]) -#define L2T_P(p,L) ((p)->P_tab->data[(L)>>(p)->P_tab->rate.cell_log]) -#define PRIV(a) ((struct tcf_police *) (a)->priv) - -/* use generic hash table */ -#define MY_TAB_SIZE 16 -#define MY_TAB_MASK 15 -static u32 idx_gen; -static struct tcf_police *tcf_police_ht[MY_TAB_SIZE]; -/* Policer hash table lock */ -static DEFINE_RWLOCK(police_lock); - -/* Each policer is serialized by its individual spinlock */ +#define L2T(p,L) ((p)->tcfp_R_tab->data[(L)>>(p)->tcfp_R_tab->rate.cell_log]) +#define L2T_P(p,L) ((p)->tcfp_P_tab->data[(L)>>(p)->tcfp_P_tab->rate.cell_log]) -static __inline__ unsigned tcf_police_hash(u32 index) -{ - return index&0xF; -} +#define POL_TAB_MASK 15 +static struct tcf_common *tcf_police_ht[POL_TAB_MASK + 1]; +static u32 police_idx_gen; +static DEFINE_RWLOCK(police_lock); -static __inline__ struct tcf_police * tcf_police_lookup(u32 index) -{ - struct tcf_police *p; +static struct tcf_hashinfo police_hash_info = { + .htab = tcf_police_ht, + .hmask = POL_TAB_MASK, + .lock = &police_lock, +}; - read_lock(&police_lock); - for (p = tcf_police_ht[tcf_police_hash(index)]; p; p = p->next) { - if (p->index == index) - break; - } - read_unlock(&police_lock); - return p; -} +/* Each policer is serialized by its individual spinlock */ #ifdef CONFIG_NET_CLS_ACT static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *cb, int type, struct tc_action *a) { - struct tcf_police *p; + struct tcf_common *p; int err = 0, index = -1, i = 0, s_i = 0, n_i = 0; struct rtattr *r; @@ -76,10 +60,10 @@ static int tcf_act_police_walker(struct sk_buff *skb, struct netlink_callback *c s_i = cb->args[0]; - for (i = 0; i < MY_TAB_SIZE; i++) { - p = tcf_police_ht[tcf_police_hash(i)]; + for (i = 0; i < (POL_TAB_MASK + 1); i++) { + p = tcf_police_ht[tcf_hash(i, POL_TAB_MASK)]; - for (; p; p = p->next) { + for (; p; p = p->tcfc_next) { index++; if (index < s_i) continue; @@ -110,48 +94,26 @@ rtattr_failure: skb_trim(skb, (u8*)r - skb->data); goto done; } - -static inline int -tcf_act_police_hash_search(struct tc_action *a, u32 index) -{ - struct tcf_police *p = tcf_police_lookup(index); - - if (p != NULL) { - a->priv = p; - return 1; - } else { - return 0; - } -} #endif -static inline u32 tcf_police_new_index(void) -{ - do { - if (++idx_gen == 0) - idx_gen = 1; - } while (tcf_police_lookup(idx_gen)); - - return idx_gen; -} - void tcf_police_destroy(struct tcf_police *p) { - unsigned h = tcf_police_hash(p->index); - struct tcf_police **p1p; + unsigned int h = tcf_hash(p->tcf_index, POL_TAB_MASK); + struct tcf_common **p1p; - for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->next) { - if (*p1p == p) { + for (p1p = &tcf_police_ht[h]; *p1p; p1p = &(*p1p)->tcfc_next) { + if (*p1p == &p->common) { write_lock_bh(&police_lock); - *p1p = p->next; + *p1p = p->tcf_next; write_unlock_bh(&police_lock); #ifdef CONFIG_NET_ESTIMATOR - gen_kill_estimator(&p->bstats, &p->rate_est); + gen_kill_estimator(&p->tcf_bstats, + &p->tcf_rate_est); #endif - if (p->R_tab) - qdisc_put_rtab(p->R_tab); - if (p->P_tab) - qdisc_put_rtab(p->P_tab); + if (p->tcfp_R_tab) + qdisc_put_rtab(p->tcfp_R_tab); + if (p->tcfp_P_tab) + qdisc_put_rtab(p->tcfp_P_tab); kfree(p); return; } @@ -167,7 +129,7 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est, int ret = 0, err; struct rtattr *tb[TCA_POLICE_MAX]; struct tc_police *parm; - struct tcf_police *p; + struct tcf_police *police; struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; if (rta == NULL || rtattr_parse_nested(tb, TCA_POLICE_MAX, rta) < 0) @@ -185,27 +147,32 @@ static int tcf_act_police_locate(struct rtattr *rta, struct rtattr *est, RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) return -EINVAL; - if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) { - a->priv = p; - if (bind) { - p->bindcnt += 1; - p->refcnt += 1; + if (parm->index) { + struct tcf_common *pc; + + pc = tcf_hash_lookup(parm->index, &police_hash_info); + if (pc != NULL) { + a->priv = pc; + police = to_police(pc); + if (bind) { + police->tcf_bindcnt += 1; + police->tcf_refcnt += 1; + } + if (ovr) + goto override; + return ret; } - if (ovr) - goto override; - return ret; } - p = kzalloc(sizeof(*p), GFP_KERNEL); - if (p == NULL) + police = kzalloc(sizeof(*police), GFP_KERNEL); + if (police == NULL) return -ENOMEM; - ret = ACT_P_CREATED; - p->refcnt = 1; - spin_lock_init(&p->lock); - p->stats_lock = &p->lock; + police->tcf_refcnt = 1; + spin_lock_init(&police->tcf_lock); + police->tcf_stats_lock = &police->tcf_lock; if (bind) - p->bindcnt = 1; + police->tcf_bindcnt = 1; override: if (parm->rate.rate) { err = -ENOMEM; @@ -215,67 +182,71 @@ override: if (parm->peakrate.rate) { P_tab = qdisc_get_rtab(&parm->peakrate, tb[TCA_POLICE_PEAKRATE-1]); - if (p->P_tab == NULL) { + if (P_tab == NULL) { qdisc_put_rtab(R_tab); goto failure; } } } /* No failure allowed after this point */ - spin_lock_bh(&p->lock); + spin_lock_bh(&police->tcf_lock); if (R_tab != NULL) { - qdisc_put_rtab(p->R_tab); - p->R_tab = R_tab; + qdisc_put_rtab(police->tcfp_R_tab); + police->tcfp_R_tab = R_tab; } if (P_tab != NULL) { - qdisc_put_rtab(p->P_tab); - p->P_tab = P_tab; + qdisc_put_rtab(police->tcfp_P_tab); + police->tcfp_P_tab = P_tab; } if (tb[TCA_POLICE_RESULT-1]) - p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); - p->toks = p->burst = parm->burst; - p->mtu = parm->mtu; - if (p->mtu == 0) { - p->mtu = ~0; - if (p->R_tab) - p->mtu = 255<R_tab->rate.cell_log; + police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); + police->tcfp_toks = police->tcfp_burst = parm->burst; + police->tcfp_mtu = parm->mtu; + if (police->tcfp_mtu == 0) { + police->tcfp_mtu = ~0; + if (police->tcfp_R_tab) + police->tcfp_mtu = 255<tcfp_R_tab->rate.cell_log; } - if (p->P_tab) - p->ptoks = L2T_P(p, p->mtu); - p->action = parm->action; + if (police->tcfp_P_tab) + police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); + police->tcf_action = parm->action; #ifdef CONFIG_NET_ESTIMATOR if (tb[TCA_POLICE_AVRATE-1]) - p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); + police->tcfp_ewma_rate = + *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); if (est) - gen_replace_estimator(&p->bstats, &p->rate_est, p->stats_lock, est); + gen_replace_estimator(&police->tcf_bstats, + &police->tcf_rate_est, + police->tcf_stats_lock, est); #endif - spin_unlock_bh(&p->lock); + spin_unlock_bh(&police->tcf_lock); if (ret != ACT_P_CREATED) return ret; - PSCHED_GET_TIME(p->t_c); - p->index = parm->index ? : tcf_police_new_index(); - h = tcf_police_hash(p->index); + PSCHED_GET_TIME(police->tcfp_t_c); + police->tcf_index = parm->index ? parm->index : + tcf_hash_new_index(&police_idx_gen, &police_hash_info); + h = tcf_hash(police->tcf_index, POL_TAB_MASK); write_lock_bh(&police_lock); - p->next = tcf_police_ht[h]; - tcf_police_ht[h] = p; + police->tcf_next = tcf_police_ht[h]; + tcf_police_ht[h] = &police->common; write_unlock_bh(&police_lock); - a->priv = p; + a->priv = police; return ret; failure: if (ret == ACT_P_CREATED) - kfree(p); + kfree(police); return err; } static int tcf_act_police_cleanup(struct tc_action *a, int bind) { - struct tcf_police *p = PRIV(a); + struct tcf_police *p = a->priv; if (p != NULL) return tcf_police_release(p, bind); @@ -285,86 +256,87 @@ static int tcf_act_police_cleanup(struct tc_action *a, int bind) static int tcf_act_police(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) { + struct tcf_police *police = a->priv; psched_time_t now; - struct tcf_police *p = PRIV(a); long toks; long ptoks = 0; - spin_lock(&p->lock); + spin_lock(&police->tcf_lock); - p->bstats.bytes += skb->len; - p->bstats.packets++; + police->tcf_bstats.bytes += skb->len; + police->tcf_bstats.packets++; #ifdef CONFIG_NET_ESTIMATOR - if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) { - p->qstats.overlimits++; - spin_unlock(&p->lock); - return p->action; + if (police->tcfp_ewma_rate && + police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { + police->tcf_qstats.overlimits++; + spin_unlock(&police->tcf_lock); + return police->tcf_action; } #endif - if (skb->len <= p->mtu) { - if (p->R_tab == NULL) { - spin_unlock(&p->lock); - return p->result; + if (skb->len <= police->tcfp_mtu) { + if (police->tcfp_R_tab == NULL) { + spin_unlock(&police->tcf_lock); + return police->tcfp_result; } PSCHED_GET_TIME(now); - toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst); - - if (p->P_tab) { - ptoks = toks + p->ptoks; - if (ptoks > (long)L2T_P(p, p->mtu)) - ptoks = (long)L2T_P(p, p->mtu); - ptoks -= L2T_P(p, skb->len); + toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c, + police->tcfp_burst); + if (police->tcfp_P_tab) { + ptoks = toks + police->tcfp_ptoks; + if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) + ptoks = (long)L2T_P(police, police->tcfp_mtu); + ptoks -= L2T_P(police, skb->len); } - toks += p->toks; - if (toks > (long)p->burst) - toks = p->burst; - toks -= L2T(p, skb->len); - + toks += police->tcfp_toks; + if (toks > (long)police->tcfp_burst) + toks = police->tcfp_burst; + toks -= L2T(police, skb->len); if ((toks|ptoks) >= 0) { - p->t_c = now; - p->toks = toks; - p->ptoks = ptoks; - spin_unlock(&p->lock); - return p->result; + police->tcfp_t_c = now; + police->tcfp_toks = toks; + police->tcfp_ptoks = ptoks; + spin_unlock(&police->tcf_lock); + return police->tcfp_result; } } - p->qstats.overlimits++; - spin_unlock(&p->lock); - return p->action; + police->tcf_qstats.overlimits++; + spin_unlock(&police->tcf_lock); + return police->tcf_action; } static int tcf_act_police_dump(struct sk_buff *skb, struct tc_action *a, int bind, int ref) { unsigned char *b = skb->tail; + struct tcf_police *police = a->priv; struct tc_police opt; - struct tcf_police *p = PRIV(a); - - opt.index = p->index; - opt.action = p->action; - opt.mtu = p->mtu; - opt.burst = p->burst; - opt.refcnt = p->refcnt - ref; - opt.bindcnt = p->bindcnt - bind; - if (p->R_tab) - opt.rate = p->R_tab->rate; + + opt.index = police->tcf_index; + opt.action = police->tcf_action; + opt.mtu = police->tcfp_mtu; + opt.burst = police->tcfp_burst; + opt.refcnt = police->tcf_refcnt - ref; + opt.bindcnt = police->tcf_bindcnt - bind; + if (police->tcfp_R_tab) + opt.rate = police->tcfp_R_tab->rate; else memset(&opt.rate, 0, sizeof(opt.rate)); - if (p->P_tab) - opt.peakrate = p->P_tab->rate; + if (police->tcfp_P_tab) + opt.peakrate = police->tcfp_P_tab->rate; else memset(&opt.peakrate, 0, sizeof(opt.peakrate)); RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); - if (p->result) - RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result); + if (police->tcfp_result) + RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), + &police->tcfp_result); #ifdef CONFIG_NET_ESTIMATOR - if (p->ewma_rate) - RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate); + if (police->tcfp_ewma_rate) + RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate); #endif return skb->len; @@ -379,13 +351,14 @@ MODULE_LICENSE("GPL"); static struct tc_action_ops act_police_ops = { .kind = "police", + .hinfo = &police_hash_info, .type = TCA_ID_POLICE, .capab = TCA_CAP_NONE, .owner = THIS_MODULE, .act = tcf_act_police, .dump = tcf_act_police_dump, .cleanup = tcf_act_police_cleanup, - .lookup = tcf_act_police_hash_search, + .lookup = tcf_hash_search, .init = tcf_act_police_locate, .walk = tcf_act_police_walker }; @@ -407,10 +380,39 @@ module_exit(police_cleanup_module); #else /* CONFIG_NET_CLS_ACT */ -struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est) +static struct tcf_common *tcf_police_lookup(u32 index) { - unsigned h; - struct tcf_police *p; + struct tcf_hashinfo *hinfo = &police_hash_info; + struct tcf_common *p; + + read_lock(hinfo->lock); + for (p = hinfo->htab[tcf_hash(index, hinfo->hmask)]; p; + p = p->tcfc_next) { + if (p->tcfc_index == index) + break; + } + read_unlock(hinfo->lock); + + return p; +} + +static u32 tcf_police_new_index(void) +{ + u32 *idx_gen = &police_idx_gen; + u32 val = *idx_gen; + + do { + if (++val == 0) + val = 1; + } while (tcf_police_lookup(val)); + + return (*idx_gen = val); +} + +struct tcf_police *tcf_police_locate(struct rtattr *rta, struct rtattr *est) +{ + unsigned int h; + struct tcf_police *police; struct rtattr *tb[TCA_POLICE_MAX]; struct tc_police *parm; @@ -423,149 +425,158 @@ struct tcf_police * tcf_police_locate(struct rtattr *rta, struct rtattr *est) parm = RTA_DATA(tb[TCA_POLICE_TBF-1]); - if (parm->index && (p = tcf_police_lookup(parm->index)) != NULL) { - p->refcnt++; - return p; - } + if (parm->index) { + struct tcf_common *pc; - p = kzalloc(sizeof(*p), GFP_KERNEL); - if (p == NULL) + pc = tcf_police_lookup(parm->index); + if (pc) { + police = to_police(pc); + police->tcf_refcnt++; + return police; + } + } + police = kzalloc(sizeof(*police), GFP_KERNEL); + if (unlikely(!police)) return NULL; - p->refcnt = 1; - spin_lock_init(&p->lock); - p->stats_lock = &p->lock; + police->tcf_refcnt = 1; + spin_lock_init(&police->tcf_lock); + police->tcf_stats_lock = &police->tcf_lock; if (parm->rate.rate) { - p->R_tab = qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); - if (p->R_tab == NULL) + police->tcfp_R_tab = + qdisc_get_rtab(&parm->rate, tb[TCA_POLICE_RATE-1]); + if (police->tcfp_R_tab == NULL) goto failure; if (parm->peakrate.rate) { - p->P_tab = qdisc_get_rtab(&parm->peakrate, - tb[TCA_POLICE_PEAKRATE-1]); - if (p->P_tab == NULL) + police->tcfp_P_tab = + qdisc_get_rtab(&parm->peakrate, + tb[TCA_POLICE_PEAKRATE-1]); + if (police->tcfp_P_tab == NULL) goto failure; } } if (tb[TCA_POLICE_RESULT-1]) { if (RTA_PAYLOAD(tb[TCA_POLICE_RESULT-1]) != sizeof(u32)) goto failure; - p->result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); + police->tcfp_result = *(u32*)RTA_DATA(tb[TCA_POLICE_RESULT-1]); } #ifdef CONFIG_NET_ESTIMATOR if (tb[TCA_POLICE_AVRATE-1]) { if (RTA_PAYLOAD(tb[TCA_POLICE_AVRATE-1]) != sizeof(u32)) goto failure; - p->ewma_rate = *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); + police->tcfp_ewma_rate = + *(u32*)RTA_DATA(tb[TCA_POLICE_AVRATE-1]); } #endif - p->toks = p->burst = parm->burst; - p->mtu = parm->mtu; - if (p->mtu == 0) { - p->mtu = ~0; - if (p->R_tab) - p->mtu = 255<R_tab->rate.cell_log; + police->tcfp_toks = police->tcfp_burst = parm->burst; + police->tcfp_mtu = parm->mtu; + if (police->tcfp_mtu == 0) { + police->tcfp_mtu = ~0; + if (police->tcfp_R_tab) + police->tcfp_mtu = 255<tcfp_R_tab->rate.cell_log; } - if (p->P_tab) - p->ptoks = L2T_P(p, p->mtu); - PSCHED_GET_TIME(p->t_c); - p->index = parm->index ? : tcf_police_new_index(); - p->action = parm->action; + if (police->tcfp_P_tab) + police->tcfp_ptoks = L2T_P(police, police->tcfp_mtu); + PSCHED_GET_TIME(police->tcfp_t_c); + police->tcf_index = parm->index ? parm->index : + tcf_police_new_index(); + police->tcf_action = parm->action; #ifdef CONFIG_NET_ESTIMATOR if (est) - gen_new_estimator(&p->bstats, &p->rate_est, p->stats_lock, est); + gen_new_estimator(&police->tcf_bstats, &police->tcf_rate_est, + police->tcf_stats_lock, est); #endif - h = tcf_police_hash(p->index); + h = tcf_hash(police->tcf_index, POL_TAB_MASK); write_lock_bh(&police_lock); - p->next = tcf_police_ht[h]; - tcf_police_ht[h] = p; + police->tcf_next = tcf_police_ht[h]; + tcf_police_ht[h] = &police->common; write_unlock_bh(&police_lock); - return p; + return police; failure: - if (p->R_tab) - qdisc_put_rtab(p->R_tab); - kfree(p); + if (police->tcfp_R_tab) + qdisc_put_rtab(police->tcfp_R_tab); + kfree(police); return NULL; } -int tcf_police(struct sk_buff *skb, struct tcf_police *p) +int tcf_police(struct sk_buff *skb, struct tcf_police *police) { psched_time_t now; long toks; long ptoks = 0; - spin_lock(&p->lock); + spin_lock(&police->tcf_lock); - p->bstats.bytes += skb->len; - p->bstats.packets++; + police->tcf_bstats.bytes += skb->len; + police->tcf_bstats.packets++; #ifdef CONFIG_NET_ESTIMATOR - if (p->ewma_rate && p->rate_est.bps >= p->ewma_rate) { - p->qstats.overlimits++; - spin_unlock(&p->lock); - return p->action; + if (police->tcfp_ewma_rate && + police->tcf_rate_est.bps >= police->tcfp_ewma_rate) { + police->tcf_qstats.overlimits++; + spin_unlock(&police->tcf_lock); + return police->tcf_action; } #endif - - if (skb->len <= p->mtu) { - if (p->R_tab == NULL) { - spin_unlock(&p->lock); - return p->result; + if (skb->len <= police->tcfp_mtu) { + if (police->tcfp_R_tab == NULL) { + spin_unlock(&police->tcf_lock); + return police->tcfp_result; } PSCHED_GET_TIME(now); - - toks = PSCHED_TDIFF_SAFE(now, p->t_c, p->burst); - - if (p->P_tab) { - ptoks = toks + p->ptoks; - if (ptoks > (long)L2T_P(p, p->mtu)) - ptoks = (long)L2T_P(p, p->mtu); - ptoks -= L2T_P(p, skb->len); + toks = PSCHED_TDIFF_SAFE(now, police->tcfp_t_c, + police->tcfp_burst); + if (police->tcfp_P_tab) { + ptoks = toks + police->tcfp_ptoks; + if (ptoks > (long)L2T_P(police, police->tcfp_mtu)) + ptoks = (long)L2T_P(police, police->tcfp_mtu); + ptoks -= L2T_P(police, skb->len); } - toks += p->toks; - if (toks > (long)p->burst) - toks = p->burst; - toks -= L2T(p, skb->len); - + toks += police->tcfp_toks; + if (toks > (long)police->tcfp_burst) + toks = police->tcfp_burst; + toks -= L2T(police, skb->len); if ((toks|ptoks) >= 0) { - p->t_c = now; - p->toks = toks; - p->ptoks = ptoks; - spin_unlock(&p->lock); - return p->result; + police->tcfp_t_c = now; + police->tcfp_toks = toks; + police->tcfp_ptoks = ptoks; + spin_unlock(&police->tcf_lock); + return police->tcfp_result; } } - p->qstats.overlimits++; - spin_unlock(&p->lock); - return p->action; + police->tcf_qstats.overlimits++; + spin_unlock(&police->tcf_lock); + return police->tcf_action; } EXPORT_SYMBOL(tcf_police); -int tcf_police_dump(struct sk_buff *skb, struct tcf_police *p) +int tcf_police_dump(struct sk_buff *skb, struct tcf_police *police) { - unsigned char *b = skb->tail; + unsigned char *b = skb->tail; struct tc_police opt; - opt.index = p->index; - opt.action = p->action; - opt.mtu = p->mtu; - opt.burst = p->burst; - if (p->R_tab) - opt.rate = p->R_tab->rate; + opt.index = police->tcf_index; + opt.action = police->tcf_action; + opt.mtu = police->tcfp_mtu; + opt.burst = police->tcfp_burst; + if (police->tcfp_R_tab) + opt.rate = police->tcfp_R_tab->rate; else memset(&opt.rate, 0, sizeof(opt.rate)); - if (p->P_tab) - opt.peakrate = p->P_tab->rate; + if (police->tcfp_P_tab) + opt.peakrate = police->tcfp_P_tab->rate; else memset(&opt.peakrate, 0, sizeof(opt.peakrate)); RTA_PUT(skb, TCA_POLICE_TBF, sizeof(opt), &opt); - if (p->result) - RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), &p->result); + if (police->tcfp_result) + RTA_PUT(skb, TCA_POLICE_RESULT, sizeof(int), + &police->tcfp_result); #ifdef CONFIG_NET_ESTIMATOR - if (p->ewma_rate) - RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &p->ewma_rate); + if (police->tcfp_ewma_rate) + RTA_PUT(skb, TCA_POLICE_AVRATE, 4, &police->tcfp_ewma_rate); #endif return skb->len; @@ -574,19 +585,20 @@ rtattr_failure: return -1; } -int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *p) +int tcf_police_dump_stats(struct sk_buff *skb, struct tcf_police *police) { struct gnet_dump d; if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS, - TCA_XSTATS, p->stats_lock, &d) < 0) + TCA_XSTATS, police->tcf_stats_lock, + &d) < 0) goto errout; - if (gnet_stats_copy_basic(&d, &p->bstats) < 0 || + if (gnet_stats_copy_basic(&d, &police->tcf_bstats) < 0 || #ifdef CONFIG_NET_ESTIMATOR - gnet_stats_copy_rate_est(&d, &p->rate_est) < 0 || + gnet_stats_copy_rate_est(&d, &police->tcf_rate_est) < 0 || #endif - gnet_stats_copy_queue(&d, &p->qstats) < 0) + gnet_stats_copy_queue(&d, &police->tcf_qstats) < 0) goto errout; if (gnet_stats_finish_copy(&d) < 0) diff --git a/net/sched/act_simple.c b/net/sched/act_simple.c index 17105c82537f..8c1ab8ad8fa6 100644 --- a/net/sched/act_simple.c +++ b/net/sched/act_simple.c @@ -20,54 +20,175 @@ #define TCA_ACT_SIMP 22 -/* XXX: Hide all these common elements under some macro - * probably -*/ #include #include -/* use generic hash table with 8 buckets */ -#define MY_TAB_SIZE 8 -#define MY_TAB_MASK (MY_TAB_SIZE - 1) -static u32 idx_gen; -static struct tcf_defact *tcf_simp_ht[MY_TAB_SIZE]; +#define SIMP_TAB_MASK 7 +static struct tcf_common *tcf_simp_ht[SIMP_TAB_MASK + 1]; +static u32 simp_idx_gen; static DEFINE_RWLOCK(simp_lock); -/* override the defaults */ -#define tcf_st tcf_defact -#define tc_st tc_defact -#define tcf_t_lock simp_lock -#define tcf_ht tcf_simp_ht - -#define CONFIG_NET_ACT_INIT 1 -#include -#include +struct tcf_hashinfo simp_hash_info = { + .htab = tcf_simp_ht, + .hmask = SIMP_TAB_MASK, + .lock = &simp_lock, +}; static int tcf_simp(struct sk_buff *skb, struct tc_action *a, struct tcf_result *res) { - struct tcf_defact *p = PRIV(a, defact); + struct tcf_defact *d = a->priv; - spin_lock(&p->lock); - p->tm.lastuse = jiffies; - p->bstats.bytes += skb->len; - p->bstats.packets++; + spin_lock(&d->tcf_lock); + d->tcf_tm.lastuse = jiffies; + d->tcf_bstats.bytes += skb->len; + d->tcf_bstats.packets++; /* print policy string followed by _ then packet count * Example if this was the 3rd packet and the string was "hello" * then it would look like "hello_3" (without quotes) **/ - printk("simple: %s_%d\n", (char *)p->defdata, p->bstats.packets); - spin_unlock(&p->lock); - return p->action; + printk("simple: %s_%d\n", + (char *)d->tcfd_defdata, d->tcf_bstats.packets); + spin_unlock(&d->tcf_lock); + return d->tcf_action; +} + +static int tcf_simp_release(struct tcf_defact *d, int bind) +{ + int ret = 0; + if (d) { + if (bind) + d->tcf_bindcnt--; + d->tcf_refcnt--; + if (d->tcf_bindcnt <= 0 && d->tcf_refcnt <= 0) { + kfree(d->tcfd_defdata); + tcf_hash_destroy(&d->common, &simp_hash_info); + ret = 1; + } + } + return ret; +} + +static int alloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata) +{ + d->tcfd_defdata = kmalloc(datalen, GFP_KERNEL); + if (unlikely(!d->tcfd_defdata)) + return -ENOMEM; + d->tcfd_datalen = datalen; + memcpy(d->tcfd_defdata, defdata, datalen); + return 0; +} + +static int realloc_defdata(struct tcf_defact *d, u32 datalen, void *defdata) +{ + kfree(d->tcfd_defdata); + return alloc_defdata(d, datalen, defdata); +} + +static int tcf_simp_init(struct rtattr *rta, struct rtattr *est, + struct tc_action *a, int ovr, int bind) +{ + struct rtattr *tb[TCA_DEF_MAX]; + struct tc_defact *parm; + struct tcf_defact *d; + struct tcf_common *pc; + void *defdata; + u32 datalen = 0; + int ret = 0; + + if (rta == NULL || rtattr_parse_nested(tb, TCA_DEF_MAX, rta) < 0) + return -EINVAL; + + if (tb[TCA_DEF_PARMS - 1] == NULL || + RTA_PAYLOAD(tb[TCA_DEF_PARMS - 1]) < sizeof(*parm)) + return -EINVAL; + + parm = RTA_DATA(tb[TCA_DEF_PARMS - 1]); + defdata = RTA_DATA(tb[TCA_DEF_DATA - 1]); + if (defdata == NULL) + return -EINVAL; + + datalen = RTA_PAYLOAD(tb[TCA_DEF_DATA - 1]); + if (datalen <= 0) + return -EINVAL; + + pc = tcf_hash_check(parm->index, a, bind, &simp_hash_info); + if (!pc) { + pc = tcf_hash_create(parm->index, est, a, sizeof(*d), bind, + &simp_idx_gen, &simp_hash_info); + if (unlikely(!pc)) + return -ENOMEM; + + d = to_defact(pc); + ret = alloc_defdata(d, datalen, defdata); + if (ret < 0) { + kfree(pc); + return ret; + } + ret = ACT_P_CREATED; + } else { + d = to_defact(pc); + if (!ovr) { + tcf_simp_release(d, bind); + return -EEXIST; + } + realloc_defdata(d, datalen, defdata); + } + + spin_lock_bh(&d->tcf_lock); + d->tcf_action = parm->action; + spin_unlock_bh(&d->tcf_lock); + + if (ret == ACT_P_CREATED) + tcf_hash_insert(pc, &simp_hash_info); + return ret; +} + +static inline int tcf_simp_cleanup(struct tc_action *a, int bind) +{ + struct tcf_defact *d = a->priv; + + if (d) + return tcf_simp_release(d, bind); + return 0; +} + +static inline int tcf_simp_dump(struct sk_buff *skb, struct tc_action *a, + int bind, int ref) +{ + unsigned char *b = skb->tail; + struct tcf_defact *d = a->priv; + struct tc_defact opt; + struct tcf_t t; + + opt.index = d->tcf_index; + opt.refcnt = d->tcf_refcnt - ref; + opt.bindcnt = d->tcf_bindcnt - bind; + opt.action = d->tcf_action; + RTA_PUT(skb, TCA_DEF_PARMS, sizeof(opt), &opt); + RTA_PUT(skb, TCA_DEF_DATA, d->tcfd_datalen, d->tcfd_defdata); + t.install = jiffies_to_clock_t(jiffies - d->tcf_tm.install); + t.lastuse = jiffies_to_clock_t(jiffies - d->tcf_tm.lastuse); + t.expires = jiffies_to_clock_t(d->tcf_tm.expires); + RTA_PUT(skb, TCA_DEF_TM, sizeof(t), &t); + return skb->len; + +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; } static struct tc_action_ops act_simp_ops = { - .kind = "simple", - .type = TCA_ACT_SIMP, - .capab = TCA_CAP_NONE, - .owner = THIS_MODULE, - .act = tcf_simp, - tca_use_default_ops + .kind = "simple", + .hinfo = &simp_hash_info, + .type = TCA_ACT_SIMP, + .capab = TCA_CAP_NONE, + .owner = THIS_MODULE, + .act = tcf_simp, + .dump = tcf_simp_dump, + .cleanup = tcf_simp_cleanup, + .init = tcf_simp_init, + .walk = tcf_generic_walker, }; MODULE_AUTHOR("Jamal Hadi Salim(2005)"); -- cgit v1.3-8-gc7d7 From e0a1ad73d34fd6dfdb630479400511e9879069c0 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 22 Aug 2006 00:00:21 -0700 Subject: [IPv6] route: Simplify ip6_del_rt() Provide a simple ip6_del_rt() for the majority of users and an alternative for the exception via netlink. Avoids code obfuscation. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/ip6_route.h | 5 +---- net/ipv6/addrconf.c | 6 +++--- net/ipv6/ndisc.c | 4 ++-- net/ipv6/route.c | 18 ++++++++++++------ 4 files changed, 18 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 01bfe404784f..a7e6086a2bd4 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -69,10 +69,7 @@ extern int ip6_ins_rt(struct rt6_info *, struct nlmsghdr *, void *rtattr, struct netlink_skb_parms *req); -extern int ip6_del_rt(struct rt6_info *, - struct nlmsghdr *, - void *rtattr, - struct netlink_skb_parms *req); +extern int ip6_del_rt(struct rt6_info *); extern int ip6_rt_addr_add(struct in6_addr *addr, struct net_device *dev, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index f1ede9004887..27f2e3309598 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -736,7 +736,7 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { if (onlink == 0) { - ip6_del_rt(rt, NULL, NULL, NULL); + ip6_del_rt(rt); rt = NULL; } else if (!(rt->rt6i_flags & RTF_EXPIRES)) { rt->rt6i_expires = expires; @@ -1662,7 +1662,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len) if (rt && ((rt->rt6i_flags & (RTF_GATEWAY | RTF_DEFAULT)) == 0)) { if (rt->rt6i_flags&RTF_EXPIRES) { if (valid_lft == 0) { - ip6_del_rt(rt, NULL, NULL, NULL); + ip6_del_rt(rt); rt = NULL; } else { rt->rt6i_expires = jiffies + rt_expires; @@ -3557,7 +3557,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) addrconf_leave_anycast(ifp); addrconf_leave_solict(ifp->idev, &ifp->addr); dst_hold(&ifp->rt->u.dst); - if (ip6_del_rt(ifp->rt, NULL, NULL, NULL)) + if (ip6_del_rt(ifp->rt)) dst_free(&ifp->rt->u.dst); break; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 5743e8bffefd..419d65163819 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -961,7 +961,7 @@ static void ndisc_recv_na(struct sk_buff *skb) struct rt6_info *rt; rt = rt6_get_dflt_router(saddr, dev); if (rt) - ip6_del_rt(rt, NULL, NULL, NULL); + ip6_del_rt(rt); } out: @@ -1114,7 +1114,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) if (rt && lifetime == 0) { neigh_clone(neigh); - ip6_del_rt(rt, NULL, NULL, NULL); + ip6_del_rt(rt); rt = NULL; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 1aca787ead85..8d511de0db1b 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -457,7 +457,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, rt = rt6_get_route_info(prefix, rinfo->prefix_len, gwaddr, dev->ifindex); if (rt && !lifetime) { - ip6_del_rt(rt, NULL, NULL, NULL); + ip6_del_rt(rt); rt = NULL; } @@ -813,7 +813,7 @@ static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) if (rt) { if (rt->rt6i_flags & RTF_CACHE) - ip6_del_rt(rt, NULL, NULL, NULL); + ip6_del_rt(rt); else dst_release(dst); } @@ -1218,7 +1218,8 @@ out: return err; } -int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) +static int __ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, + void *_rtattr, struct netlink_skb_parms *req) { int err; struct fib6_table *table; @@ -1237,6 +1238,11 @@ int ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct return err; } +int ip6_del_rt(struct rt6_info *rt) +{ + return __ip6_del_rt(rt, NULL, NULL, NULL); +} + static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req, u32 table_id) @@ -1271,7 +1277,7 @@ static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, dst_hold(&rt->u.dst); read_unlock_bh(&table->tb6_lock); - return ip6_del_rt(rt, nlh, _rtattr, req); + return __ip6_del_rt(rt, nlh, _rtattr, req); } } read_unlock_bh(&table->tb6_lock); @@ -1395,7 +1401,7 @@ restart: call_netevent_notifiers(NETEVENT_REDIRECT, &netevent); if (rt->rt6i_flags&RTF_CACHE) { - ip6_del_rt(rt, NULL, NULL, NULL); + ip6_del_rt(rt); return; } @@ -1631,7 +1637,7 @@ restart: if (rt->rt6i_flags & (RTF_DEFAULT | RTF_ADDRCONF)) { dst_hold(&rt->u.dst); read_unlock_bh(&table->tb6_lock); - ip6_del_rt(rt, NULL, NULL, NULL); + ip6_del_rt(rt); goto restart; } } -- cgit v1.3-8-gc7d7 From 40e22e8f3d4d4f1ff68fb03683f007c53ee8b348 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 22 Aug 2006 00:00:45 -0700 Subject: [IPv6] route: Simplify ip6_ins_rt() Provide a simple ip6_ins_rt() for the majority of users and an alternative for the exception via netlink. Avoids code obfuscation. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/ip6_route.h | 5 +---- net/ipv6/addrconf.c | 2 +- net/ipv6/anycast.c | 2 +- net/ipv6/route.c | 19 ++++++++++++------- 4 files changed, 15 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index a7e6086a2bd4..172c4761e2bf 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -65,10 +65,7 @@ extern int ip6_route_add(struct in6_rtmsg *rtmsg, void *rtattr, struct netlink_skb_parms *req, u32 table_id); -extern int ip6_ins_rt(struct rt6_info *, - struct nlmsghdr *, - void *rtattr, - struct netlink_skb_parms *req); +extern int ip6_ins_rt(struct rt6_info *); extern int ip6_del_rt(struct rt6_info *); extern int ip6_rt_addr_add(struct in6_addr *addr, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 27f2e3309598..aafba9ea9cb6 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3548,7 +3548,7 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) switch (event) { case RTM_NEWADDR: - ip6_ins_rt(ifp->rt, NULL, NULL, NULL); + ip6_ins_rt(ifp->rt); if (ifp->idev->cnf.forwarding) addrconf_join_anycast(ifp); break; diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index f6881d7a0385..abbc35a13e08 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -335,7 +335,7 @@ int ipv6_dev_ac_inc(struct net_device *dev, struct in6_addr *addr) write_unlock_bh(&idev->lock); dst_hold(&rt->u.dst); - if (ip6_ins_rt(rt, NULL, NULL, NULL)) + if (ip6_ins_rt(rt)) dst_release(&rt->u.dst); addrconf_join_solict(dev, &aca->aca_addr); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8d511de0db1b..9ec348a72a95 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -546,8 +546,8 @@ struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, be destroyed. */ -int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, - void *_rtattr, struct netlink_skb_parms *req) +static int __ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, + void *_rtattr, struct netlink_skb_parms *req) { int err; struct fib6_table *table; @@ -560,6 +560,11 @@ int ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, return err; } +int ip6_ins_rt(struct rt6_info *rt) +{ + return __ip6_ins_rt(rt, NULL, NULL, NULL); +} + static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr, struct in6_addr *saddr) { @@ -657,7 +662,7 @@ restart: dst_hold(&rt->u.dst); if (nrt) { - err = ip6_ins_rt(nrt, NULL, NULL, NULL); + err = ip6_ins_rt(nrt); if (!err) goto out2; } @@ -752,7 +757,7 @@ restart: dst_hold(&rt->u.dst); if (nrt) { - err = ip6_ins_rt(nrt, NULL, NULL, NULL); + err = ip6_ins_rt(nrt); if (!err) goto out2; } @@ -1206,7 +1211,7 @@ install_route: rt->u.dst.dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; - return ip6_ins_rt(rt, nlh, _rtattr, req); + return __ip6_ins_rt(rt, nlh, _rtattr, req); out: if (dev) @@ -1393,7 +1398,7 @@ restart: nrt->u.dst.metrics[RTAX_MTU-1] = ipv6_get_mtu(neigh->dev); nrt->u.dst.metrics[RTAX_ADVMSS-1] = ipv6_advmss(dst_mtu(&nrt->u.dst)); - if (ip6_ins_rt(nrt, NULL, NULL, NULL)) + if (ip6_ins_rt(nrt)) goto out; netevent.old = &rt->u.dst; @@ -1483,7 +1488,7 @@ void rt6_pmtu_discovery(struct in6_addr *daddr, struct in6_addr *saddr, dst_set_expires(&nrt->u.dst, ip6_rt_mtu_expires); nrt->rt6i_flags |= RTF_DYNAMIC|RTF_EXPIRES; - ip6_ins_rt(nrt, NULL, NULL, NULL); + ip6_ins_rt(nrt); } out: dst_release(&rt->u.dst); -- cgit v1.3-8-gc7d7 From 86872cb57925c46a6499887d77afb880a892c0ec Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Tue, 22 Aug 2006 00:01:08 -0700 Subject: [IPv6] route: FIB6 configuration using struct fib6_config Replaces the struct in6_rtmsg based interface orignating from the ioctl interface with a struct fib6_config based on. Allows changing the interface without breaking the ioctl interface and avoids passing on tons of parameters. The recently introduced struct nl_info is used to pass on netlink authorship information for notifications. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 38 ++++-- include/net/ip6_route.h | 6 +- net/ipv6/addrconf.c | 65 +++++----- net/ipv6/ip6_fib.c | 19 ++- net/ipv6/route.c | 331 ++++++++++++++++++++++++++++-------------------- 5 files changed, 259 insertions(+), 200 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 69c444209781..9610b887ffb5 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -16,14 +16,35 @@ #ifdef __KERNEL__ #include - -#include -#include #include #include +#include +#include +#include struct rt6_info; +struct fib6_config +{ + u32 fc_table; + u32 fc_metric; + int fc_dst_len; + int fc_src_len; + int fc_ifindex; + u32 fc_flags; + u32 fc_protocol; + + struct in6_addr fc_dst; + struct in6_addr fc_src; + struct in6_addr fc_gateway; + + unsigned long fc_expires; + struct nlattr *fc_mx; + int fc_mx_len; + + struct nl_info fc_nlinfo; +}; + struct fib6_node { struct fib6_node *parent; @@ -175,18 +196,13 @@ extern void fib6_clean_all(int (*func)(struct rt6_info *, void *arg), extern int fib6_add(struct fib6_node *root, struct rt6_info *rt, - struct nlmsghdr *nlh, - void *rtattr, - struct netlink_skb_parms *req); + struct nl_info *info); extern int fib6_del(struct rt6_info *rt, - struct nlmsghdr *nlh, - void *rtattr, - struct netlink_skb_parms *req); + struct nl_info *info); extern void inet6_rt_notify(int event, struct rt6_info *rt, - struct nlmsghdr *nlh, - struct netlink_skb_parms *req); + struct nl_info *info); extern void fib6_run_gc(unsigned long dummy); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 172c4761e2bf..3f170f667c7b 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -60,11 +60,7 @@ extern void ip6_route_cleanup(void); extern int ipv6_route_ioctl(unsigned int cmd, void __user *arg); -extern int ip6_route_add(struct in6_rtmsg *rtmsg, - struct nlmsghdr *, - void *rtattr, - struct netlink_skb_parms *req, - u32 table_id); +extern int ip6_route_add(struct fib6_config *cfg); extern int ip6_ins_rt(struct rt6_info *); extern int ip6_del_rt(struct rt6_info *); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index aafba9ea9cb6..fc9cff3426c4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1509,59 +1509,56 @@ static void addrconf_prefix_route(struct in6_addr *pfx, int plen, struct net_device *dev, unsigned long expires, u32 flags) { - struct in6_rtmsg rtmsg; + struct fib6_config cfg = { + .fc_table = RT6_TABLE_PREFIX, + .fc_metric = IP6_RT_PRIO_ADDRCONF, + .fc_ifindex = dev->ifindex, + .fc_expires = expires, + .fc_dst_len = plen, + .fc_flags = RTF_UP | flags, + }; - memset(&rtmsg, 0, sizeof(rtmsg)); - ipv6_addr_copy(&rtmsg.rtmsg_dst, pfx); - rtmsg.rtmsg_dst_len = plen; - rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; - rtmsg.rtmsg_ifindex = dev->ifindex; - rtmsg.rtmsg_info = expires; - rtmsg.rtmsg_flags = RTF_UP|flags; - rtmsg.rtmsg_type = RTMSG_NEWROUTE; + ipv6_addr_copy(&cfg.fc_dst, pfx); /* Prevent useless cloning on PtP SIT. This thing is done here expecting that the whole class of non-broadcast devices need not cloning. */ - if (dev->type == ARPHRD_SIT && (dev->flags&IFF_POINTOPOINT)) - rtmsg.rtmsg_flags |= RTF_NONEXTHOP; + if (dev->type == ARPHRD_SIT && (dev->flags & IFF_POINTOPOINT)) + cfg.fc_flags |= RTF_NONEXTHOP; - ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_PREFIX); + ip6_route_add(&cfg); } /* Create "default" multicast route to the interface */ static void addrconf_add_mroute(struct net_device *dev) { - struct in6_rtmsg rtmsg; - - memset(&rtmsg, 0, sizeof(rtmsg)); - ipv6_addr_set(&rtmsg.rtmsg_dst, - htonl(0xFF000000), 0, 0, 0); - rtmsg.rtmsg_dst_len = 8; - rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; - rtmsg.rtmsg_ifindex = dev->ifindex; - rtmsg.rtmsg_flags = RTF_UP; - rtmsg.rtmsg_type = RTMSG_NEWROUTE; - ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_LOCAL); + struct fib6_config cfg = { + .fc_table = RT6_TABLE_LOCAL, + .fc_metric = IP6_RT_PRIO_ADDRCONF, + .fc_ifindex = dev->ifindex, + .fc_dst_len = 8, + .fc_flags = RTF_UP, + }; + + ipv6_addr_set(&cfg.fc_dst, htonl(0xFF000000), 0, 0, 0); + + ip6_route_add(&cfg); } static void sit_route_add(struct net_device *dev) { - struct in6_rtmsg rtmsg; - - memset(&rtmsg, 0, sizeof(rtmsg)); - - rtmsg.rtmsg_type = RTMSG_NEWROUTE; - rtmsg.rtmsg_metric = IP6_RT_PRIO_ADDRCONF; + struct fib6_config cfg = { + .fc_table = RT6_TABLE_MAIN, + .fc_metric = IP6_RT_PRIO_ADDRCONF, + .fc_ifindex = dev->ifindex, + .fc_dst_len = 96, + .fc_flags = RTF_UP | RTF_NONEXTHOP, + }; /* prefix length - 96 bits "::d.d.d.d" */ - rtmsg.rtmsg_dst_len = 96; - rtmsg.rtmsg_flags = RTF_UP|RTF_NONEXTHOP; - rtmsg.rtmsg_ifindex = dev->ifindex; - - ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_MAIN); + ip6_route_add(&cfg); } static void addrconf_add_lroute(struct net_device *dev) diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index be36f4acda94..667b1b1ea25d 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -610,7 +610,7 @@ insert_above: */ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, - struct nlmsghdr *nlh, struct netlink_skb_parms *req) + struct nl_info *info) { struct rt6_info *iter = NULL; struct rt6_info **ins; @@ -665,7 +665,7 @@ out: *ins = rt; rt->rt6i_node = fn; atomic_inc(&rt->rt6i_ref); - inet6_rt_notify(RTM_NEWROUTE, rt, nlh, req); + inet6_rt_notify(RTM_NEWROUTE, rt, info); rt6_stats.fib_rt_entries++; if ((fn->fn_flags & RTN_RTINFO) == 0) { @@ -695,8 +695,7 @@ void fib6_force_start_gc(void) * with source addr info in sub-trees */ -int fib6_add(struct fib6_node *root, struct rt6_info *rt, - struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) +int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) { struct fib6_node *fn; int err = -ENOMEM; @@ -769,7 +768,7 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, } #endif - err = fib6_add_rt2node(fn, rt, nlh, req); + err = fib6_add_rt2node(fn, rt, info); if (err == 0) { fib6_start_gc(rt); @@ -1076,7 +1075,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn) } static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, - struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) + struct nl_info *info) { struct fib6_walker_t *w; struct rt6_info *rt = *rtp; @@ -1132,11 +1131,11 @@ static void fib6_del_route(struct fib6_node *fn, struct rt6_info **rtp, if (atomic_read(&rt->rt6i_ref) != 1) BUG(); } - inet6_rt_notify(RTM_DELROUTE, rt, nlh, req); + inet6_rt_notify(RTM_DELROUTE, rt, info); rt6_release(rt); } -int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct netlink_skb_parms *req) +int fib6_del(struct rt6_info *rt, struct nl_info *info) { struct fib6_node *fn = rt->rt6i_node; struct rt6_info **rtp; @@ -1161,7 +1160,7 @@ int fib6_del(struct rt6_info *rt, struct nlmsghdr *nlh, void *_rtattr, struct ne for (rtp = &fn->leaf; *rtp; rtp = &(*rtp)->u.next) { if (*rtp == rt) { - fib6_del_route(fn, rtp, nlh, _rtattr, req); + fib6_del_route(fn, rtp, info); return 0; } } @@ -1290,7 +1289,7 @@ static int fib6_clean_node(struct fib6_walker_t *w) res = c->func(rt, c->arg); if (res < 0) { w->leaf = rt; - res = fib6_del(rt, NULL, NULL, NULL); + res = fib6_del(rt, NULL); if (res) { #if RT6_DEBUG >= 2 printk(KERN_DEBUG "fib6_clean_node: del failed: rt=%p@%p err=%d\n", rt, rt->rt6i_node, res); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 9ec348a72a95..7bcffa6ddba3 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -546,15 +546,14 @@ struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, be destroyed. */ -static int __ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, - void *_rtattr, struct netlink_skb_parms *req) +static int __ip6_ins_rt(struct rt6_info *rt, struct nl_info *info) { int err; struct fib6_table *table; table = rt->rt6i_table; write_lock_bh(&table->tb6_lock); - err = fib6_add(&table->tb6_root, rt, nlh, _rtattr, req); + err = fib6_add(&table->tb6_root, rt, info); write_unlock_bh(&table->tb6_lock); return err; @@ -562,7 +561,7 @@ static int __ip6_ins_rt(struct rt6_info *rt, struct nlmsghdr *nlh, int ip6_ins_rt(struct rt6_info *rt) { - return __ip6_ins_rt(rt, NULL, NULL, NULL); + return __ip6_ins_rt(rt, NULL); } static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, struct in6_addr *daddr, @@ -1014,30 +1013,24 @@ int ipv6_get_hoplimit(struct net_device *dev) * */ -int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, - void *_rtattr, struct netlink_skb_parms *req, - u32 table_id) +int ip6_route_add(struct fib6_config *cfg) { int err; - struct rtmsg *r; - struct rtattr **rta; struct rt6_info *rt = NULL; struct net_device *dev = NULL; struct inet6_dev *idev = NULL; struct fib6_table *table; int addr_type; - rta = (struct rtattr **) _rtattr; - - if (rtmsg->rtmsg_dst_len > 128 || rtmsg->rtmsg_src_len > 128) + if (cfg->fc_dst_len > 128 || cfg->fc_src_len > 128) return -EINVAL; #ifndef CONFIG_IPV6_SUBTREES - if (rtmsg->rtmsg_src_len) + if (cfg->fc_src_len) return -EINVAL; #endif - if (rtmsg->rtmsg_ifindex) { + if (cfg->fc_ifindex) { err = -ENODEV; - dev = dev_get_by_index(rtmsg->rtmsg_ifindex); + dev = dev_get_by_index(cfg->fc_ifindex); if (!dev) goto out; idev = in6_dev_get(dev); @@ -1045,10 +1038,10 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, goto out; } - if (rtmsg->rtmsg_metric == 0) - rtmsg->rtmsg_metric = IP6_RT_PRIO_USER; + if (cfg->fc_metric == 0) + cfg->fc_metric = IP6_RT_PRIO_USER; - table = fib6_new_table(table_id); + table = fib6_new_table(cfg->fc_table); if (table == NULL) { err = -ENOBUFS; goto out; @@ -1062,14 +1055,13 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, } rt->u.dst.obsolete = -1; - rt->rt6i_expires = jiffies + clock_t_to_jiffies(rtmsg->rtmsg_info); - if (nlh && (r = NLMSG_DATA(nlh))) { - rt->rt6i_protocol = r->rtm_protocol; - } else { - rt->rt6i_protocol = RTPROT_BOOT; - } + rt->rt6i_expires = jiffies + clock_t_to_jiffies(cfg->fc_expires); + + if (cfg->fc_protocol == RTPROT_UNSPEC) + cfg->fc_protocol = RTPROT_BOOT; + rt->rt6i_protocol = cfg->fc_protocol; - addr_type = ipv6_addr_type(&rtmsg->rtmsg_dst); + addr_type = ipv6_addr_type(&cfg->fc_dst); if (addr_type & IPV6_ADDR_MULTICAST) rt->u.dst.input = ip6_mc_input; @@ -1078,24 +1070,22 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, rt->u.dst.output = ip6_output; - ipv6_addr_prefix(&rt->rt6i_dst.addr, - &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len); - rt->rt6i_dst.plen = rtmsg->rtmsg_dst_len; + ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); + rt->rt6i_dst.plen = cfg->fc_dst_len; if (rt->rt6i_dst.plen == 128) rt->u.dst.flags = DST_HOST; #ifdef CONFIG_IPV6_SUBTREES - ipv6_addr_prefix(&rt->rt6i_src.addr, - &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len); - rt->rt6i_src.plen = rtmsg->rtmsg_src_len; + ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); + rt->rt6i_src.plen = cfg->fc_src_len; #endif - rt->rt6i_metric = rtmsg->rtmsg_metric; + rt->rt6i_metric = cfg->fc_metric; /* We cannot add true routes via loopback here, they would result in kernel looping; promote them to reject routes */ - if ((rtmsg->rtmsg_flags&RTF_REJECT) || + if ((cfg->fc_flags & RTF_REJECT) || (dev && (dev->flags&IFF_LOOPBACK) && !(addr_type&IPV6_ADDR_LOOPBACK))) { /* hold loopback dev/idev if we haven't done so. */ if (dev != &loopback_dev) { @@ -1118,12 +1108,12 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, goto install_route; } - if (rtmsg->rtmsg_flags & RTF_GATEWAY) { + if (cfg->fc_flags & RTF_GATEWAY) { struct in6_addr *gw_addr; int gwa_type; - gw_addr = &rtmsg->rtmsg_gateway; - ipv6_addr_copy(&rt->rt6i_gateway, &rtmsg->rtmsg_gateway); + gw_addr = &cfg->fc_gateway; + ipv6_addr_copy(&rt->rt6i_gateway, gw_addr); gwa_type = ipv6_addr_type(gw_addr); if (gwa_type != (IPV6_ADDR_LINKLOCAL|IPV6_ADDR_UNICAST)) { @@ -1140,7 +1130,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, if (!(gwa_type&IPV6_ADDR_UNICAST)) goto out; - grt = rt6_lookup(gw_addr, NULL, rtmsg->rtmsg_ifindex, 1); + grt = rt6_lookup(gw_addr, NULL, cfg->fc_ifindex, 1); err = -EHOSTUNREACH; if (grt == NULL) @@ -1172,7 +1162,7 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, if (dev == NULL) goto out; - if (rtmsg->rtmsg_flags & (RTF_GATEWAY|RTF_NONEXTHOP)) { + if (cfg->fc_flags & (RTF_GATEWAY | RTF_NONEXTHOP)) { rt->rt6i_nexthop = __neigh_lookup_errno(&nd_tbl, &rt->rt6i_gateway, dev); if (IS_ERR(rt->rt6i_nexthop)) { err = PTR_ERR(rt->rt6i_nexthop); @@ -1181,24 +1171,24 @@ int ip6_route_add(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, } } - rt->rt6i_flags = rtmsg->rtmsg_flags; + rt->rt6i_flags = cfg->fc_flags; install_route: - if (rta && rta[RTA_METRICS-1]) { - int attrlen = RTA_PAYLOAD(rta[RTA_METRICS-1]); - struct rtattr *attr = RTA_DATA(rta[RTA_METRICS-1]); - - while (RTA_OK(attr, attrlen)) { - unsigned flavor = attr->rta_type; - if (flavor) { - if (flavor > RTAX_MAX) { + if (cfg->fc_mx) { + struct nlattr *nla; + int remaining; + + nla_for_each_attr(nla, cfg->fc_mx, cfg->fc_mx_len, remaining) { + int type = nla->nla_type; + + if (type) { + if (type > RTAX_MAX) { err = -EINVAL; goto out; } - rt->u.dst.metrics[flavor-1] = - *(u32 *)RTA_DATA(attr); + + rt->u.dst.metrics[type - 1] = nla_get_u32(nla); } - attr = RTA_NEXT(attr, attrlen); } } @@ -1211,7 +1201,7 @@ install_route: rt->u.dst.dev = dev; rt->rt6i_idev = idev; rt->rt6i_table = table; - return __ip6_ins_rt(rt, nlh, _rtattr, req); + return __ip6_ins_rt(rt, &cfg->fc_nlinfo); out: if (dev) @@ -1223,8 +1213,7 @@ out: return err; } -static int __ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, - void *_rtattr, struct netlink_skb_parms *req) +static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) { int err; struct fib6_table *table; @@ -1235,7 +1224,7 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, table = rt->rt6i_table; write_lock_bh(&table->tb6_lock); - err = fib6_del(rt, nlh, _rtattr, req); + err = fib6_del(rt, info); dst_release(&rt->u.dst); write_unlock_bh(&table->tb6_lock); @@ -1245,44 +1234,41 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nlmsghdr *nlh, int ip6_del_rt(struct rt6_info *rt) { - return __ip6_del_rt(rt, NULL, NULL, NULL); + return __ip6_del_rt(rt, NULL); } -static int ip6_route_del(struct in6_rtmsg *rtmsg, struct nlmsghdr *nlh, - void *_rtattr, struct netlink_skb_parms *req, - u32 table_id) +static int ip6_route_del(struct fib6_config *cfg) { struct fib6_table *table; struct fib6_node *fn; struct rt6_info *rt; int err = -ESRCH; - table = fib6_get_table(table_id); + table = fib6_get_table(cfg->fc_table); if (table == NULL) return err; read_lock_bh(&table->tb6_lock); fn = fib6_locate(&table->tb6_root, - &rtmsg->rtmsg_dst, rtmsg->rtmsg_dst_len, - &rtmsg->rtmsg_src, rtmsg->rtmsg_src_len); + &cfg->fc_dst, cfg->fc_dst_len, + &cfg->fc_src, cfg->fc_src_len); if (fn) { for (rt = fn->leaf; rt; rt = rt->u.next) { - if (rtmsg->rtmsg_ifindex && + if (cfg->fc_ifindex && (rt->rt6i_dev == NULL || - rt->rt6i_dev->ifindex != rtmsg->rtmsg_ifindex)) + rt->rt6i_dev->ifindex != cfg->fc_ifindex)) continue; - if (rtmsg->rtmsg_flags&RTF_GATEWAY && - !ipv6_addr_equal(&rtmsg->rtmsg_gateway, &rt->rt6i_gateway)) + if (cfg->fc_flags & RTF_GATEWAY && + !ipv6_addr_equal(&cfg->fc_gateway, &rt->rt6i_gateway)) continue; - if (rtmsg->rtmsg_metric && - rtmsg->rtmsg_metric != rt->rt6i_metric) + if (cfg->fc_metric && cfg->fc_metric != rt->rt6i_metric) continue; dst_hold(&rt->u.dst); read_unlock_bh(&table->tb6_lock); - return __ip6_del_rt(rt, nlh, _rtattr, req); + return __ip6_del_rt(rt, &cfg->fc_nlinfo); } } read_unlock_bh(&table->tb6_lock); @@ -1565,21 +1551,23 @@ static struct rt6_info *rt6_add_route_info(struct in6_addr *prefix, int prefixle struct in6_addr *gwaddr, int ifindex, unsigned pref) { - struct in6_rtmsg rtmsg; + struct fib6_config cfg = { + .fc_table = RT6_TABLE_INFO, + .fc_metric = 1024, + .fc_ifindex = ifindex, + .fc_dst_len = prefixlen, + .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | + RTF_UP | RTF_PREF(pref), + }; + + ipv6_addr_copy(&cfg.fc_dst, prefix); + ipv6_addr_copy(&cfg.fc_gateway, gwaddr); - memset(&rtmsg, 0, sizeof(rtmsg)); - rtmsg.rtmsg_type = RTMSG_NEWROUTE; - ipv6_addr_copy(&rtmsg.rtmsg_dst, prefix); - rtmsg.rtmsg_dst_len = prefixlen; - ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr); - rtmsg.rtmsg_metric = 1024; - rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref); /* We should treat it as a default route if prefix length is 0. */ if (!prefixlen) - rtmsg.rtmsg_flags |= RTF_DEFAULT; - rtmsg.rtmsg_ifindex = ifindex; + cfg.fc_flags |= RTF_DEFAULT; - ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_INFO); + ip6_route_add(&cfg); return rt6_get_route_info(prefix, prefixlen, gwaddr, ifindex); } @@ -1611,18 +1599,18 @@ struct rt6_info *rt6_add_dflt_router(struct in6_addr *gwaddr, struct net_device *dev, unsigned int pref) { - struct in6_rtmsg rtmsg; + struct fib6_config cfg = { + .fc_table = RT6_TABLE_DFLT, + .fc_metric = 1024, + .fc_ifindex = dev->ifindex, + .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | + RTF_UP | RTF_EXPIRES | RTF_PREF(pref), + }; - memset(&rtmsg, 0, sizeof(struct in6_rtmsg)); - rtmsg.rtmsg_type = RTMSG_NEWROUTE; - ipv6_addr_copy(&rtmsg.rtmsg_gateway, gwaddr); - rtmsg.rtmsg_metric = 1024; - rtmsg.rtmsg_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES | - RTF_PREF(pref); + ipv6_addr_copy(&cfg.fc_gateway, gwaddr); - rtmsg.rtmsg_ifindex = dev->ifindex; + ip6_route_add(&cfg); - ip6_route_add(&rtmsg, NULL, NULL, NULL, RT6_TABLE_DFLT); return rt6_get_dflt_router(gwaddr, dev); } @@ -1649,8 +1637,27 @@ restart: read_unlock_bh(&table->tb6_lock); } +static void rtmsg_to_fib6_config(struct in6_rtmsg *rtmsg, + struct fib6_config *cfg) +{ + memset(cfg, 0, sizeof(*cfg)); + + cfg->fc_table = RT6_TABLE_MAIN; + cfg->fc_ifindex = rtmsg->rtmsg_ifindex; + cfg->fc_metric = rtmsg->rtmsg_metric; + cfg->fc_expires = rtmsg->rtmsg_info; + cfg->fc_dst_len = rtmsg->rtmsg_dst_len; + cfg->fc_src_len = rtmsg->rtmsg_src_len; + cfg->fc_flags = rtmsg->rtmsg_flags; + + ipv6_addr_copy(&cfg->fc_dst, &rtmsg->rtmsg_dst); + ipv6_addr_copy(&cfg->fc_src, &rtmsg->rtmsg_src); + ipv6_addr_copy(&cfg->fc_gateway, &rtmsg->rtmsg_gateway); +} + int ipv6_route_ioctl(unsigned int cmd, void __user *arg) { + struct fib6_config cfg; struct in6_rtmsg rtmsg; int err; @@ -1663,16 +1670,16 @@ int ipv6_route_ioctl(unsigned int cmd, void __user *arg) sizeof(struct in6_rtmsg)); if (err) return -EFAULT; - + + rtmsg_to_fib6_config(&rtmsg, &cfg); + rtnl_lock(); switch (cmd) { case SIOCADDRT: - err = ip6_route_add(&rtmsg, NULL, NULL, NULL, - RT6_TABLE_MAIN); + err = ip6_route_add(&cfg); break; case SIOCDELRT: - err = ip6_route_del(&rtmsg, NULL, NULL, NULL, - RT6_TABLE_MAIN); + err = ip6_route_del(&cfg); break; default: err = -EINVAL; @@ -1823,66 +1830,104 @@ void rt6_mtu_change(struct net_device *dev, unsigned mtu) fib6_clean_all(rt6_mtu_change_route, 0, &arg); } -static int inet6_rtm_to_rtmsg(struct rtmsg *r, struct rtattr **rta, - struct in6_rtmsg *rtmsg) +static struct nla_policy rtm_ipv6_policy[RTA_MAX+1] __read_mostly = { + [RTA_GATEWAY] = { .minlen = sizeof(struct in6_addr) }, + [RTA_OIF] = { .type = NLA_U32 }, + [RTA_PRIORITY] = { .type = NLA_U32 }, + [RTA_METRICS] = { .type = NLA_NESTED }, +}; + +static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, + struct fib6_config *cfg) { - memset(rtmsg, 0, sizeof(*rtmsg)); + struct rtmsg *rtm; + struct nlattr *tb[RTA_MAX+1]; + int err; + + err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); + if (err < 0) + goto errout; - rtmsg->rtmsg_dst_len = r->rtm_dst_len; - rtmsg->rtmsg_src_len = r->rtm_src_len; - rtmsg->rtmsg_flags = RTF_UP; - if (r->rtm_type == RTN_UNREACHABLE) - rtmsg->rtmsg_flags |= RTF_REJECT; + err = -EINVAL; + rtm = nlmsg_data(nlh); + memset(cfg, 0, sizeof(*cfg)); - if (rta[RTA_GATEWAY-1]) { - if (rta[RTA_GATEWAY-1]->rta_len != RTA_LENGTH(16)) - return -EINVAL; - memcpy(&rtmsg->rtmsg_gateway, RTA_DATA(rta[RTA_GATEWAY-1]), 16); - rtmsg->rtmsg_flags |= RTF_GATEWAY; - } - if (rta[RTA_DST-1]) { - if (RTA_PAYLOAD(rta[RTA_DST-1]) < ((r->rtm_dst_len+7)>>3)) - return -EINVAL; - memcpy(&rtmsg->rtmsg_dst, RTA_DATA(rta[RTA_DST-1]), ((r->rtm_dst_len+7)>>3)); + cfg->fc_table = rtm->rtm_table; + cfg->fc_dst_len = rtm->rtm_dst_len; + cfg->fc_src_len = rtm->rtm_src_len; + cfg->fc_flags = RTF_UP; + cfg->fc_protocol = rtm->rtm_protocol; + + if (rtm->rtm_type == RTN_UNREACHABLE) + cfg->fc_flags |= RTF_REJECT; + + cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; + cfg->fc_nlinfo.nlh = nlh; + + if (tb[RTA_GATEWAY]) { + nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); + cfg->fc_flags |= RTF_GATEWAY; } - if (rta[RTA_SRC-1]) { - if (RTA_PAYLOAD(rta[RTA_SRC-1]) < ((r->rtm_src_len+7)>>3)) - return -EINVAL; - memcpy(&rtmsg->rtmsg_src, RTA_DATA(rta[RTA_SRC-1]), ((r->rtm_src_len+7)>>3)); + + if (tb[RTA_DST]) { + int plen = (rtm->rtm_dst_len + 7) >> 3; + + if (nla_len(tb[RTA_DST]) < plen) + goto errout; + + nla_memcpy(&cfg->fc_dst, tb[RTA_DST], plen); } - if (rta[RTA_OIF-1]) { - if (rta[RTA_OIF-1]->rta_len != RTA_LENGTH(sizeof(int))) - return -EINVAL; - memcpy(&rtmsg->rtmsg_ifindex, RTA_DATA(rta[RTA_OIF-1]), sizeof(int)); + + if (tb[RTA_SRC]) { + int plen = (rtm->rtm_src_len + 7) >> 3; + + if (nla_len(tb[RTA_SRC]) < plen) + goto errout; + + nla_memcpy(&cfg->fc_src, tb[RTA_SRC], plen); } - if (rta[RTA_PRIORITY-1]) { - if (rta[RTA_PRIORITY-1]->rta_len != RTA_LENGTH(4)) - return -EINVAL; - memcpy(&rtmsg->rtmsg_metric, RTA_DATA(rta[RTA_PRIORITY-1]), 4); + + if (tb[RTA_OIF]) + cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); + + if (tb[RTA_PRIORITY]) + cfg->fc_metric = nla_get_u32(tb[RTA_PRIORITY]); + + if (tb[RTA_METRICS]) { + cfg->fc_mx = nla_data(tb[RTA_METRICS]); + cfg->fc_mx_len = nla_len(tb[RTA_METRICS]); } - return 0; + + if (tb[RTA_TABLE]) + cfg->fc_table = nla_get_u32(tb[RTA_TABLE]); + + err = 0; +errout: + return err; } int inet6_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct rtmsg *r = NLMSG_DATA(nlh); - struct in6_rtmsg rtmsg; + struct fib6_config cfg; + int err; - if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) - return -EINVAL; - return ip6_route_del(&rtmsg, nlh, arg, &NETLINK_CB(skb), - rtm_get_table(arg, r->rtm_table)); + err = rtm_to_fib6_config(skb, nlh, &cfg); + if (err < 0) + return err; + + return ip6_route_del(&cfg); } int inet6_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) { - struct rtmsg *r = NLMSG_DATA(nlh); - struct in6_rtmsg rtmsg; + struct fib6_config cfg; + int err; - if (inet6_rtm_to_rtmsg(r, arg, &rtmsg)) - return -EINVAL; - return ip6_route_add(&rtmsg, nlh, arg, &NETLINK_CB(skb), - rtm_get_table(arg, r->rtm_table)); + err = rtm_to_fib6_config(skb, nlh, &cfg); + if (err < 0) + return err; + + return ip6_route_add(&cfg); } static int rt6_fill_node(struct sk_buff *skb, struct rt6_info *rt, @@ -2063,15 +2108,21 @@ out_free: goto out; } -void inet6_rt_notify(int event, struct rt6_info *rt, struct nlmsghdr *nlh, - struct netlink_skb_parms *req) +void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) { struct sk_buff *skb; - u32 pid = req ? req->pid : 0; - u32 seq = nlh ? nlh->nlmsg_seq : 0; + u32 pid = 0, seq = 0; + struct nlmsghdr *nlh = NULL; int payload = sizeof(struct rtmsg) + 256; int err = -ENOBUFS; + if (info) { + pid = info->pid; + nlh = info->nlh; + if (nlh) + seq = nlh->nlmsg_seq; + } + skb = nlmsg_new(nlmsg_total_size(payload), gfp_any()); if (skb == NULL) goto errout; -- cgit v1.3-8-gc7d7 From ac0b04627269ff16c3c7ab854a65fe6780c6e3e5 Mon Sep 17 00:00:00 2001 From: Sridhar Samudrala Date: Tue, 22 Aug 2006 00:15:33 -0700 Subject: [SCTP]: Extend /proc/net/sctp/snmp to provide more statistics. This patch adds more statistics info under /proc/net/sctp/snmp that should be useful for debugging. The additional events that are counted now include timer expirations, retransmits, packet and data chunk discards. The Data chunk discards include all the cases where a data chunk is discarded including high tsn, bad stream, dup tsn and the most useful one(out of receive buffer/rwnd). Also moved the SCTP MIB data structures from the generic include directories to include/sctp/sctp.h. Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/linux/snmp.h | 33 --------------------------------- include/net/sctp/sctp.h | 44 ++++++++++++++++++++++++++++++++++++++++++++ include/net/snmp.h | 6 ------ net/sctp/input.c | 8 ++++++-- net/sctp/inqueue.c | 4 ++-- net/sctp/outqueue.c | 6 +++++- net/sctp/proc.c | 17 ++++++++++++++++- net/sctp/sm_statefuns.c | 15 +++++++++++++++ 8 files changed, 88 insertions(+), 45 deletions(-) (limited to 'include/net') diff --git a/include/linux/snmp.h b/include/linux/snmp.h index 30156556f78d..854aa6b543f1 100644 --- a/include/linux/snmp.h +++ b/include/linux/snmp.h @@ -160,39 +160,6 @@ enum __UDP_MIB_MAX }; -/* sctp mib definitions */ -/* - * draft-ietf-sigtran-sctp-mib-07.txt - */ -enum -{ - SCTP_MIB_NUM = 0, - SCTP_MIB_CURRESTAB, /* CurrEstab */ - SCTP_MIB_ACTIVEESTABS, /* ActiveEstabs */ - SCTP_MIB_PASSIVEESTABS, /* PassiveEstabs */ - SCTP_MIB_ABORTEDS, /* Aborteds */ - SCTP_MIB_SHUTDOWNS, /* Shutdowns */ - SCTP_MIB_OUTOFBLUES, /* OutOfBlues */ - SCTP_MIB_CHECKSUMERRORS, /* ChecksumErrors */ - SCTP_MIB_OUTCTRLCHUNKS, /* OutCtrlChunks */ - SCTP_MIB_OUTORDERCHUNKS, /* OutOrderChunks */ - SCTP_MIB_OUTUNORDERCHUNKS, /* OutUnorderChunks */ - SCTP_MIB_INCTRLCHUNKS, /* InCtrlChunks */ - SCTP_MIB_INORDERCHUNKS, /* InOrderChunks */ - SCTP_MIB_INUNORDERCHUNKS, /* InUnorderChunks */ - SCTP_MIB_FRAGUSRMSGS, /* FragUsrMsgs */ - SCTP_MIB_REASMUSRMSGS, /* ReasmUsrMsgs */ - SCTP_MIB_OUTSCTPPACKS, /* OutSCTPPacks */ - SCTP_MIB_INSCTPPACKS, /* InSCTPPacks */ - SCTP_MIB_RTOALGORITHM, /* RtoAlgorithm */ - SCTP_MIB_RTOMIN, /* RtoMin */ - SCTP_MIB_RTOMAX, /* RtoMax */ - SCTP_MIB_RTOINITIAL, /* RtoInitial */ - SCTP_MIB_VALCOOKIELIFE, /* ValCookieLife */ - SCTP_MIB_MAXINITRETR, /* MaxInitRetr */ - __SCTP_MIB_MAX -}; - /* linux mib definitions */ enum { diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 1c1abce5f6b6..e274fd479990 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -216,6 +216,50 @@ DECLARE_SNMP_STAT(struct sctp_mib, sctp_statistics); #endif /* !TEST_FRAME */ +/* sctp mib definitions */ +enum +{ + SCTP_MIB_NUM = 0, + SCTP_MIB_CURRESTAB, /* CurrEstab */ + SCTP_MIB_ACTIVEESTABS, /* ActiveEstabs */ + SCTP_MIB_PASSIVEESTABS, /* PassiveEstabs */ + SCTP_MIB_ABORTEDS, /* Aborteds */ + SCTP_MIB_SHUTDOWNS, /* Shutdowns */ + SCTP_MIB_OUTOFBLUES, /* OutOfBlues */ + SCTP_MIB_CHECKSUMERRORS, /* ChecksumErrors */ + SCTP_MIB_OUTCTRLCHUNKS, /* OutCtrlChunks */ + SCTP_MIB_OUTORDERCHUNKS, /* OutOrderChunks */ + SCTP_MIB_OUTUNORDERCHUNKS, /* OutUnorderChunks */ + SCTP_MIB_INCTRLCHUNKS, /* InCtrlChunks */ + SCTP_MIB_INORDERCHUNKS, /* InOrderChunks */ + SCTP_MIB_INUNORDERCHUNKS, /* InUnorderChunks */ + SCTP_MIB_FRAGUSRMSGS, /* FragUsrMsgs */ + SCTP_MIB_REASMUSRMSGS, /* ReasmUsrMsgs */ + SCTP_MIB_OUTSCTPPACKS, /* OutSCTPPacks */ + SCTP_MIB_INSCTPPACKS, /* InSCTPPacks */ + SCTP_MIB_T1_INIT_EXPIREDS, + SCTP_MIB_T1_COOKIE_EXPIREDS, + SCTP_MIB_T2_SHUTDOWN_EXPIREDS, + SCTP_MIB_T3_RTX_EXPIREDS, + SCTP_MIB_T4_RTO_EXPIREDS, + SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS, + SCTP_MIB_DELAY_SACK_EXPIREDS, + SCTP_MIB_AUTOCLOSE_EXPIREDS, + SCTP_MIB_T3_RETRANSMITS, + SCTP_MIB_PMTUD_RETRANSMITS, + SCTP_MIB_FAST_RETRANSMITS, + SCTP_MIB_IN_PKT_SOFTIRQ, + SCTP_MIB_IN_PKT_BACKLOG, + SCTP_MIB_IN_PKT_DISCARDS, + SCTP_MIB_IN_DATA_CHUNK_DISCARDS, + __SCTP_MIB_MAX +}; + +#define SCTP_MIB_MAX __SCTP_MIB_MAX +struct sctp_mib { + unsigned long mibs[SCTP_MIB_MAX]; +} __SNMP_MIB_ALIGN__; + /* Print debugging messages. */ #if SCTP_DEBUG diff --git a/include/net/snmp.h b/include/net/snmp.h index a36bed8ea210..464970e39ec0 100644 --- a/include/net/snmp.h +++ b/include/net/snmp.h @@ -100,12 +100,6 @@ struct udp_mib { unsigned long mibs[UDP_MIB_MAX]; } __SNMP_MIB_ALIGN__; -/* SCTP */ -#define SCTP_MIB_MAX __SCTP_MIB_MAX -struct sctp_mib { - unsigned long mibs[SCTP_MIB_MAX]; -} __SNMP_MIB_ALIGN__; - /* Linux */ #define LINUX_MIB_MAX __LINUX_MIB_MAX struct linux_mib { diff --git a/net/sctp/input.c b/net/sctp/input.c index 42b66e74bbb5..8a34d95602ce 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -255,10 +255,13 @@ int sctp_rcv(struct sk_buff *skb) */ sctp_bh_lock_sock(sk); - if (sock_owned_by_user(sk)) + if (sock_owned_by_user(sk)) { + SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_BACKLOG); sctp_add_backlog(sk, skb); - else + } else { + SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_SOFTIRQ); sctp_inq_push(&chunk->rcvr->inqueue, chunk); + } sctp_bh_unlock_sock(sk); @@ -271,6 +274,7 @@ int sctp_rcv(struct sk_buff *skb) return 0; discard_it: + SCTP_INC_STATS_BH(SCTP_MIB_IN_PKT_DISCARDS); kfree_skb(skb); return 0; diff --git a/net/sctp/inqueue.c b/net/sctp/inqueue.c index cf0c767d43ae..cf6deed7e849 100644 --- a/net/sctp/inqueue.c +++ b/net/sctp/inqueue.c @@ -87,7 +87,7 @@ void sctp_inq_free(struct sctp_inq *queue) /* Put a new packet in an SCTP inqueue. * We assume that packet->sctp_hdr is set and in host byte order. */ -void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *packet) +void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *chunk) { /* Directly call the packet handling routine. */ @@ -96,7 +96,7 @@ void sctp_inq_push(struct sctp_inq *q, struct sctp_chunk *packet) * Eventually, we should clean up inqueue to not rely * on the BH related data structures. */ - list_add_tail(&packet->list, &q->in_chunk_list); + list_add_tail(&chunk->list, &q->in_chunk_list); q->immediate.func(q->immediate.data); } diff --git a/net/sctp/outqueue.c b/net/sctp/outqueue.c index 30b710c54e64..37074a39ecbb 100644 --- a/net/sctp/outqueue.c +++ b/net/sctp/outqueue.c @@ -467,6 +467,7 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, switch(reason) { case SCTP_RTXR_T3_RTX: + SCTP_INC_STATS(SCTP_MIB_T3_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_T3_RTX); /* Update the retran path if the T3-rtx timer has expired for * the current retran path. @@ -475,12 +476,15 @@ void sctp_retransmit(struct sctp_outq *q, struct sctp_transport *transport, sctp_assoc_update_retran_path(transport->asoc); break; case SCTP_RTXR_FAST_RTX: + SCTP_INC_STATS(SCTP_MIB_FAST_RETRANSMITS); sctp_transport_lower_cwnd(transport, SCTP_LOWER_CWND_FAST_RTX); fast_retransmit = 1; break; case SCTP_RTXR_PMTUD: - default: + SCTP_INC_STATS(SCTP_MIB_PMTUD_RETRANSMITS); break; + default: + BUG(); } sctp_retransmit_mark(q, transport, fast_retransmit); diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 5b3b0e0ae7e5..a356d8d310a9 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -57,6 +57,21 @@ static struct snmp_mib sctp_snmp_list[] = { SNMP_MIB_ITEM("SctpReasmUsrMsgs", SCTP_MIB_REASMUSRMSGS), SNMP_MIB_ITEM("SctpOutSCTPPacks", SCTP_MIB_OUTSCTPPACKS), SNMP_MIB_ITEM("SctpInSCTPPacks", SCTP_MIB_INSCTPPACKS), + SNMP_MIB_ITEM("SctpT1InitExpireds", SCTP_MIB_T1_INIT_EXPIREDS), + SNMP_MIB_ITEM("SctpT1CookieExpireds", SCTP_MIB_T1_COOKIE_EXPIREDS), + SNMP_MIB_ITEM("SctpT2ShutdownExpireds", SCTP_MIB_T2_SHUTDOWN_EXPIREDS), + SNMP_MIB_ITEM("SctpT3RtxExpireds", SCTP_MIB_T3_RTX_EXPIREDS), + SNMP_MIB_ITEM("SctpT4RtoExpireds", SCTP_MIB_T4_RTO_EXPIREDS), + SNMP_MIB_ITEM("SctpT5ShutdownGuardExpireds", SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS), + SNMP_MIB_ITEM("SctpDelaySackExpireds", SCTP_MIB_DELAY_SACK_EXPIREDS), + SNMP_MIB_ITEM("SctpAutocloseExpireds", SCTP_MIB_AUTOCLOSE_EXPIREDS), + SNMP_MIB_ITEM("SctpT3Retransmits", SCTP_MIB_T3_RETRANSMITS), + SNMP_MIB_ITEM("SctpPmtudRetransmits", SCTP_MIB_PMTUD_RETRANSMITS), + SNMP_MIB_ITEM("SctpFastRetransmits", SCTP_MIB_FAST_RETRANSMITS), + SNMP_MIB_ITEM("SctpInPktSoftirq", SCTP_MIB_IN_PKT_SOFTIRQ), + SNMP_MIB_ITEM("SctpInPktBacklog", SCTP_MIB_IN_PKT_BACKLOG), + SNMP_MIB_ITEM("SctpInPktDiscards", SCTP_MIB_IN_PKT_DISCARDS), + SNMP_MIB_ITEM("SctpInDataChunkDiscards", SCTP_MIB_IN_DATA_CHUNK_DISCARDS), SNMP_MIB_SENTINEL }; @@ -328,8 +343,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) "%8p %8p %-3d %-3d %-2d %-4d %4d %8d %8d %7d %5lu %-5d %5d ", assoc, sk, sctp_sk(sk)->type, sk->sk_state, assoc->state, hash, assoc->assoc_id, - (sk->sk_rcvbuf - assoc->rwnd), assoc->sndbuf_used, + (sk->sk_rcvbuf - assoc->rwnd), sock_i_uid(sk), sock_i_ino(sk), epb->bind_addr.port, assoc->peer.port); diff --git a/net/sctp/sm_statefuns.c b/net/sctp/sm_statefuns.c index 5b5ae7958322..32f57f42af9e 100644 --- a/net/sctp/sm_statefuns.c +++ b/net/sctp/sm_statefuns.c @@ -2663,9 +2663,11 @@ sctp_disposition_t sctp_sf_eat_data_6_2(const struct sctp_endpoint *ep, break; case SCTP_IERROR_HIGH_TSN: case SCTP_IERROR_BAD_STREAM: + SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS); goto discard_noforce; case SCTP_IERROR_DUP_TSN: case SCTP_IERROR_IGNORE_TSN: + SCTP_INC_STATS(SCTP_MIB_IN_DATA_CHUNK_DISCARDS); goto discard_force; case SCTP_IERROR_NO_DATA: goto consume; @@ -3652,6 +3654,7 @@ sctp_disposition_t sctp_sf_pdiscard(const struct sctp_endpoint *ep, void *arg, sctp_cmd_seq_t *commands) { + SCTP_INC_STATS(SCTP_MIB_IN_PKT_DISCARDS); sctp_add_cmd_sf(commands, SCTP_CMD_DISCARD_PACKET, SCTP_NULL()); return SCTP_DISPOSITION_CONSUME; @@ -4548,6 +4551,8 @@ sctp_disposition_t sctp_sf_do_6_3_3_rtx(const struct sctp_endpoint *ep, { struct sctp_transport *transport = arg; + SCTP_INC_STATS(SCTP_MIB_T3_RTX_EXPIREDS); + if (asoc->overall_error_count >= asoc->max_retrans) { sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ETIMEDOUT)); @@ -4616,6 +4621,7 @@ sctp_disposition_t sctp_sf_do_6_2_sack(const struct sctp_endpoint *ep, void *arg, sctp_cmd_seq_t *commands) { + SCTP_INC_STATS(SCTP_MIB_DELAY_SACK_EXPIREDS); sctp_add_cmd_sf(commands, SCTP_CMD_GEN_SACK, SCTP_FORCE()); return SCTP_DISPOSITION_CONSUME; } @@ -4650,6 +4656,7 @@ sctp_disposition_t sctp_sf_t1_init_timer_expire(const struct sctp_endpoint *ep, int attempts = asoc->init_err_counter + 1; SCTP_DEBUG_PRINTK("Timer T1 expired (INIT).\n"); + SCTP_INC_STATS(SCTP_MIB_T1_INIT_EXPIREDS); if (attempts <= asoc->max_init_attempts) { bp = (struct sctp_bind_addr *) &asoc->base.bind_addr; @@ -4709,6 +4716,7 @@ sctp_disposition_t sctp_sf_t1_cookie_timer_expire(const struct sctp_endpoint *ep int attempts = asoc->init_err_counter + 1; SCTP_DEBUG_PRINTK("Timer T1 expired (COOKIE-ECHO).\n"); + SCTP_INC_STATS(SCTP_MIB_T1_COOKIE_EXPIREDS); if (attempts <= asoc->max_init_attempts) { repl = sctp_make_cookie_echo(asoc, NULL); @@ -4753,6 +4761,8 @@ sctp_disposition_t sctp_sf_t2_timer_expire(const struct sctp_endpoint *ep, struct sctp_chunk *reply = NULL; SCTP_DEBUG_PRINTK("Timer T2 expired.\n"); + SCTP_INC_STATS(SCTP_MIB_T2_SHUTDOWN_EXPIREDS); + if (asoc->overall_error_count >= asoc->max_retrans) { sctp_add_cmd_sf(commands, SCTP_CMD_SET_SK_ERR, SCTP_ERROR(ETIMEDOUT)); @@ -4814,6 +4824,8 @@ sctp_disposition_t sctp_sf_t4_timer_expire( struct sctp_chunk *chunk = asoc->addip_last_asconf; struct sctp_transport *transport = chunk->transport; + SCTP_INC_STATS(SCTP_MIB_T4_RTO_EXPIREDS); + /* ADDIP 4.1 B1) Increment the error counters and perform path failure * detection on the appropriate destination address as defined in * RFC2960 [5] section 8.1 and 8.2. @@ -4880,6 +4892,7 @@ sctp_disposition_t sctp_sf_t5_timer_expire(const struct sctp_endpoint *ep, struct sctp_chunk *reply = NULL; SCTP_DEBUG_PRINTK("Timer T5 expired.\n"); + SCTP_INC_STATS(SCTP_MIB_T5_SHUTDOWN_GUARD_EXPIREDS); reply = sctp_make_abort(asoc, NULL, 0); if (!reply) @@ -4910,6 +4923,8 @@ sctp_disposition_t sctp_sf_autoclose_timer_expire( { int disposition; + SCTP_INC_STATS(SCTP_MIB_AUTOCLOSE_EXPIREDS); + /* From 9.2 Shutdown of an Association * Upon receipt of the SHUTDOWN primitive from its upper * layer, the endpoint enters SHUTDOWN-PENDING state and -- cgit v1.3-8-gc7d7 From 3fd091e73b81f131e1567c4d4a1ec042940bf2f7 Mon Sep 17 00:00:00 2001 From: Vladislav Yasevich Date: Tue, 22 Aug 2006 13:29:17 -0700 Subject: [SCTP]: Remove multiple levels of msecs to jiffies conversions. The SCTP sysctl entries are displayed in milliseconds, but stored internally in jiffies. This results in multiple levels of msecs to jiffies conversion and as a result produces a truncation error. This patch makes things consistent in that we store and display defaults in milliseconds and only convert once for use by association. This patch also adds some sane min/max values so that we don't go off the deep end. Signed-off-by: Vladislav Yasevich Signed-off-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/net/sctp/constants.h | 13 ++-- include/net/sctp/structs.h | 12 ++-- net/sctp/protocol.c | 2 +- net/sctp/socket.c | 15 +++-- net/sctp/sysctl.c | 140 +++++++++++++++++++------------------------ net/sctp/transport.c | 2 +- 6 files changed, 84 insertions(+), 100 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/constants.h b/include/net/sctp/constants.h index 57166bfdf8eb..6c632e26f72d 100644 --- a/include/net/sctp/constants.h +++ b/include/net/sctp/constants.h @@ -264,10 +264,10 @@ enum { SCTP_MAX_DUP_TSNS = 16 }; enum { SCTP_MAX_GABS = 16 }; /* Heartbeat interval - 30 secs */ -#define SCTP_DEFAULT_TIMEOUT_HEARTBEAT (30 * HZ) +#define SCTP_DEFAULT_TIMEOUT_HEARTBEAT (30*1000) /* Delayed sack timer - 200ms */ -#define SCTP_DEFAULT_TIMEOUT_SACK ((200 * HZ) / 1000) +#define SCTP_DEFAULT_TIMEOUT_SACK (200) /* RTO.Initial - 3 seconds * RTO.Min - 1 second @@ -275,9 +275,9 @@ enum { SCTP_MAX_GABS = 16 }; * RTO.Alpha - 1/8 * RTO.Beta - 1/4 */ -#define SCTP_RTO_INITIAL (3 * HZ) -#define SCTP_RTO_MIN (1 * HZ) -#define SCTP_RTO_MAX (60 * HZ) +#define SCTP_RTO_INITIAL (3 * 1000) +#define SCTP_RTO_MIN (1 * 1000) +#define SCTP_RTO_MAX (60 * 1000) #define SCTP_RTO_ALPHA 3 /* 1/8 when converted to right shifts. */ #define SCTP_RTO_BETA 2 /* 1/4 when converted to right shifts. */ @@ -290,8 +290,7 @@ enum { SCTP_MAX_GABS = 16 }; #define SCTP_DEF_MAX_INIT 6 #define SCTP_DEF_MAX_SEND 10 -#define SCTP_DEFAULT_COOKIE_LIFE_SEC 60 /* seconds */ -#define SCTP_DEFAULT_COOKIE_LIFE_USEC 0 /* microseconds */ +#define SCTP_DEFAULT_COOKIE_LIFE (60 * 1000) /* 60 seconds */ #define SCTP_DEFAULT_MINWINDOW 1500 /* default minimum rwnd size */ #define SCTP_DEFAULT_MAXWINDOW 65535 /* default rwnd size */ diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 0412e730c765..c6d93bb0dcd2 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -128,9 +128,9 @@ extern struct sctp_globals { * RTO.Alpha - 1/8 (3 when converted to right shifts.) * RTO.Beta - 1/4 (2 when converted to right shifts.) */ - unsigned long rto_initial; - unsigned long rto_min; - unsigned long rto_max; + unsigned int rto_initial; + unsigned int rto_min; + unsigned int rto_max; /* Note: rto_alpha and rto_beta are really defined as inverse * powers of two to facilitate integer operations. @@ -145,13 +145,13 @@ extern struct sctp_globals { int cookie_preserve_enable; /* Valid.Cookie.Life - 60 seconds */ - unsigned long valid_cookie_life; + unsigned int valid_cookie_life; /* Delayed SACK timeout 200ms default*/ - unsigned long sack_timeout; + unsigned int sack_timeout; /* HB.interval - 30 seconds */ - unsigned long hb_interval; + unsigned int hb_interval; /* Association.Max.Retrans - 10 attempts * Path.Max.Retrans - 5 attempts (per destination address) diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index 1ab03a27a76e..5692ef5485d3 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -1049,7 +1049,7 @@ SCTP_STATIC __init int sctp_init(void) sctp_rto_beta = SCTP_RTO_BETA; /* Valid.Cookie.Life - 60 seconds */ - sctp_valid_cookie_life = 60 * HZ; + sctp_valid_cookie_life = SCTP_DEFAULT_COOKIE_LIFE; /* Whether Cookie Preservative is enabled(1) or not(0) */ sctp_cookie_preserve_enable = 1; diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 3b6e82cb372f..7c1dbb1d10df 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -3045,14 +3045,14 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->initmsg.sinit_num_ostreams = sctp_max_outstreams; sp->initmsg.sinit_max_instreams = sctp_max_instreams; sp->initmsg.sinit_max_attempts = sctp_max_retrans_init; - sp->initmsg.sinit_max_init_timeo = jiffies_to_msecs(sctp_rto_max); + sp->initmsg.sinit_max_init_timeo = sctp_rto_max; /* Initialize default RTO related parameters. These parameters can * be modified for with the SCTP_RTOINFO socket option. */ - sp->rtoinfo.srto_initial = jiffies_to_msecs(sctp_rto_initial); - sp->rtoinfo.srto_max = jiffies_to_msecs(sctp_rto_max); - sp->rtoinfo.srto_min = jiffies_to_msecs(sctp_rto_min); + sp->rtoinfo.srto_initial = sctp_rto_initial; + sp->rtoinfo.srto_max = sctp_rto_max; + sp->rtoinfo.srto_min = sctp_rto_min; /* Initialize default association related parameters. These parameters * can be modified with the SCTP_ASSOCINFO socket option. @@ -3061,8 +3061,7 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) sp->assocparams.sasoc_number_peer_destinations = 0; sp->assocparams.sasoc_peer_rwnd = 0; sp->assocparams.sasoc_local_rwnd = 0; - sp->assocparams.sasoc_cookie_life = - jiffies_to_msecs(sctp_valid_cookie_life); + sp->assocparams.sasoc_cookie_life = sctp_valid_cookie_life; /* Initialize default event subscriptions. By default, all the * options are off. @@ -3072,10 +3071,10 @@ SCTP_STATIC int sctp_init_sock(struct sock *sk) /* Default Peer Address Parameters. These defaults can * be modified via SCTP_PEER_ADDR_PARAMS */ - sp->hbinterval = jiffies_to_msecs(sctp_hb_interval); + sp->hbinterval = sctp_hb_interval; sp->pathmaxrxt = sctp_max_retrans_path; sp->pathmtu = 0; // allow default discovery - sp->sackdelay = jiffies_to_msecs(sctp_sack_timeout); + sp->sackdelay = sctp_sack_timeout; sp->param_flags = SPP_HB_ENABLE | SPP_PMTUD_ENABLE | SPP_SACKDELAY_ENABLE; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index dc6f3ff32358..633cd178654b 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -45,9 +45,10 @@ #include #include -static ctl_handler sctp_sysctl_jiffies_ms; -static long rto_timer_min = 1; -static long rto_timer_max = 86400000; /* One day */ +static int zero = 0; +static int one = 1; +static int timer_max = 86400000; /* ms in one day */ +static int int_max = INT_MAX; static long sack_timer_min = 1; static long sack_timer_max = 500; @@ -56,45 +57,45 @@ static ctl_table sctp_table[] = { .ctl_name = NET_SCTP_RTO_INITIAL, .procname = "rto_initial", .data = &sctp_rto_initial, - .maxlen = sizeof(long), + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_doulongvec_ms_jiffies_minmax, - .strategy = &sctp_sysctl_jiffies_ms, - .extra1 = &rto_timer_min, - .extra2 = &rto_timer_max + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &timer_max }, { .ctl_name = NET_SCTP_RTO_MIN, .procname = "rto_min", .data = &sctp_rto_min, - .maxlen = sizeof(long), + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_doulongvec_ms_jiffies_minmax, - .strategy = &sctp_sysctl_jiffies_ms, - .extra1 = &rto_timer_min, - .extra2 = &rto_timer_max + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &timer_max }, { .ctl_name = NET_SCTP_RTO_MAX, .procname = "rto_max", .data = &sctp_rto_max, - .maxlen = sizeof(long), + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_doulongvec_ms_jiffies_minmax, - .strategy = &sctp_sysctl_jiffies_ms, - .extra1 = &rto_timer_min, - .extra2 = &rto_timer_max + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &timer_max }, { .ctl_name = NET_SCTP_VALID_COOKIE_LIFE, .procname = "valid_cookie_life", .data = &sctp_valid_cookie_life, - .maxlen = sizeof(long), + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_doulongvec_ms_jiffies_minmax, - .strategy = &sctp_sysctl_jiffies_ms, - .extra1 = &rto_timer_min, - .extra2 = &rto_timer_max + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &timer_max }, { .ctl_name = NET_SCTP_MAX_BURST, @@ -102,7 +103,10 @@ static ctl_table sctp_table[] = { .data = &sctp_max_burst, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &zero, + .extra2 = &int_max }, { .ctl_name = NET_SCTP_ASSOCIATION_MAX_RETRANS, @@ -110,7 +114,10 @@ static ctl_table sctp_table[] = { .data = &sctp_max_retrans_association, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &int_max }, { .ctl_name = NET_SCTP_SNDBUF_POLICY, @@ -118,7 +125,8 @@ static ctl_table sctp_table[] = { .data = &sctp_sndbuf_policy, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec }, { .ctl_name = NET_SCTP_RCVBUF_POLICY, @@ -126,7 +134,8 @@ static ctl_table sctp_table[] = { .data = &sctp_rcvbuf_policy, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec }, { .ctl_name = NET_SCTP_PATH_MAX_RETRANS, @@ -134,7 +143,10 @@ static ctl_table sctp_table[] = { .data = &sctp_max_retrans_path, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &int_max }, { .ctl_name = NET_SCTP_MAX_INIT_RETRANSMITS, @@ -142,18 +154,21 @@ static ctl_table sctp_table[] = { .data = &sctp_max_retrans_init, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &int_max }, { .ctl_name = NET_SCTP_HB_INTERVAL, .procname = "hb_interval", .data = &sctp_hb_interval, - .maxlen = sizeof(long), + .maxlen = sizeof(unsigned int), .mode = 0644, - .proc_handler = &proc_doulongvec_ms_jiffies_minmax, - .strategy = &sctp_sysctl_jiffies_ms, - .extra1 = &rto_timer_min, - .extra2 = &rto_timer_max + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, + .extra1 = &one, + .extra2 = &timer_max }, { .ctl_name = NET_SCTP_PRESERVE_ENABLE, @@ -161,23 +176,26 @@ static ctl_table sctp_table[] = { .data = &sctp_cookie_preserve_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec }, { .ctl_name = NET_SCTP_RTO_ALPHA, .procname = "rto_alpha_exp_divisor", .data = &sctp_rto_alpha, .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec + .mode = 0444, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec }, { .ctl_name = NET_SCTP_RTO_BETA, .procname = "rto_beta_exp_divisor", .data = &sctp_rto_beta, .maxlen = sizeof(int), - .mode = 0644, - .proc_handler = &proc_dointvec + .mode = 0444, + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec }, { .ctl_name = NET_SCTP_ADDIP_ENABLE, @@ -185,7 +203,8 @@ static ctl_table sctp_table[] = { .data = &sctp_addip_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec }, { .ctl_name = NET_SCTP_PRSCTP_ENABLE, @@ -193,7 +212,8 @@ static ctl_table sctp_table[] = { .data = &sctp_prsctp_enable, .maxlen = sizeof(int), .mode = 0644, - .proc_handler = &proc_dointvec + .proc_handler = &proc_dointvec, + .strategy = &sysctl_intvec }, { .ctl_name = NET_SCTP_SACK_TIMEOUT, @@ -201,8 +221,8 @@ static ctl_table sctp_table[] = { .data = &sctp_sack_timeout, .maxlen = sizeof(long), .mode = 0644, - .proc_handler = &proc_doulongvec_ms_jiffies_minmax, - .strategy = &sctp_sysctl_jiffies_ms, + .proc_handler = &proc_dointvec_minmax, + .strategy = &sysctl_intvec, .extra1 = &sack_timer_min, .extra2 = &sack_timer_max, }, @@ -242,37 +262,3 @@ void sctp_sysctl_unregister(void) { unregister_sysctl_table(sctp_sysctl_header); } - -/* Strategy function to convert jiffies to milliseconds. */ -static int sctp_sysctl_jiffies_ms(ctl_table *table, int __user *name, int nlen, - void __user *oldval, size_t __user *oldlenp, - void __user *newval, size_t newlen, void **context) { - - if (oldval) { - size_t olen; - - if (oldlenp) { - if (get_user(olen, oldlenp)) - return -EFAULT; - - if (olen != sizeof (int)) - return -EINVAL; - } - if (put_user((*(int *)(table->data) * 1000) / HZ, - (int __user *)oldval) || - (oldlenp && put_user(sizeof (int), oldlenp))) - return -EFAULT; - } - if (newval && newlen) { - int new; - - if (newlen != sizeof (int)) - return -EINVAL; - - if (get_user(new, (int __user *)newval)) - return -EFAULT; - - *(int *)(table->data) = (new * HZ) / 1000; - } - return 1; -} diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 2763aa93de1a..3e5936a5f671 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -75,7 +75,7 @@ static struct sctp_transport *sctp_transport_init(struct sctp_transport *peer, * parameter 'RTO.Initial'. */ peer->rtt = 0; - peer->rto = sctp_rto_initial; + peer->rto = msecs_to_jiffies(sctp_rto_initial); peer->rttvar = 0; peer->srtt = 0; peer->rto_pending = 0; -- cgit v1.3-8-gc7d7 From 5e032e32ecc2e6cb0385dc115ca9bfe5e19a9539 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 23 Aug 2006 17:12:24 -0700 Subject: [IPV6] NDISC: Take source address into account for redirects. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: Ville Nuorvala Signed-off-by: David S. Miller --- include/net/ip6_route.h | 1 + net/ipv6/ndisc.c | 3 ++- net/ipv6/route.c | 5 +++-- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 3f170f667c7b..249ce4545ef0 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -110,6 +110,7 @@ extern int rt6_route_rcv(struct net_device *dev, struct in6_addr *gwaddr); extern void rt6_redirect(struct in6_addr *dest, + struct in6_addr *src, struct in6_addr *saddr, struct neighbour *neigh, u8 *lladdr, diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 419d65163819..32f28dec399e 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1346,7 +1346,8 @@ static void ndisc_redirect_rcv(struct sk_buff *skb) neigh = __neigh_lookup(&nd_tbl, target, skb->dev, 1); if (neigh) { - rt6_redirect(dest, &skb->nh.ipv6h->saddr, neigh, lladdr, + rt6_redirect(dest, &skb->nh.ipv6h->daddr, + &skb->nh.ipv6h->saddr, neigh, lladdr, on_link); neigh_release(neigh); } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 5d6e9083ca2c..a9b08a2422e0 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1279,7 +1279,8 @@ static int ip6_route_del(struct fib6_config *cfg) /* * Handle redirects */ -void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr, +void rt6_redirect(struct in6_addr *dest, struct in6_addr *src, + struct in6_addr *saddr, struct neighbour *neigh, u8 *lladdr, int on_link) { struct rt6_info *rt, *nrt = NULL; @@ -1304,7 +1305,7 @@ void rt6_redirect(struct in6_addr *dest, struct in6_addr *saddr, */ read_lock_bh(&table->tb6_lock); - fn = fib6_lookup(&table->tb6_root, dest, NULL); + fn = fib6_lookup(&table->tb6_root, dest, src); restart: for (rt = fn->leaf; rt; rt = rt->u.next) { /* -- cgit v1.3-8-gc7d7 From 8e1ef0a95b87e8b4292b2ba733e8cb854ea2d2fe Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Tue, 29 Aug 2006 17:15:09 -0700 Subject: [IPV6]: Cache source address as well in ipv6_pinfo{}. Based on MIPL2 kernel patch. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: Ville Nuorvala Signed-off-by: David S. Miller --- include/linux/ipv6.h | 3 +++ include/net/ip6_route.h | 9 ++++++--- net/dccp/ipv6.c | 4 ++-- net/ipv6/af_inet6.c | 2 +- net/ipv6/datagram.c | 7 ++++++- net/ipv6/inet6_connection_sock.c | 2 +- net/ipv6/ip6_output.c | 3 +++ net/ipv6/tcp_ipv6.c | 4 ++-- net/ipv6/udp.c | 7 ++++++- 9 files changed, 30 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 297853c841b4..02d14a3ff2af 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -242,6 +242,9 @@ struct ipv6_pinfo { struct in6_addr rcv_saddr; struct in6_addr daddr; struct in6_addr *daddr_cache; +#ifdef CONFIG_IPV6_SUBTREES + struct in6_addr *saddr_cache; +#endif __u32 flow_label; __u32 frag_size; diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 249ce4545ef0..0d40f84df21b 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -144,21 +144,24 @@ extern rwlock_t rt6_lock; * Store a destination cache entry in a socket */ static inline void __ip6_dst_store(struct sock *sk, struct dst_entry *dst, - struct in6_addr *daddr) + struct in6_addr *daddr, struct in6_addr *saddr) { struct ipv6_pinfo *np = inet6_sk(sk); struct rt6_info *rt = (struct rt6_info *) dst; sk_setup_caps(sk, dst); np->daddr_cache = daddr; +#ifdef CONFIG_IPV6_SUBTREES + np->saddr_cache = saddr; +#endif np->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; } static inline void ip6_dst_store(struct sock *sk, struct dst_entry *dst, - struct in6_addr *daddr) + struct in6_addr *daddr, struct in6_addr *saddr) { write_lock(&sk->sk_dst_lock); - __ip6_dst_store(sk, dst, daddr); + __ip6_dst_store(sk, dst, daddr, saddr); write_unlock(&sk->sk_dst_lock); } diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index 231bc7c7e749..f9c5e12d7038 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -231,7 +231,7 @@ static int dccp_v6_connect(struct sock *sk, struct sockaddr *uaddr, ipv6_addr_copy(&np->saddr, saddr); inet->rcv_saddr = LOOPBACK4_IPV6; - __ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL, NULL); icsk->icsk_ext_hdr_len = 0; if (np->opt != NULL) @@ -872,7 +872,7 @@ static struct sock *dccp_v6_request_recv_sock(struct sock *sk, * comment in that function for the gory details. -acme */ - __ip6_dst_store(newsk, dst, NULL); + __ip6_dst_store(newsk, dst, NULL, NULL); newsk->sk_route_caps = dst->dev->features & ~(NETIF_F_IP_CSUM | NETIF_F_TSO); newdp6 = (struct dccp6_sock *)newsk; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 2ff600cfe3a4..57ee5ddea96f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -659,7 +659,7 @@ int inet6_sk_rebuild_header(struct sock *sk) return err; } - __ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL, NULL); } return 0; diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index c73508e090a6..8561b9da6db6 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -193,7 +193,12 @@ ipv4_connected: ip6_dst_store(sk, dst, ipv6_addr_equal(&fl.fl6_dst, &np->daddr) ? - &np->daddr : NULL); + &np->daddr : NULL, +#ifdef CONFIG_IPV6_SUBTREES + ipv6_addr_equal(&fl.fl6_src, &np->saddr) ? + &np->saddr : +#endif + NULL); sk->sk_state = TCP_ESTABLISHED; out: diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 7a51a258615d..827f41d1478b 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -186,7 +186,7 @@ int inet6_csk_xmit(struct sk_buff *skb, int ipfragok) return err; } - __ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL, NULL); } skb->dst = dst_clone(dst); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 0a18cb6b1cbb..2a376b7d91b4 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -762,6 +762,9 @@ static struct dst_entry *ip6_sk_dst_check(struct sock *sk, * 2. oif also should be the same. */ if (ip6_rt_check(&rt->rt6i_dst, &fl->fl6_dst, np->daddr_cache) || +#ifdef CONFIG_IPV6_SUBTREES + ip6_rt_check(&rt->rt6i_src, &fl->fl6_src, np->saddr_cache) || +#endif (fl->oif && fl->oif != dst->dev->ifindex)) { dst_release(dst); dst = NULL; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 7f1b660493b7..2b18918f3011 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -272,7 +272,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, inet->rcv_saddr = LOOPBACK4_IPV6; sk->sk_gso_type = SKB_GSO_TCPV6; - __ip6_dst_store(sk, dst, NULL); + __ip6_dst_store(sk, dst, NULL, NULL); icsk->icsk_ext_hdr_len = 0; if (np->opt) @@ -954,7 +954,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, */ newsk->sk_gso_type = SKB_GSO_TCPV6; - __ip6_dst_store(newsk, dst, NULL); + __ip6_dst_store(newsk, dst, NULL, NULL); newtcp6sk = (struct tcp6_sock *)newsk; inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index eb9e1b39c8f8..b9cc55ccb000 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -847,7 +847,12 @@ do_append_data: if (connected) { ip6_dst_store(sk, dst, ipv6_addr_equal(&fl->fl6_dst, &np->daddr) ? - &np->daddr : NULL); + &np->daddr : NULL, +#ifdef CONFIG_IPV6_SUBTREES + ipv6_addr_equal(&fl->fl6_src, &np->saddr) ? + &np->saddr : +#endif + NULL); } else { dst_release(dst); } -- cgit v1.3-8-gc7d7 From 7fc33165a74301b2c5c90b2f2a1f6907cbd5c6f1 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 23 Aug 2006 17:22:24 -0700 Subject: [IPV6] ROUTE: Put SUBTREE() as FIB6_SUBTREE() into ip6_fib.h for future use. Based on MIPL2 kernel patch. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: Ville Nuorvala Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 5 +++++ net/ipv6/ip6_fib.c | 20 +++++++++----------- 2 files changed, 14 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 9610b887ffb5..6a3f26a04509 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -60,6 +60,11 @@ struct fib6_node __u32 fn_sernum; }; +#ifndef CONFIG_IPV6_SUBTREES +#define FIB6_SUBTREE(fn) NULL +#else +#define FIB6_SUBTREE(fn) ((fn)->subtree) +#endif /* * routing information diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index b706424e70b8..6536e33d8353 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -73,10 +73,8 @@ static DEFINE_RWLOCK(fib6_walker_lock); #ifdef CONFIG_IPV6_SUBTREES #define FWS_INIT FWS_S -#define SUBTREE(fn) ((fn)->subtree) #else #define FWS_INIT FWS_L -#define SUBTREE(fn) NULL #endif static void fib6_prune_clones(struct fib6_node *fn, struct rt6_info *rt); @@ -854,7 +852,7 @@ static struct fib6_node * fib6_lookup_1(struct fib6_node *root, } while(fn) { - if (SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) { + if (FIB6_SUBTREE(fn) || fn->fn_flags & RTN_RTINFO) { struct rt6key *key; key = (struct rt6key *) ((u8 *) fn->leaf + @@ -985,7 +983,7 @@ static struct rt6_info * fib6_find_prefix(struct fib6_node *fn) if(fn->right) return fn->right->leaf; - fn = SUBTREE(fn); + fn = FIB6_SUBTREE(fn); } return NULL; } @@ -1016,7 +1014,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn) if (fn->right) child = fn->right, children |= 1; if (fn->left) child = fn->left, children |= 2; - if (children == 3 || SUBTREE(fn) + if (children == 3 || FIB6_SUBTREE(fn) #ifdef CONFIG_IPV6_SUBTREES /* Subtree root (i.e. fn) may have one child */ || (children && fn->fn_flags&RTN_ROOT) @@ -1035,9 +1033,9 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn) pn = fn->parent; #ifdef CONFIG_IPV6_SUBTREES - if (SUBTREE(pn) == fn) { + if (FIB6_SUBTREE(pn) == fn) { BUG_TRAP(fn->fn_flags&RTN_ROOT); - SUBTREE(pn) = NULL; + FIB6_SUBTREE(pn) = NULL; nstate = FWS_L; } else { BUG_TRAP(!(fn->fn_flags&RTN_ROOT)); @@ -1085,7 +1083,7 @@ static struct fib6_node * fib6_repair_tree(struct fib6_node *fn) read_unlock(&fib6_walker_lock); node_free(fn); - if (pn->fn_flags&RTN_RTINFO || SUBTREE(pn)) + if (pn->fn_flags&RTN_RTINFO || FIB6_SUBTREE(pn)) return pn; rt6_release(pn->leaf); @@ -1228,8 +1226,8 @@ static int fib6_walk_continue(struct fib6_walker_t *w) switch (w->state) { #ifdef CONFIG_IPV6_SUBTREES case FWS_S: - if (SUBTREE(fn)) { - w->node = SUBTREE(fn); + if (FIB6_SUBTREE(fn)) { + w->node = FIB6_SUBTREE(fn); continue; } w->state = FWS_L; @@ -1263,7 +1261,7 @@ static int fib6_walk_continue(struct fib6_walker_t *w) pn = fn->parent; w->node = pn; #ifdef CONFIG_IPV6_SUBTREES - if (SUBTREE(pn) == fn) { + if (FIB6_SUBTREE(pn) == fn) { BUG_TRAP(fn->fn_flags&RTN_ROOT); w->state = FWS_L; continue; -- cgit v1.3-8-gc7d7 From 77d16f450ae0452d7d4b009f78debb1294fb435c Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Wed, 23 Aug 2006 17:25:05 -0700 Subject: [IPV6] ROUTE: Unify RT6_F_xxx and RT6_SELECT_F_xxx flags Unify RT6_F_xxx and RT6_SELECT_F_xxx flags into RT6_LOOKUP_F_xxx flags, and put them into ip6_route.h Signed-off-by: YOSHIFUJI Hideaki Acked-by: Ville Nuorvala --- include/net/ip6_fib.h | 3 --- include/net/ip6_route.h | 4 ++++ net/ipv6/fib6_rules.c | 2 +- net/ipv6/route.c | 32 ++++++++++++-------------------- 4 files changed, 17 insertions(+), 24 deletions(-) (limited to 'include/net') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 6a3f26a04509..e4438de3bd6b 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -173,9 +173,6 @@ struct fib6_table { #define RT6_TABLE_LOCAL RT6_TABLE_MAIN #endif -#define RT6_F_STRICT 1 -#define RT6_F_HAS_SADDR 2 - typedef struct rt6_info *(*pol_lookup_t)(struct fib6_table *, struct flowi *, int); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 0d40f84df21b..297909570041 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -32,6 +32,10 @@ struct route_info { #include #include +#define RT6_LOOKUP_F_IFACE 0x1 +#define RT6_LOOKUP_F_REACHABLE 0x2 +#define RT6_LOOKUP_F_HAS_SADDR 0x4 + struct pol_chain { int type; int priority; diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 2c4fbc855e6c..7b4908cc52b3 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -117,7 +117,7 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) if (!ipv6_prefix_equal(&fl->fl6_dst, &r->dst.addr, r->dst.plen)) return 0; - if ((flags & RT6_F_HAS_SADDR) && + if ((flags & RT6_LOOKUP_F_HAS_SADDR) && !ipv6_prefix_equal(&fl->fl6_src, &r->src.addr, r->src.plen)) return 0; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index fd6f2ec4fa09..20691285aee5 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -76,9 +76,6 @@ #define CLONE_OFFLINK_ROUTE 0 -#define RT6_SELECT_F_IFACE 0x1 -#define RT6_SELECT_F_REACHABLE 0x2 - static int ip6_rt_max_size = 4096; static int ip6_rt_gc_min_interval = HZ / 2; static int ip6_rt_gc_timeout = 60*HZ; @@ -340,7 +337,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif, int m, n; m = rt6_check_dev(rt, oif); - if (!m && (strict & RT6_SELECT_F_IFACE)) + if (!m && (strict & RT6_LOOKUP_F_IFACE)) return -1; #ifdef CONFIG_IPV6_ROUTER_PREF m |= IPV6_DECODE_PREF(IPV6_EXTRACT_PREF(rt->rt6i_flags)) << 2; @@ -348,7 +345,7 @@ static int rt6_score_route(struct rt6_info *rt, int oif, n = rt6_check_neigh(rt); if (n > 1) m |= 16; - else if (!n && strict & RT6_SELECT_F_REACHABLE) + else if (!n && strict & RT6_LOOKUP_F_REACHABLE) return -1; return m; } @@ -388,7 +385,7 @@ static struct rt6_info *rt6_select(struct rt6_info **head, int oif, } if (!match && - (strict & RT6_SELECT_F_REACHABLE) && + (strict & RT6_LOOKUP_F_REACHABLE) && last && last != rt0) { /* no entries matched; do round-robin */ static DEFINE_SPINLOCK(lock); @@ -511,7 +508,7 @@ static struct rt6_info *ip6_pol_route_lookup(struct fib6_table *table, fn = fib6_lookup(&table->tb6_root, &fl->fl6_dst, &fl->fl6_src); restart: rt = fn->leaf; - rt = rt6_device_match(rt, fl->oif, flags & RT6_F_STRICT); + rt = rt6_device_match(rt, fl->oif, flags); BACKTRACK(&fl->fl6_src); dst_hold(&rt->u.dst); out: @@ -537,7 +534,7 @@ struct rt6_info *rt6_lookup(struct in6_addr *daddr, struct in6_addr *saddr, }, }; struct dst_entry *dst; - int flags = strict ? RT6_F_STRICT : 0; + int flags = strict ? RT6_LOOKUP_F_IFACE : 0; dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_lookup); if (dst->error == 0) @@ -633,10 +630,9 @@ static struct rt6_info *ip6_pol_route_input(struct fib6_table *table, int strict = 0; int attempts = 3; int err; - int reachable = RT6_SELECT_F_REACHABLE; + int reachable = RT6_LOOKUP_F_REACHABLE; - if (flags & RT6_F_STRICT) - strict = RT6_SELECT_F_IFACE; + strict |= flags & RT6_LOOKUP_F_IFACE; relookup: read_lock_bh(&table->tb6_lock); @@ -712,10 +708,7 @@ void ip6_route_input(struct sk_buff *skb) }, .proto = iph->nexthdr, }; - int flags = 0; - - if (rt6_need_strict(&iph->daddr)) - flags |= RT6_F_STRICT; + int flags = rt6_need_strict(&iph->daddr) ? RT6_LOOKUP_F_IFACE : 0; skb->dst = fib6_rule_lookup(&fl, flags, ip6_pol_route_input); } @@ -728,10 +721,9 @@ static struct rt6_info *ip6_pol_route_output(struct fib6_table *table, int strict = 0; int attempts = 3; int err; - int reachable = RT6_SELECT_F_REACHABLE; + int reachable = RT6_LOOKUP_F_REACHABLE; - if (flags & RT6_F_STRICT) - strict = RT6_SELECT_F_IFACE; + strict |= flags & RT6_LOOKUP_F_IFACE; relookup: read_lock_bh(&table->tb6_lock); @@ -797,7 +789,7 @@ struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl) int flags = 0; if (rt6_need_strict(&fl->fl6_dst)) - flags |= RT6_F_STRICT; + flags |= RT6_LOOKUP_F_IFACE; return fib6_rule_lookup(fl, flags, ip6_pol_route_output); } @@ -1362,7 +1354,7 @@ static struct rt6_info *ip6_route_redirect(struct in6_addr *dest, }, .gateway = *gateway, }; - int flags = rt6_need_strict(dest) ? RT6_F_STRICT : 0; + int flags = rt6_need_strict(dest) ? RT6_LOOKUP_F_IFACE : 0; return (struct rt6_info *)fib6_rule_lookup((struct flowi *)&rdfl, flags, __ip6_route_redirect); } -- cgit v1.3-8-gc7d7 From 7e49e6de30efa716614e280d97963c570f3acf29 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Fri, 22 Sep 2006 15:05:15 -0700 Subject: [XFRM]: Add XFRM_MODE_xxx for future use. Transformation mode is used as either IPsec transport or tunnel. It is required to add two more items, route optimization and inbound trigger for Mobile IPv6. Based on MIPL2 kernel patch. This patch was also written by: Ville Nuorvala Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/xfrm.h | 6 ++++-- include/net/xfrm.h | 2 +- net/ipv4/ah4.c | 2 +- net/ipv4/esp4.c | 6 +++--- net/ipv4/ipcomp.c | 8 ++++---- net/ipv4/xfrm4_input.c | 2 +- net/ipv4/xfrm4_output.c | 4 ++-- net/ipv4/xfrm4_policy.c | 2 +- net/ipv4/xfrm4_state.c | 2 +- net/ipv4/xfrm4_tunnel.c | 2 +- net/ipv6/ah6.c | 2 +- net/ipv6/esp6.c | 4 ++-- net/ipv6/ipcomp6.c | 6 +++--- net/ipv6/xfrm6_input.c | 2 +- net/ipv6/xfrm6_output.c | 4 ++-- net/ipv6/xfrm6_policy.c | 2 +- net/ipv6/xfrm6_state.c | 2 +- net/ipv6/xfrm6_tunnel.c | 2 +- net/key/af_key.c | 6 +++--- net/xfrm/xfrm_policy.c | 11 ++++++----- net/xfrm/xfrm_user.c | 4 ++-- 21 files changed, 42 insertions(+), 39 deletions(-) (limited to 'include/net') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 46a15c7a1a13..5154064b6d95 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -120,7 +120,9 @@ enum #define XFRM_MODE_TRANSPORT 0 #define XFRM_MODE_TUNNEL 1 -#define XFRM_MODE_MAX 2 +#define XFRM_MODE_ROUTEOPTIMIZATION 2 +#define XFRM_MODE_IN_TRIGGER 3 +#define XFRM_MODE_MAX 4 /* Netlink configuration messages. */ enum { @@ -247,7 +249,7 @@ struct xfrm_usersa_info { __u32 seq; __u32 reqid; __u16 family; - __u8 mode; /* 0=transport,1=tunnel */ + __u8 mode; /* XFRM_MODE_xxx */ __u8 replay_window; __u8 flags; #define XFRM_STATE_NOECN 1 diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 00bf86e6e82b..762795624b10 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -298,7 +298,7 @@ struct xfrm_tmpl __u32 reqid; -/* Mode: transport/tunnel */ +/* Mode: transport, tunnel etc. */ __u8 mode; /* Sharing mode: unique, this session only, this user only etc. */ diff --git a/net/ipv4/ah4.c b/net/ipv4/ah4.c index 008e69d2e423..99542977e47e 100644 --- a/net/ipv4/ah4.c +++ b/net/ipv4/ah4.c @@ -265,7 +265,7 @@ static int ah_init_state(struct xfrm_state *x) goto error; x->props.header_len = XFRM_ALIGN8(sizeof(struct ip_auth_hdr) + ahp->icv_trunc_len); - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); x->data = ahp; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index b428489f6ccd..e87377e1d6b6 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -248,7 +248,7 @@ static int esp_input(struct xfrm_state *x, struct sk_buff *skb) * as per draft-ietf-ipsec-udp-encaps-06, * section 3.1.2 */ - if (!x->props.mode) + if (x->props.mode == XFRM_MODE_TRANSPORT) skb->ip_summed = CHECKSUM_UNNECESSARY; } @@ -267,7 +267,7 @@ static u32 esp4_get_max_size(struct xfrm_state *x, int mtu) struct esp_data *esp = x->data; u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { mtu = ALIGN(mtu + 2, blksize); } else { /* The worst case. */ @@ -383,7 +383,7 @@ static int esp_init_state(struct xfrm_state *x) if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) goto error; x->props.header_len = sizeof(struct ip_esp_hdr) + esp->conf.ivlen; - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); if (x->encap) { struct xfrm_encap_tmpl *encap = x->encap; diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 5bb9c9f03fb6..17342430a843 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -176,7 +176,7 @@ static int ipcomp_output(struct xfrm_state *x, struct sk_buff *skb) return 0; out_ok: - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) ip_send_check(iph); return 0; } @@ -216,7 +216,7 @@ static struct xfrm_state *ipcomp_tunnel_create(struct xfrm_state *x) t->id.daddr.a4 = x->id.daddr.a4; memcpy(&t->sel, &x->sel, sizeof(t->sel)); t->props.family = AF_INET; - t->props.mode = 1; + t->props.mode = XFRM_MODE_TUNNEL; t->props.saddr.a4 = x->props.saddr.a4; t->props.flags = x->props.flags; @@ -416,7 +416,7 @@ static int ipcomp_init_state(struct xfrm_state *x) goto out; x->props.header_len = 0; - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct iphdr); mutex_lock(&ipcomp_resource_mutex); @@ -428,7 +428,7 @@ static int ipcomp_init_state(struct xfrm_state *x) goto error; mutex_unlock(&ipcomp_resource_mutex); - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp_tunnel_attach(x); if (err) goto error_tunnel; diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 817ed84511a6..040e8475f295 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -106,7 +106,7 @@ int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) if (x->mode->input(x, skb)) goto drop; - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { decaps = 1; break; } diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 4a96a9e3ef3b..5fd115f0c547 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -54,7 +54,7 @@ static int xfrm4_output_one(struct sk_buff *skb) goto error_nolock; } - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { err = xfrm4_tunnel_check_size(skb); if (err) goto error_nolock; @@ -85,7 +85,7 @@ static int xfrm4_output_one(struct sk_buff *skb) } dst = skb->dst; x = dst->xfrm; - } while (x && !x->props.mode); + } while (x && (x->props.mode != XFRM_MODE_TUNNEL)); IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED; err = 0; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 8f50eae47d03..a5bed741de2c 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -96,7 +96,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst1->next = dst_prev; dst_prev = dst1; - if (xfrm[i]->props.mode) { + if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { remote = xfrm[i]->id.daddr.a4; local = xfrm[i]->props.saddr.a4; tunnel = 1; diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 81e1751c966e..97b0c7589711 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -42,7 +42,7 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->props.saddr = tmpl->saddr; if (x->props.saddr.a4 == 0) x->props.saddr.a4 = saddr->a4; - if (tmpl->mode && x->props.saddr.a4 == 0) { + if (tmpl->mode == XFRM_MODE_TUNNEL && x->props.saddr.a4 == 0) { struct rtable *rt; struct flowi fl_tunnel = { .nl_u = { diff --git a/net/ipv4/xfrm4_tunnel.c b/net/ipv4/xfrm4_tunnel.c index f8ceaa127c83..f110af5b1319 100644 --- a/net/ipv4/xfrm4_tunnel.c +++ b/net/ipv4/xfrm4_tunnel.c @@ -28,7 +28,7 @@ static int ipip_xfrm_rcv(struct xfrm_state *x, struct sk_buff *skb) static int ipip_init_state(struct xfrm_state *x) { - if (!x->props.mode) + if (x->props.mode != XFRM_MODE_TUNNEL) return -EINVAL; if (x->encap) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 00ffa7bc6c9f..60954fc7eb36 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -398,7 +398,7 @@ static int ah6_init_state(struct xfrm_state *x) goto error; x->props.header_len = XFRM_ALIGN8(sizeof(struct ipv6_auth_hdr) + ahp->icv_trunc_len); - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct ipv6hdr); x->data = ahp; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 2ebfd281e721..2b8e52e1d0ab 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -237,7 +237,7 @@ static u32 esp6_get_max_size(struct xfrm_state *x, int mtu) struct esp_data *esp = x->data; u32 blksize = ALIGN(crypto_blkcipher_blocksize(esp->conf.tfm), 4); - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { mtu = ALIGN(mtu + 2, blksize); } else { /* The worst case. */ @@ -358,7 +358,7 @@ static int esp6_init_state(struct xfrm_state *x) if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) goto error; x->props.header_len = sizeof(struct ipv6_esp_hdr) + esp->conf.ivlen; - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct ipv6hdr); x->data = esp; return 0; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index a81e9e9d93bd..19eba8d9f851 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -212,7 +212,7 @@ static struct xfrm_state *ipcomp6_tunnel_create(struct xfrm_state *x) memcpy(t->id.daddr.a6, x->id.daddr.a6, sizeof(struct in6_addr)); memcpy(&t->sel, &x->sel, sizeof(t->sel)); t->props.family = AF_INET6; - t->props.mode = 1; + t->props.mode = XFRM_MODE_TUNNEL; memcpy(t->props.saddr.a6, x->props.saddr.a6, sizeof(struct in6_addr)); if (xfrm_init_state(t)) @@ -417,7 +417,7 @@ static int ipcomp6_init_state(struct xfrm_state *x) goto out; x->props.header_len = 0; - if (x->props.mode) + if (x->props.mode == XFRM_MODE_TUNNEL) x->props.header_len += sizeof(struct ipv6hdr); mutex_lock(&ipcomp6_resource_mutex); @@ -429,7 +429,7 @@ static int ipcomp6_init_state(struct xfrm_state *x) goto error; mutex_unlock(&ipcomp6_resource_mutex); - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { err = ipcomp6_tunnel_attach(x); if (err) goto error_tunnel; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 0405d74ff910..ee2f6b3908b6 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -72,7 +72,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, u32 spi) if (x->mode->input(x, skb)) goto drop; - if (x->props.mode) { /* XXX */ + if (x->props.mode == XFRM_MODE_TUNNEL) { /* XXX */ decaps = 1; break; } diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 6d111743e508..26f18869f77b 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -47,7 +47,7 @@ static int xfrm6_output_one(struct sk_buff *skb) goto error_nolock; } - if (x->props.mode) { + if (x->props.mode == XFRM_MODE_TUNNEL) { err = xfrm6_tunnel_check_size(skb); if (err) goto error_nolock; @@ -80,7 +80,7 @@ static int xfrm6_output_one(struct sk_buff *skb) } dst = skb->dst; x = dst->xfrm; - } while (x && !x->props.mode); + } while (x && (x->props.mode != XFRM_MODE_TUNNEL)); IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; err = 0; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 73cd250aecbb..81355bb50328 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -114,7 +114,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst1->next = dst_prev; dst_prev = dst1; - if (xfrm[i]->props.mode) { + if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { remote = (struct in6_addr*)&xfrm[i]->id.daddr; local = (struct in6_addr*)&xfrm[i]->props.saddr; tunnel = 1; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index b33296b3f6de..a1a1f5476442 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -42,7 +42,7 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr)); if (ipv6_addr_any((struct in6_addr*)&x->props.saddr)) memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr)); - if (tmpl->mode && ipv6_addr_any((struct in6_addr*)&x->props.saddr)) { + if (tmpl->mode == XFRM_MODE_TUNNEL && ipv6_addr_any((struct in6_addr*)&x->props.saddr)) { struct rt6_info *rt; struct flowi fl_tunnel = { .nl_u = { diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index c8f9369c2a87..59685ee8f700 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -307,7 +307,7 @@ static int xfrm6_tunnel_err(struct sk_buff *skb, struct inet6_skb_parm *opt, static int xfrm6_tunnel_init_state(struct xfrm_state *x) { - if (!x->props.mode) + if (x->props.mode != XFRM_MODE_TUNNEL) return -EINVAL; if (x->encap) diff --git a/net/key/af_key.c b/net/key/af_key.c index 797c744a8438..19e047b0e678 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1765,7 +1765,7 @@ parse_ipsecrequest(struct xfrm_policy *xp, struct sadb_x_ipsecrequest *rq) } /* addresses present only in tunnel mode */ - if (t->mode) { + if (t->mode == XFRM_MODE_TUNNEL) { switch (xp->family) { case AF_INET: sin = (void*)(rq+1); @@ -1997,7 +1997,7 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i int req_size; req_size = sizeof(struct sadb_x_ipsecrequest); - if (t->mode) + if (t->mode == XFRM_MODE_TUNNEL) req_size += 2*socklen; else size -= 2*socklen; @@ -2013,7 +2013,7 @@ static void pfkey_xfrm_policy2msg(struct sk_buff *skb, struct xfrm_policy *xp, i if (t->optional) rq->sadb_x_ipsecrequest_level = IPSEC_LEVEL_USE; rq->sadb_x_ipsecrequest_reqid = t->reqid; - if (t->mode) { + if (t->mode == XFRM_MODE_TUNNEL) { switch (xp->family) { case AF_INET: sin = (void*)(rq+1); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 32c963c90573..a0d58971391d 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -779,7 +779,7 @@ xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl, xfrm_address_t *local = saddr; struct xfrm_tmpl *tmpl = &policy->xfrm_vec[i]; - if (tmpl->mode) { + if (tmpl->mode == XFRM_MODE_TUNNEL) { remote = &tmpl->id.daddr; local = &tmpl->saddr; } @@ -1005,7 +1005,8 @@ xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, (x->props.reqid == tmpl->reqid || !tmpl->reqid) && x->props.mode == tmpl->mode && (tmpl->aalgos & (1<props.aalgo)) && - !(x->props.mode && xfrm_state_addr_cmp(tmpl, x, family)); + !(x->props.mode != XFRM_MODE_TRANSPORT && + xfrm_state_addr_cmp(tmpl, x, family)); } static inline int @@ -1015,14 +1016,14 @@ xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, int idx = start; if (tmpl->optional) { - if (!tmpl->mode) + if (tmpl->mode == XFRM_MODE_TRANSPORT) return start; } else start = -1; for (; idx < sp->len; idx++) { if (xfrm_state_ok(tmpl, sp->xvec[idx], family)) return ++idx; - if (sp->xvec[idx]->props.mode) + if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) break; } return start; @@ -1047,7 +1048,7 @@ EXPORT_SYMBOL(xfrm_decode_session); static inline int secpath_has_tunnel(struct sec_path *sp, int k) { for (; k < sp->len; k++) { - if (sp->xvec[k]->props.mode) + if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) return 1; } diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index f70e158874d2..0d580ac19771 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -174,8 +174,8 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, err = -EINVAL; switch (p->mode) { - case 0: - case 1: + case XFRM_MODE_TRANSPORT: + case XFRM_MODE_TUNNEL: break; default: -- cgit v1.3-8-gc7d7 From 5794708f11551b6d19b10673abf4b0202f66b44d Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Fri, 22 Sep 2006 15:06:24 -0700 Subject: [XFRM]: Introduce a helper to compare id protocol. Put the helper to header for future use. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/xfrm.h | 6 ++++++ net/xfrm/xfrm_state.c | 6 +++--- 2 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 762795624b10..5b364b0a6a28 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -835,6 +836,11 @@ static inline int xfrm_state_kern(struct xfrm_state *x) return atomic_read(&x->tunnel_users); } +static inline int xfrm_id_proto_match(u8 proto, u8 userproto) +{ + return (userproto == IPSEC_PROTO_ANY || proto == userproto); +} + /* * xfrm algorithm information */ diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 1c796087ee78..34c038cbdf46 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -294,7 +294,7 @@ void xfrm_state_flush(u8 proto) restart: list_for_each_entry(x, xfrm_state_bydst+i, bydst) { if (!xfrm_state_kern(x) && - (proto == IPSEC_PROTO_ANY || x->id.proto == proto)) { + xfrm_id_proto_match(x->id.proto, proto)) { xfrm_state_hold(x); spin_unlock_bh(&xfrm_state_lock); @@ -772,7 +772,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), spin_lock_bh(&xfrm_state_lock); for (i = 0; i < XFRM_DST_HSIZE; i++) { list_for_each_entry(x, xfrm_state_bydst+i, bydst) { - if (proto == IPSEC_PROTO_ANY || x->id.proto == proto) + if (xfrm_id_proto_match(x->id.proto, proto)) count++; } } @@ -783,7 +783,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), for (i = 0; i < XFRM_DST_HSIZE; i++) { list_for_each_entry(x, xfrm_state_bydst+i, bydst) { - if (proto != IPSEC_PROTO_ANY && x->id.proto != proto) + if (!xfrm_id_proto_match(x->id.proto, proto)) continue; err = func(x, --count, data); if (err) -- cgit v1.3-8-gc7d7 From dc00a525603650a1471c823a1e48c6505c2f9765 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 17:49:52 -0700 Subject: [XFRM] STATE: Allow non IPsec protocol. It will be added two more transformation protocols (routing header and destination options header) for Mobile IPv6. xfrm_id_proto_match() can be handle zero as all, IPSEC_PROTO_ANY as all IPsec and otherwise as exact one. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/xfrm.h | 5 ++++- net/xfrm/xfrm_user.c | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 5b364b0a6a28..2a7d2132a1ae 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -838,7 +838,10 @@ static inline int xfrm_state_kern(struct xfrm_state *x) static inline int xfrm_id_proto_match(u8 proto, u8 userproto) { - return (userproto == IPSEC_PROTO_ANY || proto == userproto); + return (!userproto || proto == userproto || + (userproto == IPSEC_PROTO_ANY && (proto == IPPROTO_AH || + proto == IPPROTO_ESP || + proto == IPPROTO_COMP))); } /* diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 0d580ac19771..41f3d51ffc33 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -542,7 +542,7 @@ static int xfrm_dump_sa(struct sk_buff *skb, struct netlink_callback *cb) info.nlmsg_flags = NLM_F_MULTI; info.this_idx = 0; info.start_idx = cb->args[0]; - (void) xfrm_state_walk(IPSEC_PROTO_ANY, dump_one_state, &info); + (void) xfrm_state_walk(0, dump_one_state, &info); cb->args[0] = info.this_idx; return skb->len; -- cgit v1.3-8-gc7d7 From 622dc8281a80374873686514e46f852093d91106 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 17:52:01 -0700 Subject: [XFRM]: Expand XFRM_MAX_DEPTH for route optimization. XFRM_MAX_DEPTH is a limit of transformation states to be applied to the same flow. Two more extension headers are used by Mobile IPv6 transformation. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 2a7d2132a1ae..aa3be68041be 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -314,7 +314,7 @@ struct xfrm_tmpl __u32 calgos; }; -#define XFRM_MAX_DEPTH 4 +#define XFRM_MAX_DEPTH 6 struct xfrm_policy { -- cgit v1.3-8-gc7d7 From 6c44e6b7ab500d7e3e3f406c83325671be51a752 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 17:53:57 -0700 Subject: [XFRM] STATE: Add source address list. Support source address based searching. Mobile IPv6 will use it. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/xfrm.h | 26 ++++++++++++++++++++++++++ net/ipv4/xfrm4_state.c | 3 +++ net/ipv6/xfrm6_state.c | 3 +++ net/xfrm/xfrm_state.c | 21 +++++++++++++++++++-- 4 files changed, 51 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index aa3be68041be..88145e3348d0 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -95,6 +95,7 @@ struct xfrm_state { /* Note: bydst is re-used during gc */ struct list_head bydst; + struct list_head bysrc; struct list_head byspi; atomic_t refcnt; @@ -236,6 +237,7 @@ extern int __xfrm_state_delete(struct xfrm_state *x); struct xfrm_state_afinfo { unsigned short family; struct list_head *state_bydst; + struct list_head *state_bysrc; struct list_head *state_byspi; int (*init_flags)(struct xfrm_state *x); void (*init_tempsel)(struct xfrm_state *x, struct flowi *fl, @@ -420,6 +422,30 @@ unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) return 0; } +static __inline__ +unsigned __xfrm4_src_hash(xfrm_address_t *addr) +{ + return __xfrm4_dst_hash(addr); +} + +static __inline__ +unsigned __xfrm6_src_hash(xfrm_address_t *addr) +{ + return __xfrm6_dst_hash(addr); +} + +static __inline__ +unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) +{ + switch (family) { + case AF_INET: + return __xfrm4_src_hash(addr); + case AF_INET6: + return __xfrm6_src_hash(addr); + } + return 0; +} + static __inline__ unsigned __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) { diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 97b0c7589711..c56b258fad73 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -122,6 +122,9 @@ __xfrm4_find_acq(u8 mode, u32 reqid, u8 proto, add_timer(&x0->timer); xfrm_state_hold(x0); list_add_tail(&x0->bydst, xfrm4_state_afinfo.state_bydst+h); + h = __xfrm4_src_hash(saddr); + xfrm_state_hold(x0); + list_add_tail(&x0->bysrc, xfrm4_state_afinfo.state_bysrc+h); wake_up(&km_waitq); } if (x0) diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index a1a1f5476442..2fb07850449f 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -126,6 +126,9 @@ __xfrm6_find_acq(u8 mode, u32 reqid, u8 proto, add_timer(&x0->timer); xfrm_state_hold(x0); list_add_tail(&x0->bydst, xfrm6_state_afinfo.state_bydst+h); + h = __xfrm6_src_hash(saddr); + xfrm_state_hold(x0); + list_add_tail(&x0->bysrc, xfrm6_state_afinfo.state_bysrc+h); wake_up(&km_waitq); } if (x0) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 34c038cbdf46..2a9992894e69 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -45,6 +45,7 @@ static DEFINE_SPINLOCK(xfrm_state_lock); * Also, it can be used by ah/esp icmp error handler to find offending SA. */ static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE]; +static struct list_head xfrm_state_bysrc[XFRM_DST_HSIZE]; static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE]; DECLARE_WAIT_QUEUE_HEAD(km_waitq); @@ -200,6 +201,7 @@ struct xfrm_state *xfrm_state_alloc(void) atomic_set(&x->refcnt, 1); atomic_set(&x->tunnel_users, 0); INIT_LIST_HEAD(&x->bydst); + INIT_LIST_HEAD(&x->bysrc); INIT_LIST_HEAD(&x->byspi); init_timer(&x->timer); x->timer.function = xfrm_timer_handler; @@ -240,6 +242,8 @@ int __xfrm_state_delete(struct xfrm_state *x) spin_lock(&xfrm_state_lock); list_del(&x->bydst); __xfrm_state_put(x); + list_del(&x->bysrc); + __xfrm_state_put(x); if (x->id.spi) { list_del(&x->byspi); __xfrm_state_put(x); @@ -415,6 +419,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, x->km.state = XFRM_STATE_ACQ; list_add_tail(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); + list_add_tail(&x->bysrc, xfrm_state_bysrc+h); + xfrm_state_hold(x); if (x->id.spi) { h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); list_add(&x->byspi, xfrm_state_byspi+h); @@ -448,11 +454,19 @@ static void __xfrm_state_insert(struct xfrm_state *x) list_add(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); - h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); + h = xfrm_src_hash(&x->props.saddr, x->props.family); - list_add(&x->byspi, xfrm_state_byspi+h); + list_add(&x->bysrc, xfrm_state_bysrc+h); xfrm_state_hold(x); + if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) { + h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, + x->props.family); + + list_add(&x->byspi, xfrm_state_byspi+h); + xfrm_state_hold(x); + } + if (!mod_timer(&x->timer, jiffies + HZ)) xfrm_state_hold(x); @@ -1075,6 +1089,7 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo) err = -ENOBUFS; else { afinfo->state_bydst = xfrm_state_bydst; + afinfo->state_bysrc = xfrm_state_bysrc; afinfo->state_byspi = xfrm_state_byspi; xfrm_state_afinfo[afinfo->family] = afinfo; } @@ -1097,6 +1112,7 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) else { xfrm_state_afinfo[afinfo->family] = NULL; afinfo->state_byspi = NULL; + afinfo->state_bysrc = NULL; afinfo->state_bydst = NULL; } } @@ -1218,6 +1234,7 @@ void __init xfrm_state_init(void) for (i=0; i Date: Wed, 23 Aug 2006 17:56:04 -0700 Subject: [XFRM] STATE: Search by address using source address list. This is a support to search transformation states by its addresses by using source address list for Mobile IPv6 usage. To use it from user-space, it is also added a message type for source address as a xfrm state option. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/xfrm.h | 1 + include/net/xfrm.h | 2 ++ net/ipv4/xfrm4_state.c | 9 ++++++++ net/ipv6/xfrm6_state.c | 21 ++++++++++++++++++ net/xfrm/xfrm_state.c | 37 +++++++++++++++++++++++++++---- net/xfrm/xfrm_user.c | 59 +++++++++++++++++++++++++++++++++++++++++++++----- 6 files changed, 119 insertions(+), 10 deletions(-) (limited to 'include/net') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 5154064b6d95..66343d3d4b91 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -234,6 +234,7 @@ enum xfrm_attr_type_t { XFRMA_REPLAY_VAL, XFRMA_REPLAY_THRESH, XFRMA_ETIMER_THRESH, + XFRMA_SRCADDR, /* xfrm_address_t */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 88145e3348d0..d9c40e713184 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -244,6 +244,7 @@ struct xfrm_state_afinfo { struct xfrm_tmpl *tmpl, xfrm_address_t *daddr, xfrm_address_t *saddr); struct xfrm_state *(*state_lookup)(xfrm_address_t *daddr, u32 spi, u8 proto); + struct xfrm_state *(*state_lookup_byaddr)(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto); struct xfrm_state *(*find_acq)(u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create); @@ -937,6 +938,7 @@ extern void xfrm_state_insert(struct xfrm_state *x); extern int xfrm_state_add(struct xfrm_state *x); extern int xfrm_state_update(struct xfrm_state *x); extern struct xfrm_state *xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family); +extern struct xfrm_state *xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family); extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq); extern int xfrm_state_delete(struct xfrm_state *x); extern void xfrm_state_flush(u8 proto); diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index c56b258fad73..616be131b4e3 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -80,6 +80,14 @@ __xfrm4_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto) return NULL; } +/* placeholder until ipv4's code is written */ +static struct xfrm_state * +__xfrm4_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, + u8 proto) +{ + return NULL; +} + static struct xfrm_state * __xfrm4_find_acq(u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, @@ -137,6 +145,7 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = { .init_flags = xfrm4_init_flags, .init_tempsel = __xfrm4_init_tempsel, .state_lookup = __xfrm4_state_lookup, + .state_lookup_byaddr = __xfrm4_state_lookup_byaddr, .find_acq = __xfrm4_find_acq, }; diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 2fb07850449f..9c95b9d3e110 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -63,6 +63,26 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->props.family = AF_INET6; } +static struct xfrm_state * +__xfrm6_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, + u8 proto) +{ + struct xfrm_state *x = NULL; + unsigned h; + + h = __xfrm6_src_hash(saddr); + list_for_each_entry(x, xfrm6_state_afinfo.state_bysrc+h, bysrc) { + if (x->props.family == AF_INET6 && + ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) && + ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)x->props.saddr.a6) && + proto == x->id.proto) { + xfrm_state_hold(x); + return x; + } + } + return NULL; +} + static struct xfrm_state * __xfrm6_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto) { @@ -140,6 +160,7 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .family = AF_INET6, .init_tempsel = __xfrm6_init_tempsel, .state_lookup = __xfrm6_state_lookup, + .state_lookup_byaddr = __xfrm6_state_lookup_byaddr, .find_acq = __xfrm6_find_acq, }; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 2a9992894e69..11f480b12952 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -487,6 +487,16 @@ void xfrm_state_insert(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_insert); +static inline struct xfrm_state * +__xfrm_state_locate(struct xfrm_state_afinfo *afinfo, struct xfrm_state *x, + int use_spi) +{ + if (use_spi) + return afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); + else + return afinfo->state_lookup_byaddr(&x->id.daddr, &x->props.saddr, x->id.proto); +} + static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq); int xfrm_state_add(struct xfrm_state *x) @@ -495,6 +505,7 @@ int xfrm_state_add(struct xfrm_state *x) struct xfrm_state *x1; int family; int err; + int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); family = x->props.family; afinfo = xfrm_state_get_afinfo(family); @@ -503,7 +514,7 @@ int xfrm_state_add(struct xfrm_state *x) spin_lock_bh(&xfrm_state_lock); - x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); + x1 = __xfrm_state_locate(afinfo, x, use_spi); if (x1) { xfrm_state_put(x1); x1 = NULL; @@ -511,7 +522,7 @@ int xfrm_state_add(struct xfrm_state *x) goto out; } - if (x->km.seq) { + if (use_spi && x->km.seq) { x1 = __xfrm_find_acq_byseq(x->km.seq); if (x1 && xfrm_addr_cmp(&x1->id.daddr, &x->id.daddr, family)) { xfrm_state_put(x1); @@ -519,7 +530,7 @@ int xfrm_state_add(struct xfrm_state *x) } } - if (!x1) + if (use_spi && !x1) x1 = afinfo->find_acq( x->props.mode, x->props.reqid, x->id.proto, &x->id.daddr, &x->props.saddr, 0); @@ -548,13 +559,14 @@ int xfrm_state_update(struct xfrm_state *x) struct xfrm_state_afinfo *afinfo; struct xfrm_state *x1; int err; + int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); afinfo = xfrm_state_get_afinfo(x->props.family); if (unlikely(afinfo == NULL)) return -EAFNOSUPPORT; spin_lock_bh(&xfrm_state_lock); - x1 = afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); + x1 = __xfrm_state_locate(afinfo, x, use_spi); err = -ESRCH; if (!x1) @@ -674,6 +686,23 @@ xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, } EXPORT_SYMBOL(xfrm_state_lookup); +struct xfrm_state * +xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, + u8 proto, unsigned short family) +{ + struct xfrm_state *x; + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + if (!afinfo) + return NULL; + + spin_lock_bh(&xfrm_state_lock); + x = afinfo->state_lookup_byaddr(daddr, saddr, proto); + spin_unlock_bh(&xfrm_state_lock); + xfrm_state_put_afinfo(afinfo); + return x; +} +EXPORT_SYMBOL(xfrm_state_lookup_byaddr); + struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 41f3d51ffc33..b5f8ab71aa54 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -87,6 +87,22 @@ static int verify_encap_tmpl(struct rtattr **xfrma) return 0; } +static int verify_one_addr(struct rtattr **xfrma, enum xfrm_attr_type_t type, + xfrm_address_t **addrp) +{ + struct rtattr *rt = xfrma[type - 1]; + + if (!rt) + return 0; + + if ((rt->rta_len - sizeof(*rt)) < sizeof(**addrp)) + return -EINVAL; + + if (addrp) + *addrp = RTA_DATA(rt); + + return 0; +} static inline int verify_sec_ctx_len(struct rtattr **xfrma) { @@ -418,16 +434,48 @@ out: return err; } +static struct xfrm_state *xfrm_user_state_lookup(struct xfrm_usersa_id *p, + struct rtattr **xfrma, + int *errp) +{ + struct xfrm_state *x = NULL; + int err; + + if (xfrm_id_proto_match(p->proto, IPSEC_PROTO_ANY)) { + err = -ESRCH; + x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); + } else { + xfrm_address_t *saddr = NULL; + + err = verify_one_addr(xfrma, XFRMA_SRCADDR, &saddr); + if (err) + goto out; + + if (!saddr) { + err = -EINVAL; + goto out; + } + + x = xfrm_state_lookup_byaddr(&p->daddr, saddr, p->proto, + p->family); + } + + out: + if (!x && errp) + *errp = err; + return x; +} + static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) { struct xfrm_state *x; - int err; + int err = -ESRCH; struct km_event c; struct xfrm_usersa_id *p = NLMSG_DATA(nlh); - x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); + x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err); if (x == NULL) - return -ESRCH; + return err; if ((err = security_xfrm_state_delete(x)) != 0) goto out; @@ -578,10 +626,9 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) struct xfrm_usersa_id *p = NLMSG_DATA(nlh); struct xfrm_state *x; struct sk_buff *resp_skb; - int err; + int err = -ESRCH; - x = xfrm_state_lookup(&p->daddr, p->spi, p->proto, p->family); - err = -ESRCH; + x = xfrm_user_state_lookup(p, (struct rtattr **)xfrma, &err); if (x == NULL) goto out_noput; -- cgit v1.3-8-gc7d7 From aee5adb4307c4c63a4dc5f3b49984d76f8a71b5b Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 17:57:28 -0700 Subject: [XFRM] STATE: Add a hook to find offset to be inserted header in outbound. On current kernel, ip6_find_1stfragopt() is used by IPv6 IPsec to find offset to be inserted header in outbound for transport mode. (BTW, no usage may be needed for IPv4 case.) Mobile IPv6 requires another logic for routing header and destination options header respectively. This patch is common platform for the offset and adopts it to IPsec. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/xfrm.h | 3 +++ net/ipv6/ah6.c | 3 ++- net/ipv6/esp6.c | 3 ++- net/ipv6/ipcomp6.c | 1 + net/ipv6/ipv6_syms.c | 1 + net/ipv6/xfrm6_mode_transport.c | 2 +- net/ipv6/xfrm6_output.c | 6 ++++++ 7 files changed, 16 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d9c40e713184..eed48f832ce1 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -265,6 +265,7 @@ struct xfrm_type void (*destructor)(struct xfrm_state *); int (*input)(struct xfrm_state *, struct sk_buff *skb); int (*output)(struct xfrm_state *, struct sk_buff *pskb); + int (*hdr_offset)(struct xfrm_state *, struct sk_buff *, u8 **); /* Estimate maximal size of result of transformation of a dgram */ u32 (*get_max_size)(struct xfrm_state *, int size); }; @@ -960,6 +961,8 @@ extern u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr); extern void xfrm6_tunnel_free_spi(xfrm_address_t *saddr); extern u32 xfrm6_tunnel_spi_lookup(xfrm_address_t *saddr); extern int xfrm6_output(struct sk_buff *skb); +extern int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, + u8 **prevhdr); #ifdef CONFIG_XFRM extern int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type); diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index 60954fc7eb36..6c0aa51319a5 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -435,7 +435,8 @@ static struct xfrm_type ah6_type = .init_state = ah6_init_state, .destructor = ah6_destroy, .input = ah6_input, - .output = ah6_output + .output = ah6_output, + .hdr_offset = xfrm6_find_1stfragopt, }; static struct inet6_protocol ah6_protocol = { diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 2b8e52e1d0ab..ae50b9511151 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -379,7 +379,8 @@ static struct xfrm_type esp6_type = .destructor = esp6_destroy, .get_max_size = esp6_get_max_size, .input = esp6_input, - .output = esp6_output + .output = esp6_output, + .hdr_offset = xfrm6_find_1stfragopt, }; static struct inet6_protocol esp6_protocol = { diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index 19eba8d9f851..ad9c6e824e62 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -461,6 +461,7 @@ static struct xfrm_type ipcomp6_type = .destructor = ipcomp6_destroy, .input = ipcomp6_input, .output = ipcomp6_output, + .hdr_offset = xfrm6_find_1stfragopt, }; static struct inet6_protocol ipcomp6_protocol = diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c index dd4d1ce77769..e1a741612888 100644 --- a/net/ipv6/ipv6_syms.c +++ b/net/ipv6/ipv6_syms.c @@ -31,6 +31,7 @@ EXPORT_SYMBOL(ipv6_chk_addr); EXPORT_SYMBOL(in6_dev_finish_destroy); #ifdef CONFIG_XFRM EXPORT_SYMBOL(xfrm6_rcv); +EXPORT_SYMBOL(xfrm6_find_1stfragopt); #endif EXPORT_SYMBOL(rt6_lookup); EXPORT_SYMBOL(ipv6_push_nfrag_opts); diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c index 711d713e36d8..a5dce216024d 100644 --- a/net/ipv6/xfrm6_mode_transport.c +++ b/net/ipv6/xfrm6_mode_transport.c @@ -35,7 +35,7 @@ static int xfrm6_transport_output(struct sk_buff *skb) skb_push(skb, x->props.header_len); iph = skb->nh.ipv6h; - hdr_len = ip6_find_1stfragopt(skb, &prevhdr); + hdr_len = x->type->hdr_offset(x, skb, &prevhdr); skb->nh.raw = prevhdr - x->props.header_len; skb->h.raw = skb->data + hdr_len; memmove(skb->data, iph, hdr_len); diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 26f18869f77b..b4628fbf8ff5 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -17,6 +17,12 @@ #include #include +int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb, + u8 **prevhdr) +{ + return ip6_find_1stfragopt(skb, prevhdr); +} + static int xfrm6_tunnel_check_size(struct sk_buff *skb) { int mtu, ret = 0; -- cgit v1.3-8-gc7d7 From fbd9a5b47ee9c319ff0cae584391241ce78ffd6b Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 18:08:21 -0700 Subject: [XFRM] STATE: Common receive function for route optimization extension headers. XFRM_STATE_WILDRECV flag is introduced; the last resort state is set it and receives packet which is not route optimized but uses such extension headers i.e. Mobile IPv6 signaling (binding update and acknowledgement). A node enabled Mobile IPv6 adds the state. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/xfrm.h | 1 + include/net/xfrm.h | 2 + net/ipv6/ipv6_syms.c | 1 + net/ipv6/xfrm6_input.c | 108 +++++++++++++++++++++++++++++++++++++++++++++++++ net/xfrm/xfrm_state.c | 1 + 5 files changed, 113 insertions(+) (limited to 'include/net') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 66343d3d4b91..a7c9e4cfb15b 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -256,6 +256,7 @@ struct xfrm_usersa_info { #define XFRM_STATE_NOECN 1 #define XFRM_STATE_DECAP_DSCP 2 #define XFRM_STATE_NOPMTUDISC 4 +#define XFRM_STATE_WILDRECV 8 }; struct xfrm_usersa_id { diff --git a/include/net/xfrm.h b/include/net/xfrm.h index eed48f832ce1..0d735a5aba61 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -955,6 +955,8 @@ extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler); extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler); extern int xfrm6_rcv_spi(struct sk_buff *skb, u32 spi); extern int xfrm6_rcv(struct sk_buff **pskb); +extern int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, + xfrm_address_t *saddr, u8 proto); extern int xfrm6_tunnel_register(struct xfrm6_tunnel *handler); extern int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler); extern u32 xfrm6_tunnel_alloc_spi(xfrm_address_t *saddr); diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c index e1a741612888..7b7b90d9c3d0 100644 --- a/net/ipv6/ipv6_syms.c +++ b/net/ipv6/ipv6_syms.c @@ -31,6 +31,7 @@ EXPORT_SYMBOL(ipv6_chk_addr); EXPORT_SYMBOL(in6_dev_finish_destroy); #ifdef CONFIG_XFRM EXPORT_SYMBOL(xfrm6_rcv); +EXPORT_SYMBOL(xfrm6_input_addr); EXPORT_SYMBOL(xfrm6_find_1stfragopt); #endif EXPORT_SYMBOL(rt6_lookup); diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index ee2f6b3908b6..a40a05789013 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -138,3 +138,111 @@ int xfrm6_rcv(struct sk_buff **pskb) { return xfrm6_rcv_spi(*pskb, 0); } + +int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, + xfrm_address_t *saddr, u8 proto) +{ + struct xfrm_state *x = NULL; + int wildcard = 0; + struct in6_addr any; + xfrm_address_t *xany; + struct xfrm_state *xfrm_vec_one = NULL; + int nh = 0; + int i = 0; + + ipv6_addr_set(&any, 0, 0, 0, 0); + xany = (xfrm_address_t *)&any; + + for (i = 0; i < 3; i++) { + xfrm_address_t *dst, *src; + switch (i) { + case 0: + dst = daddr; + src = saddr; + break; + case 1: + /* lookup state with wild-card source address */ + wildcard = 1; + dst = daddr; + src = xany; + break; + case 2: + default: + /* lookup state with wild-card addresses */ + wildcard = 1; /* XXX */ + dst = xany; + src = xany; + break; + } + + x = xfrm_state_lookup_byaddr(dst, src, proto, AF_INET6); + if (!x) + continue; + + spin_lock(&x->lock); + + if (wildcard) { + if ((x->props.flags & XFRM_STATE_WILDRECV) == 0) { + spin_unlock(&x->lock); + xfrm_state_put(x); + x = NULL; + continue; + } + } + + if (unlikely(x->km.state != XFRM_STATE_VALID)) { + spin_unlock(&x->lock); + xfrm_state_put(x); + x = NULL; + continue; + } + if (xfrm_state_check_expire(x)) { + spin_unlock(&x->lock); + xfrm_state_put(x); + x = NULL; + continue; + } + + nh = x->type->input(x, skb); + if (nh <= 0) { + spin_unlock(&x->lock); + xfrm_state_put(x); + x = NULL; + continue; + } + + x->curlft.bytes += skb->len; + x->curlft.packets++; + + spin_unlock(&x->lock); + + xfrm_vec_one = x; + break; + } + + if (!xfrm_vec_one) + goto drop; + + /* Allocate new secpath or COW existing one. */ + if (!skb->sp || atomic_read(&skb->sp->refcnt) != 1) { + struct sec_path *sp; + sp = secpath_dup(skb->sp); + if (!sp) + goto drop; + if (skb->sp) + secpath_put(skb->sp); + skb->sp = sp; + } + + if (1 + skb->sp->len > XFRM_MAX_DEPTH) + goto drop; + + skb->sp->xvec[skb->sp->len] = xfrm_vec_one; + skb->sp->len ++; + + return 1; +drop: + if (xfrm_vec_one) + xfrm_state_put(xfrm_vec_one); + return -1; +} diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 11f480b12952..f05371556cce 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -352,6 +352,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, list_for_each_entry(x, xfrm_state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == tmpl->reqid && + !(x->props.flags & XFRM_STATE_WILDRECV) && xfrm_state_addr_check(x, daddr, saddr, family) && tmpl->mode == x->props.mode && tmpl->id.proto == x->id.proto && -- cgit v1.3-8-gc7d7 From 99505a843673faeae962a8cde128c7c034ba6b5e Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 18:10:33 -0700 Subject: [XFRM] STATE: Add a hook to obtain local/remote outbound address. Outbound transformation replaces both source and destination address with state's end-point addresses at the same time when IPsec tunnel mode. It is also required to change them for Mobile IPv6 route optimization, but we should care about the following differences: - changing result is not end-point but care-of address - either source or destination is replaced for each state This hook is a common platform to change outbound address. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 ++ net/ipv6/xfrm6_policy.c | 20 ++++++++++++++++++-- 2 files changed, 20 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 0d735a5aba61..aa3ac994477b 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -266,6 +266,8 @@ struct xfrm_type int (*input)(struct xfrm_state *, struct sk_buff *skb); int (*output)(struct xfrm_state *, struct sk_buff *pskb); int (*hdr_offset)(struct xfrm_state *, struct sk_buff *, u8 **); + xfrm_address_t *(*local_addr)(struct xfrm_state *, xfrm_address_t *); + xfrm_address_t *(*remote_addr)(struct xfrm_state *, xfrm_address_t *); /* Estimate maximal size of result of transformation of a dgram */ u32 (*get_max_size)(struct xfrm_state *, int size); }; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 81355bb50328..9328fc88708a 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -59,6 +59,22 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) return dst; } +static inline struct in6_addr* +__xfrm6_bundle_addr_remote(struct xfrm_state *x, struct in6_addr *addr) +{ + return (x->type->remote_addr) ? + (struct in6_addr*)x->type->remote_addr(x, (xfrm_address_t *)addr) : + (struct in6_addr*)&x->id.daddr; +} + +static inline struct in6_addr* +__xfrm6_bundle_addr_local(struct xfrm_state *x, struct in6_addr *addr) +{ + return (x->type->local_addr) ? + (struct in6_addr*)x->type->local_addr(x, (xfrm_address_t *)addr) : + (struct in6_addr*)&x->props.saddr; +} + /* Allocate chain of dst_entry's, attach known xfrm's, calculate * all the metrics... Shortly, bundle a bundle. */ @@ -115,8 +131,8 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst1->next = dst_prev; dst_prev = dst1; if (xfrm[i]->props.mode != XFRM_MODE_TRANSPORT) { - remote = (struct in6_addr*)&xfrm[i]->id.daddr; - local = (struct in6_addr*)&xfrm[i]->props.saddr; + remote = __xfrm6_bundle_addr_remote(xfrm[i], remote); + local = __xfrm6_bundle_addr_local(xfrm[i], local); tunnel = 1; } header_len += xfrm[i]->props.header_len; -- cgit v1.3-8-gc7d7 From 1b5c229987dc4d0c92a38fac0cde2aeec08cd775 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 18:11:50 -0700 Subject: [XFRM] STATE: Support non-fragment outbound transformation headers. For originated outbound IPv6 packets which will fragment, ip6_append_data() should know length of extension headers before sending them and the length is carried by dst_entry. IPv6 IPsec headers fragment then transformation was designed to place all headers after fragment header. OTOH Mobile IPv6 extension headers do not fragment then it is a good idea to make dst_entry have non-fragment length to tell it to ip6_append_data(). Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/dst.h | 1 + include/net/xfrm.h | 2 ++ net/ipv4/xfrm4_policy.c | 1 + net/ipv6/ip6_output.c | 2 +- net/ipv6/xfrm6_policy.c | 24 ++++++++++++++++++++++-- 5 files changed, 27 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/dst.h b/include/net/dst.h index 36d54fc248b0..a8d825f90305 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -54,6 +54,7 @@ struct dst_entry unsigned long expires; unsigned short header_len; /* more space at head required */ + unsigned short nfheader_len; /* more non-fragment space at head required */ unsigned short trailer_len; /* space to reserve at tail */ u32 metrics[RTAX_MAX]; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index aa3ac994477b..aa93cc1f6299 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -260,6 +260,8 @@ struct xfrm_type char *description; struct module *owner; __u8 proto; + __u8 flags; +#define XFRM_TYPE_NON_FRAGMENT 1 int (*init_state)(struct xfrm_state *x); void (*destructor)(struct xfrm_state *); diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index a5bed741de2c..e517981ceadd 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -135,6 +135,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst_prev->flags |= DST_HOST; dst_prev->lastuse = jiffies; dst_prev->header_len = header_len; + dst_prev->nfheader_len = 0; dst_prev->trailer_len = trailer_len; memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics)); diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 2a376b7d91b4..258e3e45f5e0 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -971,7 +971,7 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, hh_len = LL_RESERVED_SPACE(rt->u.dst.dev); - fragheaderlen = sizeof(struct ipv6hdr) + (opt ? opt->opt_nflen : 0); + fragheaderlen = sizeof(struct ipv6hdr) + rt->u.dst.nfheader_len + (opt ? opt->opt_nflen : 0); maxfraglen = ((mtu - fragheaderlen) & ~7) + fragheaderlen - sizeof(struct frag_hdr); if (mtu <= sizeof(struct ipv6hdr) + IPV6_MAXPLEN) { diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 9328fc88708a..a3f68c8b737e 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -75,6 +75,24 @@ __xfrm6_bundle_addr_local(struct xfrm_state *x, struct in6_addr *addr) (struct in6_addr*)&x->props.saddr; } +static inline void +__xfrm6_bundle_len_inc(int *len, int *nflen, struct xfrm_state *x) +{ + if (x->type->flags & XFRM_TYPE_NON_FRAGMENT) + *nflen += x->props.header_len; + else + *len += x->props.header_len; +} + +static inline void +__xfrm6_bundle_len_dec(int *len, int *nflen, struct xfrm_state *x) +{ + if (x->type->flags & XFRM_TYPE_NON_FRAGMENT) + *nflen -= x->props.header_len; + else + *len -= x->props.header_len; +} + /* Allocate chain of dst_entry's, attach known xfrm's, calculate * all the metrics... Shortly, bundle a bundle. */ @@ -99,6 +117,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int int i; int err = 0; int header_len = 0; + int nfheader_len = 0; int trailer_len = 0; dst = dst_prev = NULL; @@ -135,7 +154,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int local = __xfrm6_bundle_addr_local(xfrm[i], local); tunnel = 1; } - header_len += xfrm[i]->props.header_len; + __xfrm6_bundle_len_inc(&header_len, &nfheader_len, xfrm[i]); trailer_len += xfrm[i]->props.trailer_len; if (tunnel) { @@ -170,6 +189,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int dst_prev->flags |= DST_HOST; dst_prev->lastuse = jiffies; dst_prev->header_len = header_len; + dst_prev->nfheader_len = nfheader_len; dst_prev->trailer_len = trailer_len; memcpy(&dst_prev->metrics, &x->route->metrics, sizeof(dst_prev->metrics)); @@ -188,7 +208,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int x->u.rt6.rt6i_src = rt0->rt6i_src; x->u.rt6.rt6i_idev = rt0->rt6i_idev; in6_dev_hold(rt0->rt6i_idev); - header_len -= x->u.dst.xfrm->props.header_len; + __xfrm6_bundle_len_dec(&header_len, &nfheader_len, x->u.dst.xfrm); trailer_len -= x->u.dst.xfrm->props.trailer_len; } -- cgit v1.3-8-gc7d7 From 060f02a3bdd4d9ba8aa3c48e9b470672b1f3a585 Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Wed, 23 Aug 2006 18:18:55 -0700 Subject: [XFRM] STATE: Introduce care-of address. Care-of address is carried by state as a transformation option like IPsec encryption/authentication algorithm. Based on MIPL2 kernel patch. Signed-off-by: Noriaki TAKAMIYA Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki --- include/linux/xfrm.h | 1 + include/net/xfrm.h | 3 +++ net/xfrm/xfrm_state.c | 6 ++++++ net/xfrm/xfrm_user.c | 28 +++++++++++++++++++++++++++- 4 files changed, 37 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index a7c9e4cfb15b..b53f799189af 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -235,6 +235,7 @@ enum xfrm_attr_type_t { XFRMA_REPLAY_THRESH, XFRMA_ETIMER_THRESH, XFRMA_SRCADDR, /* xfrm_address_t */ + XFRMA_COADDR, /* xfrm_address_t */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index aa93cc1f6299..872a2a4022b2 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -134,6 +134,9 @@ struct xfrm_state /* Data for encapsulator */ struct xfrm_encap_tmpl *encap; + /* Data for care-of address */ + xfrm_address_t *coaddr; + /* IPComp needs an IPIP tunnel for handling uncompressed packets */ struct xfrm_state *tunnel; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index f05371556cce..3da89c01ea71 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -78,6 +78,7 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) kfree(x->ealg); kfree(x->calg); kfree(x->encap); + kfree(x->coaddr); if (x->mode) xfrm_put_mode(x->mode); if (x->type) { @@ -603,6 +604,11 @@ out: if (likely(x1->km.state == XFRM_STATE_VALID)) { if (x->encap && x1->encap) memcpy(x1->encap, x->encap, sizeof(*x1->encap)); + if (x->coaddr && x1->coaddr) { + memcpy(x1->coaddr, x->coaddr, sizeof(*x1->coaddr)); + } + if (!use_spi && memcmp(&x1->sel, &x->sel, sizeof(x1->sel))) + memcpy(&x1->sel, &x->sel, sizeof(x1->sel)); memcpy(&x1->lft, &x->lft, sizeof(x1->lft)); x1->km.dying = 0; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index b5f8ab71aa54..939808de9e20 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -187,11 +187,14 @@ static int verify_newsa_info(struct xfrm_usersa_info *p, goto out; if ((err = verify_sec_ctx_len(xfrma))) goto out; + if ((err = verify_one_addr(xfrma, XFRMA_COADDR, NULL))) + goto out; err = -EINVAL; switch (p->mode) { case XFRM_MODE_TRANSPORT: case XFRM_MODE_TUNNEL: + case XFRM_MODE_ROUTEOPTIMIZATION: break; default: @@ -276,6 +279,24 @@ static int attach_sec_ctx(struct xfrm_state *x, struct rtattr *u_arg) return security_xfrm_state_alloc(x, uctx); } +static int attach_one_addr(xfrm_address_t **addrpp, struct rtattr *u_arg) +{ + struct rtattr *rta = u_arg; + xfrm_address_t *p, *uaddrp; + + if (!rta) + return 0; + + uaddrp = RTA_DATA(rta); + p = kmalloc(sizeof(*p), GFP_KERNEL); + if (!p) + return -ENOMEM; + + memcpy(p, uaddrp, sizeof(*p)); + *addrpp = p; + return 0; +} + static void copy_from_user_state(struct xfrm_state *x, struct xfrm_usersa_info *p) { memcpy(&x->id, &p->id, sizeof(x->id)); @@ -365,7 +386,8 @@ static struct xfrm_state *xfrm_state_construct(struct xfrm_usersa_info *p, goto error; if ((err = attach_encap_tmpl(&x->encap, xfrma[XFRMA_ENCAP-1]))) goto error; - + if ((err = attach_one_addr(&x->coaddr, xfrma[XFRMA_COADDR-1]))) + goto error; err = xfrm_init_state(x); if (err) goto error; @@ -569,6 +591,10 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) uctx->ctx_len = x->security->ctx_len; memcpy(uctx + 1, x->security->ctx_str, x->security->ctx_len); } + + if (x->coaddr) + RTA_PUT(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); + nlh->nlmsg_len = skb->tail - b; out: sp->this_idx++; -- cgit v1.3-8-gc7d7 From 9afaca057980c02771f4657c455cc7592fcd7373 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 18:20:16 -0700 Subject: [XFRM] IPV6: Update outbound state timestamp for each sending. With this patch transformation state is updated last used time for each sending. Xtime is used for it like other state lifetime expiration. Mobile IPv6 enabled nodes will want to know traffic status of each binding (e.g. judgement to request binding refresh by correspondent node, or to keep home/care-of nonce alive by mobile node). The last used timestamp is an important hint about it. Based on MIPL2 kernel patch. This patch was also written by: Henrik Petander Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/xfrm.h | 1 + include/net/xfrm.h | 3 +++ net/ipv6/xfrm6_output.c | 2 ++ net/xfrm/xfrm_user.c | 3 +++ 4 files changed, 9 insertions(+) (limited to 'include/net') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index b53f799189af..1d8c1f22c12d 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -236,6 +236,7 @@ enum xfrm_attr_type_t { XFRMA_ETIMER_THRESH, XFRMA_SRCADDR, /* xfrm_address_t */ XFRMA_COADDR, /* xfrm_address_t */ + XFRMA_LASTUSED, __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 872a2a4022b2..248874ecf8df 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -167,6 +167,9 @@ struct xfrm_state struct xfrm_lifetime_cur curlft; struct timer_list timer; + /* Last used time */ + u64 lastused; + /* Reference to data common to all the instances of this * transformer. */ struct xfrm_type *type; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index b4628fbf8ff5..db58104e710b 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -75,6 +75,8 @@ static int xfrm6_output_one(struct sk_buff *skb) x->curlft.bytes += skb->len; x->curlft.packets++; + if (x->props.mode == XFRM_MODE_ROUTEOPTIMIZATION) + x->lastused = (u64)xtime.tv_sec; spin_unlock_bh(&x->lock); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 939808de9e20..f643063a1cbd 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -595,6 +595,9 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) if (x->coaddr) RTA_PUT(skb, XFRMA_COADDR, sizeof(*x->coaddr), x->coaddr); + if (x->lastused) + RTA_PUT(skb, XFRMA_LASTUSED, sizeof(x->lastused), &x->lastused); + nlh->nlmsg_len = skb->tail - b; out: sp->this_idx++; -- cgit v1.3-8-gc7d7 From e53820de0f81da1429048634cadc6ef5f50c2f8b Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 19:12:01 -0700 Subject: [XFRM] IPV6: Restrict bundle reusing For outbound transformation, bundle is checked whether it is suitable for current flow to be reused or not. In such IPv6 case as below, transformation may apply incorrect bundle for the flow instead of creating another bundle: - The policy selector has destination prefix length < 128 (Two or more addresses can be matched it) - Its bundle holds dst entry of default route whose prefix length < 128 (Previous traffic was used such route as next hop) - The policy and the bundle were used a transport mode state and this time flow address is not matched the bundled state. This issue is found by Mobile IPv6 usage to protect mobility signaling by IPsec, but it is not a Mobile IPv6 specific. This patch adds strict check to xfrm_bundle_ok() for each state mode and address when prefix length is less than 128. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/xfrm.h | 19 ++++++++++++++++++- net/ipv4/xfrm4_policy.c | 2 +- net/ipv6/xfrm6_policy.c | 4 +++- net/xfrm/xfrm_policy.c | 8 ++++++-- 4 files changed, 28 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 248874ecf8df..7f1630630dcf 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -869,6 +869,23 @@ xfrm_state_addr_check(struct xfrm_state *x, return 0; } +static __inline__ int +xfrm_state_addr_flow_check(struct xfrm_state *x, struct flowi *fl, + unsigned short family) +{ + switch (family) { + case AF_INET: + return __xfrm4_state_addr_check(x, + (xfrm_address_t *)&fl->fl4_dst, + (xfrm_address_t *)&fl->fl4_src); + case AF_INET6: + return __xfrm6_state_addr_check(x, + (xfrm_address_t *)&fl->fl6_dst, + (xfrm_address_t *)&fl->fl6_src); + } + return 0; +} + static inline int xfrm_state_kern(struct xfrm_state *x) { return atomic_read(&x->tunnel_users); @@ -1014,7 +1031,7 @@ extern void xfrm_policy_flush(void); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); extern int xfrm_flush_bundles(void); extern void xfrm_flush_all_bundles(void); -extern int xfrm_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl, int family); +extern int xfrm_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl, int family, int strict); extern void xfrm_init_pmtu(struct dst_entry *dst); extern wait_queue_head_t km_waitq; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index e517981ceadd..42d8ded0f96a 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -33,7 +33,7 @@ __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) xdst->u.rt.fl.fl4_dst == fl->fl4_dst && xdst->u.rt.fl.fl4_src == fl->fl4_src && xdst->u.rt.fl.fl4_tos == fl->fl4_tos && - xfrm_bundle_ok(xdst, fl, AF_INET)) { + xfrm_bundle_ok(xdst, fl, AF_INET, 0)) { dst_clone(dst); break; } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index a3f68c8b737e..729b4748d6d3 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -50,7 +50,9 @@ __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) xdst->u.rt6.rt6i_src.plen); if (ipv6_addr_equal(&xdst->u.rt6.rt6i_dst.addr, &fl_dst_prefix) && ipv6_addr_equal(&xdst->u.rt6.rt6i_src.addr, &fl_src_prefix) && - xfrm_bundle_ok(xdst, fl, AF_INET6)) { + xfrm_bundle_ok(xdst, fl, AF_INET6, + (xdst->u.rt6.rt6i_dst.plen != 128 || + xdst->u.rt6.rt6i_src.plen != 128))) { dst_clone(dst); break; } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 56abb5c057d4..ad2a5cba1f5b 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1167,7 +1167,7 @@ static struct dst_entry *xfrm_dst_check(struct dst_entry *dst, u32 cookie) static int stale_bundle(struct dst_entry *dst) { - return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC); + return !xfrm_bundle_ok((struct xfrm_dst *)dst, NULL, AF_UNSPEC, 0); } void xfrm_dst_ifdown(struct dst_entry *dst, struct net_device *dev) @@ -1282,7 +1282,7 @@ EXPORT_SYMBOL(xfrm_init_pmtu); * still valid. */ -int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) +int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family, int strict) { struct dst_entry *dst = &first->u.dst; struct xfrm_dst *last; @@ -1304,6 +1304,10 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family) if (dst->xfrm->km.state != XFRM_STATE_VALID) return 0; + if (strict && fl && dst->xfrm->props.mode != XFRM_MODE_TUNNEL && + !xfrm_state_addr_flow_check(dst->xfrm, fl, family)) + return 0; + mtu = dst_mtu(dst->child); if (xdst->child_mtu_cached != mtu) { last = xdst; -- cgit v1.3-8-gc7d7 From 65d4ed92219b28875efb52de5700da8c3dfa83e1 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 19:16:22 -0700 Subject: [IPV6] MIP6: Add inbound interface of routing header type 2. Add inbound interface of routing header type 2 for Mobile IPv6. Based on MIPL2 kernel patch. This patch was also written by: Ville Nuorvala Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/addrconf.h | 7 +++++ net/ipv6/exthdrs.c | 69 ++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 68 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 3d71251b3eca..5fc8627435eb 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -61,6 +61,13 @@ extern int addrconf_set_dstaddr(void __user *arg); extern int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict); +/* XXX: this is a placeholder till addrconf supports */ +#ifdef CONFIG_IPV6_MIP6 +static inline int ipv6_chk_home_addr(struct in6_addr *addr) +{ + return 0; +} +#endif extern struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *dev, int strict); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 05afa6b1912b..8d3a0e17314d 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -43,6 +43,9 @@ #include #include #include +#ifdef CONFIG_IPV6_MIP6 +#include +#endif #include @@ -219,7 +222,7 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp) { struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); - struct in6_addr *addr; + struct in6_addr *addr = NULL; struct in6_addr daddr; int n, i; @@ -244,6 +247,23 @@ static int ipv6_rthdr_rcv(struct sk_buff **skbp) looped_back: if (hdr->segments_left == 0) { + switch (hdr->type) { +#ifdef CONFIG_IPV6_MIP6 + case IPV6_SRCRT_TYPE_2: + /* Silently discard type 2 header unless it was + * processed by own + */ + if (!addr) { + IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); + kfree_skb(skb); + return -1; + } + break; +#endif + default: + break; + } + opt->lastopt = skb->h.raw - skb->nh.raw; opt->srcrt = skb->h.raw - skb->nh.raw; skb->h.raw += (hdr->hdrlen + 1) << 3; @@ -253,17 +273,29 @@ looped_back: return 1; } - if (hdr->type != IPV6_SRCRT_TYPE_0) { + switch (hdr->type) { + case IPV6_SRCRT_TYPE_0: + if (hdr->hdrlen & 0x01) { + IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw); + return -1; + } + break; +#ifdef CONFIG_IPV6_MIP6 + case IPV6_SRCRT_TYPE_2: + /* Silently discard invalid RTH type 2 */ + if (hdr->hdrlen != 2 || hdr->segments_left != 1) { + IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); + kfree_skb(skb); + return -1; + } + break; +#endif + default: IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->type) - skb->nh.raw); return -1; } - - if (hdr->hdrlen & 0x01) { - IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); - icmpv6_param_prob(skb, ICMPV6_HDR_FIELD, (&hdr->hdrlen) - skb->nh.raw); - return -1; - } /* * This is the routing header forwarding algorithm from @@ -303,6 +335,27 @@ looped_back: addr = rthdr->addr; addr += i - 1; + switch (hdr->type) { +#ifdef CONFIG_IPV6_MIP6 + case IPV6_SRCRT_TYPE_2: + if (xfrm6_input_addr(skb, (xfrm_address_t *)addr, + (xfrm_address_t *)&skb->nh.ipv6h->saddr, + IPPROTO_ROUTING) < 0) { + IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); + kfree_skb(skb); + return -1; + } + if (!ipv6_chk_home_addr(addr)) { + IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); + kfree_skb(skb); + return -1; + } + break; +#endif + default: + break; + } + if (ipv6_addr_is_multicast(addr)) { IP6_INC_STATS_BH(IPSTATS_MIB_INADDRERRORS); kfree_skb(skb); -- cgit v1.3-8-gc7d7 From c61a404325093250b676f40ad8f4dd00f3bcab5f Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 19:18:35 -0700 Subject: [IPV6]: Find option offset by type. This is a helper to search option offset from extension header which can carry TLV option like destination options header. Mobile IPv6 home address option will use it. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/ipv6.h | 2 ++ net/ipv6/exthdrs.c | 43 +++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 45 insertions(+) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index ece7e8a84ffd..c4ea12710576 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -506,6 +506,8 @@ extern int ipv6_skip_exthdr(const struct sk_buff *, int start, extern int ipv6_ext_hdr(u8 nexthdr); +extern int ipv6_find_tlv(struct sk_buff *skb, int offset, int type); + extern struct ipv6_txoptions * ipv6_invert_rthdr(struct sock *sk, struct ipv6_rt_hdr *hdr); diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 8d3a0e17314d..50ff49e518bc 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -49,6 +49,49 @@ #include +int ipv6_find_tlv(struct sk_buff *skb, int offset, int type) +{ + int packet_len = skb->tail - skb->nh.raw; + struct ipv6_opt_hdr *hdr; + int len; + + if (offset + 2 > packet_len) + goto bad; + hdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); + len = ((hdr->hdrlen + 1) << 3); + + if (offset + len > packet_len) + goto bad; + + offset += 2; + len -= 2; + + while (len > 0) { + int opttype = skb->nh.raw[offset]; + int optlen; + + if (opttype == type) + return offset; + + switch (opttype) { + case IPV6_TLV_PAD0: + optlen = 1; + break; + default: + optlen = skb->nh.raw[offset + 1] + 2; + if (optlen > len) + goto bad; + break; + } + offset += optlen; + len -= optlen; + } + /* not_found */ + return -1; + bad: + return -1; +} + /* * Parsing tlv encoded headers. * -- cgit v1.3-8-gc7d7 From a80ff03e05e4343d647780c116b02ec86078fd24 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 19:19:50 -0700 Subject: [IPV6]: Allow to replace skbuff by TLV parser. In receiving Mobile IPv6 home address option which is a TLV carried by destination options header, kernel will try to mangle source adderss of packet. Think of cloned skbuff it is required to replace it by the parser just like routing header case. This is a framework to achieve that to allow TLV parser to replace inbound skbuff pointer. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/ipv6.h | 2 +- net/ipv6/exthdrs.c | 29 +++++++++++++++++++---------- net/ipv6/ip6_input.c | 2 +- 3 files changed, 21 insertions(+), 12 deletions(-) (limited to 'include/net') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index c4ea12710576..8e6ec6063f8c 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -229,7 +229,7 @@ extern int ip6_ra_control(struct sock *sk, int sel, void (*destructor)(struct sock *)); -extern int ipv6_parse_hopopts(struct sk_buff *skb); +extern int ipv6_parse_hopopts(struct sk_buff **skbp); extern struct ipv6_txoptions * ipv6_dup_options(struct sock *sk, struct ipv6_txoptions *opt); extern struct ipv6_txoptions * ipv6_renew_options(struct sock *sk, struct ipv6_txoptions *opt, diff --git a/net/ipv6/exthdrs.c b/net/ipv6/exthdrs.c index 50ff49e518bc..1cdd0f0b5d34 100644 --- a/net/ipv6/exthdrs.c +++ b/net/ipv6/exthdrs.c @@ -102,7 +102,7 @@ int ipv6_find_tlv(struct sk_buff *skb, int offset, int type) struct tlvtype_proc { int type; - int (*func)(struct sk_buff *skb, int offset); + int (*func)(struct sk_buff **skbp, int offset); }; /********************* @@ -111,8 +111,10 @@ struct tlvtype_proc { /* An unknown option is detected, decide what to do */ -static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff) +static int ip6_tlvopt_unknown(struct sk_buff **skbp, int optoff) { + struct sk_buff *skb = *skbp; + switch ((skb->nh.raw[optoff] & 0xC0) >> 6) { case 0: /* ignore */ return 1; @@ -137,8 +139,9 @@ static int ip6_tlvopt_unknown(struct sk_buff *skb, int optoff) /* Parse tlv encoded option header (hop-by-hop or destination) */ -static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb) +static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff **skbp) { + struct sk_buff *skb = *skbp; struct tlvtype_proc *curr; int off = skb->h.raw - skb->nh.raw; int len = ((skb->h.raw[1]+1)<<3); @@ -168,13 +171,13 @@ static int ip6_parse_tlv(struct tlvtype_proc *procs, struct sk_buff *skb) /* type specific length/alignment checks will be performed in the func(). */ - if (curr->func(skb, off) == 0) + if (curr->func(skbp, off) == 0) return 0; break; } } if (curr->type < 0) { - if (ip6_tlvopt_unknown(skb, off) == 0) + if (ip6_tlvopt_unknown(skbp, off) == 0) return 0; } break; @@ -213,7 +216,8 @@ static int ipv6_destopt_rcv(struct sk_buff **skbp) opt->lastopt = skb->h.raw - skb->nh.raw; opt->dst1 = skb->h.raw - skb->nh.raw; - if (ip6_parse_tlv(tlvprocdestopt_lst, skb)) { + if (ip6_parse_tlv(tlvprocdestopt_lst, skbp)) { + skb = *skbp; skb->h.raw += ((skb->h.raw[1]+1)<<3); opt->nhoff = opt->dst1; return 1; @@ -517,8 +521,10 @@ EXPORT_SYMBOL_GPL(ipv6_invert_rthdr); /* Router Alert as of RFC 2711 */ -static int ipv6_hop_ra(struct sk_buff *skb, int optoff) +static int ipv6_hop_ra(struct sk_buff **skbp, int optoff) { + struct sk_buff *skb = *skbp; + if (skb->nh.raw[optoff+1] == 2) { IP6CB(skb)->ra = optoff; return 1; @@ -531,8 +537,9 @@ static int ipv6_hop_ra(struct sk_buff *skb, int optoff) /* Jumbo payload */ -static int ipv6_hop_jumbo(struct sk_buff *skb, int optoff) +static int ipv6_hop_jumbo(struct sk_buff **skbp, int optoff) { + struct sk_buff *skb = *skbp; u32 pkt_len; if (skb->nh.raw[optoff+1] != 4 || (optoff&3) != 2) { @@ -581,8 +588,9 @@ static struct tlvtype_proc tlvprochopopt_lst[] = { { -1, } }; -int ipv6_parse_hopopts(struct sk_buff *skb) +int ipv6_parse_hopopts(struct sk_buff **skbp) { + struct sk_buff *skb = *skbp; struct inet6_skb_parm *opt = IP6CB(skb); /* @@ -598,7 +606,8 @@ int ipv6_parse_hopopts(struct sk_buff *skb) } opt->hop = sizeof(struct ipv6hdr); - if (ip6_parse_tlv(tlvprochopopt_lst, skb)) { + if (ip6_parse_tlv(tlvprochopopt_lst, skbp)) { + skb = *skbp; skb->h.raw += (skb->h.raw[1]+1)<<3; opt->nhoff = sizeof(struct ipv6hdr); return 1; diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 25c2a9e03895..6b8e6d76a58b 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -111,7 +111,7 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt } if (hdr->nexthdr == NEXTHDR_HOP) { - if (ipv6_parse_hopopts(skb) < 0) { + if (ipv6_parse_hopopts(&skb) < 0) { IP6_INC_STATS_BH(IPSTATS_MIB_INHDRERRORS); return 0; } -- cgit v1.3-8-gc7d7 From 2c8d7ca0f76103855ad1f2a930e05683b64a00eb Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Wed, 23 Aug 2006 20:31:11 -0700 Subject: [IPV6] MIP6: Add routing header type 2 transformation. Add routing header type 2 transformation for Mobile IPv6. Based on MIPL2 kernel patch. Signed-off-by: Noriaki TAKAMIYA Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/mip6.h | 31 +++++++++ net/ipv6/Makefile | 2 + net/ipv6/af_inet6.c | 9 +++ net/ipv6/mip6.c | 181 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 223 insertions(+) create mode 100644 include/net/mip6.h create mode 100644 net/ipv6/mip6.c (limited to 'include/net') diff --git a/include/net/mip6.h b/include/net/mip6.h new file mode 100644 index 000000000000..644b8b673048 --- /dev/null +++ b/include/net/mip6.h @@ -0,0 +1,31 @@ +/* + * Copyright (C)2003-2006 Helsinki University of Technology + * Copyright (C)2003-2006 USAGI/WIDE Project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * Authors: + * Noriaki TAKAMIYA @USAGI + * Masahide NAKAMURA @USAGI + * YOSHIFUJI Hideaki @USAGI + */ +#ifndef _NET_MIP6_H +#define _NET_MIP6_H + +extern int mip6_init(void); +extern void mip6_fini(void); + +#endif diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 87e912e31922..0213c6612b58 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -14,6 +14,8 @@ ipv6-$(CONFIG_XFRM) += xfrm6_policy.o xfrm6_state.o xfrm6_input.o \ xfrm6_output.o ipv6-$(CONFIG_NETFILTER) += netfilter.o ipv6-$(CONFIG_IPV6_MULTIPLE_TABLES) += fib6_rules.o +ipv6-$(CONFIG_IPV6_MIP6) += mip6.o + ipv6-objs += $(ipv6-y) obj-$(CONFIG_INET6_AH) += ah6.o diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index 57ee5ddea96f..fc9c8a99bea6 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -59,6 +59,9 @@ #ifdef CONFIG_IPV6_TUNNEL #include #endif +#ifdef CONFIG_IPV6_MIP6 +#include +#endif #include #include @@ -857,6 +860,9 @@ static int __init inet6_init(void) ipv6_frag_init(); ipv6_nodata_init(); ipv6_destopt_init(); +#ifdef CONFIG_IPV6_MIP6 + mip6_init(); +#endif /* Init v6 transport protocols. */ udpv6_init(); @@ -919,6 +925,9 @@ static void __exit inet6_exit(void) udp6_proc_exit(); tcp6_proc_exit(); raw6_proc_exit(); +#endif +#ifdef CONFIG_IPV6_MIP6 + mip6_fini(); #endif /* Cleanup code parts. */ sit_cleanup(); diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c new file mode 100644 index 000000000000..63e548b6f81e --- /dev/null +++ b/net/ipv6/mip6.c @@ -0,0 +1,181 @@ +/* + * Copyright (C)2003-2006 Helsinki University of Technology + * Copyright (C)2003-2006 USAGI/WIDE Project + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ +/* + * Authors: + * Noriaki TAKAMIYA @USAGI + * Masahide NAKAMURA @USAGI + */ + +#include +#include +#include +#include +#include +#include +#include + +static xfrm_address_t *mip6_xfrm_addr(struct xfrm_state *x, xfrm_address_t *addr) +{ + return x->coaddr; +} + +static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb) +{ + struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data; + + if (!ipv6_addr_equal(&rt2->addr, (struct in6_addr *)x->coaddr) && + !ipv6_addr_any((struct in6_addr *)x->coaddr)) + return -ENOENT; + + return rt2->rt_hdr.nexthdr; +} + +/* Routing Header type 2 is inserted. + * IP Header's dst address is replaced with Routing Header's Home Address. + */ +static int mip6_rthdr_output(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipv6hdr *iph; + struct rt2_hdr *rt2; + u8 nexthdr; + + iph = (struct ipv6hdr *)skb->data; + iph->payload_len = htons(skb->len - sizeof(*iph)); + + nexthdr = *skb->nh.raw; + *skb->nh.raw = IPPROTO_ROUTING; + + rt2 = (struct rt2_hdr *)skb->h.raw; + rt2->rt_hdr.nexthdr = nexthdr; + rt2->rt_hdr.hdrlen = (x->props.header_len >> 3) - 1; + rt2->rt_hdr.type = IPV6_SRCRT_TYPE_2; + rt2->rt_hdr.segments_left = 1; + memset(&rt2->reserved, 0, sizeof(rt2->reserved)); + + BUG_TRAP(rt2->rt_hdr.hdrlen == 2); + + memcpy(&rt2->addr, &iph->daddr, sizeof(rt2->addr)); + memcpy(&iph->daddr, x->coaddr, sizeof(iph->daddr)); + + return 0; +} + +static int mip6_rthdr_offset(struct xfrm_state *x, struct sk_buff *skb, + u8 **nexthdr) +{ + u16 offset = sizeof(struct ipv6hdr); + struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1); + unsigned int packet_len = skb->tail - skb->nh.raw; + int found_rhdr = 0; + + *nexthdr = &skb->nh.ipv6h->nexthdr; + + while (offset + 1 <= packet_len) { + + switch (**nexthdr) { + case NEXTHDR_HOP: + break; + case NEXTHDR_ROUTING: + if (offset + 3 <= packet_len) { + struct ipv6_rt_hdr *rt; + rt = (struct ipv6_rt_hdr *)(skb->nh.raw + offset); + if (rt->type != 0) + return offset; + } + found_rhdr = 1; + break; + case NEXTHDR_DEST: + if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) + return offset; + + if (found_rhdr) + return offset; + + break; + default: + return offset; + } + + offset += ipv6_optlen(exthdr); + *nexthdr = &exthdr->nexthdr; + exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); + } + + return offset; +} + +static int mip6_rthdr_init_state(struct xfrm_state *x) +{ + if (x->id.spi) { + printk(KERN_INFO "%s: spi is not 0: %u\n", __FUNCTION__, + x->id.spi); + return -EINVAL; + } + if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) { + printk(KERN_INFO "%s: state's mode is not %u: %u\n", + __FUNCTION__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode); + return -EINVAL; + } + + x->props.header_len = sizeof(struct rt2_hdr); + + return 0; +} + +/* + * Do nothing about destroying since it has no specific operation for routing + * header type 2 unlike IPsec protocols. + */ +static void mip6_rthdr_destroy(struct xfrm_state *x) +{ +} + +static struct xfrm_type mip6_rthdr_type = +{ + .description = "MIP6RT", + .owner = THIS_MODULE, + .proto = IPPROTO_ROUTING, + .flags = XFRM_TYPE_NON_FRAGMENT, + .init_state = mip6_rthdr_init_state, + .destructor = mip6_rthdr_destroy, + .input = mip6_rthdr_input, + .output = mip6_rthdr_output, + .hdr_offset = mip6_rthdr_offset, + .remote_addr = mip6_xfrm_addr, +}; + +int __init mip6_init(void) +{ + printk(KERN_INFO "Mobile IPv6\n"); + + if (xfrm_register_type(&mip6_rthdr_type, AF_INET6) < 0) { + printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __FUNCTION__); + goto mip6_rthdr_xfrm_fail; + } + return 0; + + mip6_rthdr_xfrm_fail: + return -EAGAIN; +} + +void __exit mip6_fini(void) +{ + if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0) + printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __FUNCTION__); +} -- cgit v1.3-8-gc7d7 From 3d126890dd67beffec27c1b6f51c040fc8d0b526 Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Wed, 23 Aug 2006 20:32:34 -0700 Subject: [IPV6] MIP6: Add destination options header transformation. Add destination options header transformation for Mobile IPv6. Based on MIPL2 kernel patch. This patch was also written by: Ville Nuorvala Signed-off-by: Noriaki TAKAMIYA Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/mip6.h | 3 + net/ipv6/mip6.c | 167 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 170 insertions(+) (limited to 'include/net') diff --git a/include/net/mip6.h b/include/net/mip6.h index 644b8b673048..42b65bace122 100644 --- a/include/net/mip6.h +++ b/include/net/mip6.h @@ -25,6 +25,9 @@ #ifndef _NET_MIP6_H #define _NET_MIP6_H +#define MIP6_OPT_PAD_1 0 +#define MIP6_OPT_PAD_N 1 + extern int mip6_init(void); extern void mip6_fini(void); diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 63e548b6f81e..a8adf891fe0e 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -35,6 +35,165 @@ static xfrm_address_t *mip6_xfrm_addr(struct xfrm_state *x, xfrm_address_t *addr return x->coaddr; } +static inline unsigned int calc_padlen(unsigned int len, unsigned int n) +{ + return (n - len + 16) & 0x7; +} + +static inline void *mip6_padn(__u8 *data, __u8 padlen) +{ + if (!data) + return NULL; + if (padlen == 1) { + data[0] = MIP6_OPT_PAD_1; + } else if (padlen > 1) { + data[0] = MIP6_OPT_PAD_N; + data[1] = padlen - 2; + if (padlen > 2) + memset(data+2, 0, data[1]); + } + return data + padlen; +} + +static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipv6hdr *iph = skb->nh.ipv6h; + struct ipv6_destopt_hdr *destopt = (struct ipv6_destopt_hdr *)skb->data; + + if (!ipv6_addr_equal(&iph->saddr, (struct in6_addr *)x->coaddr) && + !ipv6_addr_any((struct in6_addr *)x->coaddr)) + return -ENOENT; + + return destopt->nexthdr; +} + +/* Destination Option Header is inserted. + * IP Header's src address is replaced with Home Address Option in + * Destination Option Header. + */ +static int mip6_destopt_output(struct xfrm_state *x, struct sk_buff *skb) +{ + struct ipv6hdr *iph; + struct ipv6_destopt_hdr *dstopt; + struct ipv6_destopt_hao *hao; + u8 nexthdr; + int len; + + iph = (struct ipv6hdr *)skb->data; + iph->payload_len = htons(skb->len - sizeof(*iph)); + + nexthdr = *skb->nh.raw; + *skb->nh.raw = IPPROTO_DSTOPTS; + + dstopt = (struct ipv6_destopt_hdr *)skb->h.raw; + dstopt->nexthdr = nexthdr; + + hao = mip6_padn((char *)(dstopt + 1), + calc_padlen(sizeof(*dstopt), 6)); + + hao->type = IPV6_TLV_HAO; + hao->length = sizeof(*hao) - 2; + BUG_TRAP(hao->length == 16); + + len = ((char *)hao - (char *)dstopt) + sizeof(*hao); + + memcpy(&hao->addr, &iph->saddr, sizeof(hao->addr)); + memcpy(&iph->saddr, x->coaddr, sizeof(iph->saddr)); + + BUG_TRAP(len == x->props.header_len); + dstopt->hdrlen = (x->props.header_len >> 3) - 1; + + return 0; +} + +static int mip6_destopt_offset(struct xfrm_state *x, struct sk_buff *skb, + u8 **nexthdr) +{ + u16 offset = sizeof(struct ipv6hdr); + struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr*)(skb->nh.ipv6h + 1); + unsigned int packet_len = skb->tail - skb->nh.raw; + int found_rhdr = 0; + + *nexthdr = &skb->nh.ipv6h->nexthdr; + + while (offset + 1 <= packet_len) { + + switch (**nexthdr) { + case NEXTHDR_HOP: + break; + case NEXTHDR_ROUTING: + found_rhdr = 1; + break; + case NEXTHDR_DEST: + /* + * HAO MUST NOT appear more than once. + * XXX: It is better to try to find by the end of + * XXX: packet if HAO exists. + */ + if (ipv6_find_tlv(skb, offset, IPV6_TLV_HAO) >= 0) { + LIMIT_NETDEBUG(KERN_WARNING "mip6: hao exists already, override\n"); + return offset; + } + + if (found_rhdr) + return offset; + + break; + default: + return offset; + } + + offset += ipv6_optlen(exthdr); + *nexthdr = &exthdr->nexthdr; + exthdr = (struct ipv6_opt_hdr*)(skb->nh.raw + offset); + } + + return offset; +} + +static int mip6_destopt_init_state(struct xfrm_state *x) +{ + if (x->id.spi) { + printk(KERN_INFO "%s: spi is not 0: %u\n", __FUNCTION__, + x->id.spi); + return -EINVAL; + } + if (x->props.mode != XFRM_MODE_ROUTEOPTIMIZATION) { + printk(KERN_INFO "%s: state's mode is not %u: %u\n", + __FUNCTION__, XFRM_MODE_ROUTEOPTIMIZATION, x->props.mode); + return -EINVAL; + } + + x->props.header_len = sizeof(struct ipv6_destopt_hdr) + + calc_padlen(sizeof(struct ipv6_destopt_hdr), 6) + + sizeof(struct ipv6_destopt_hao); + BUG_TRAP(x->props.header_len == 24); + + return 0; +} + +/* + * Do nothing about destroying since it has no specific operation for + * destination options header unlike IPsec protocols. + */ +static void mip6_destopt_destroy(struct xfrm_state *x) +{ +} + +static struct xfrm_type mip6_destopt_type = +{ + .description = "MIP6DESTOPT", + .owner = THIS_MODULE, + .proto = IPPROTO_DSTOPTS, + .flags = XFRM_TYPE_NON_FRAGMENT, + .init_state = mip6_destopt_init_state, + .destructor = mip6_destopt_destroy, + .input = mip6_destopt_input, + .output = mip6_destopt_output, + .hdr_offset = mip6_destopt_offset, + .local_addr = mip6_xfrm_addr, +}; + static int mip6_rthdr_input(struct xfrm_state *x, struct sk_buff *skb) { struct rt2_hdr *rt2 = (struct rt2_hdr *)skb->data; @@ -164,6 +323,10 @@ int __init mip6_init(void) { printk(KERN_INFO "Mobile IPv6\n"); + if (xfrm_register_type(&mip6_destopt_type, AF_INET6) < 0) { + printk(KERN_INFO "%s: can't add xfrm type(destopt)\n", __FUNCTION__); + goto mip6_destopt_xfrm_fail; + } if (xfrm_register_type(&mip6_rthdr_type, AF_INET6) < 0) { printk(KERN_INFO "%s: can't add xfrm type(rthdr)\n", __FUNCTION__); goto mip6_rthdr_xfrm_fail; @@ -171,6 +334,8 @@ int __init mip6_init(void) return 0; mip6_rthdr_xfrm_fail: + xfrm_unregister_type(&mip6_destopt_type, AF_INET6); + mip6_destopt_xfrm_fail: return -EAGAIN; } @@ -178,4 +343,6 @@ void __exit mip6_fini(void) { if (xfrm_unregister_type(&mip6_rthdr_type, AF_INET6) < 0) printk(KERN_INFO "%s: can't remove xfrm type(rthdr)\n", __FUNCTION__); + if (xfrm_unregister_type(&mip6_destopt_type, AF_INET6) < 0) + printk(KERN_INFO "%s: can't remove xfrm type(destopt)\n", __FUNCTION__); } -- cgit v1.3-8-gc7d7 From 2b741653b6c824fe7520ee92b6795f11c5f24b24 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 20:34:26 -0700 Subject: [IPV6] MIP6: Add Mobility header definition. Add Mobility header definition for Mobile IPv6. Based on MIPL2 kernel patch. This patch was also written by: Antti Tuominen Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/in6.h | 1 + include/net/flow.h | 9 +++++++++ include/net/ipv6.h | 1 + include/net/mip6.h | 23 +++++++++++++++++++++++ 4 files changed, 34 insertions(+) (limited to 'include/net') diff --git a/include/linux/in6.h b/include/linux/in6.h index 086ec2ac8c5f..d776829b443f 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -134,6 +134,7 @@ struct in6_flowlabel_req #define IPPROTO_ICMPV6 58 /* ICMPv6 */ #define IPPROTO_NONE 59 /* IPv6 no next header */ #define IPPROTO_DSTOPTS 60 /* IPv6 destination options */ +#define IPPROTO_MH 135 /* IPv6 mobility header */ /* * IPv6 TLV options. diff --git a/include/net/flow.h b/include/net/flow.h index 21d988b2058a..e0522914316e 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -72,12 +72,21 @@ struct flowi { } dnports; __u32 spi; + +#ifdef CONFIG_IPV6_MIP6 + struct { + __u8 type; + } mht; +#endif } uli_u; #define fl_ip_sport uli_u.ports.sport #define fl_ip_dport uli_u.ports.dport #define fl_icmp_type uli_u.icmpt.type #define fl_icmp_code uli_u.icmpt.code #define fl_ipsec_spi uli_u.spi +#ifdef CONFIG_IPV6_MIP6 +#define fl_mh_type uli_u.mht.type +#endif __u32 secid; /* used by xfrm; see secid.txt */ } __attribute__((__aligned__(BITS_PER_LONG/8))); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 8e6ec6063f8c..72bf47b2a4e0 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -40,6 +40,7 @@ #define NEXTHDR_ICMP 58 /* ICMP for IPv6. */ #define NEXTHDR_NONE 59 /* No next header */ #define NEXTHDR_DEST 60 /* Destination options header. */ +#define NEXTHDR_MOBILITY 135 /* Mobility header. */ #define NEXTHDR_MAX 255 diff --git a/include/net/mip6.h b/include/net/mip6.h index 42b65bace122..fd43178faace 100644 --- a/include/net/mip6.h +++ b/include/net/mip6.h @@ -28,6 +28,29 @@ #define MIP6_OPT_PAD_1 0 #define MIP6_OPT_PAD_N 1 +/* + * Mobility Header + */ +struct ip6_mh { + __u8 ip6mh_proto; + __u8 ip6mh_hdrlen; + __u8 ip6mh_type; + __u8 ip6mh_reserved; + __u16 ip6mh_cksum; + /* Followed by type specific messages */ + __u8 data[0]; +} __attribute__ ((__packed__)); + +#define IP6_MH_TYPE_BRR 0 /* Binding Refresh Request */ +#define IP6_MH_TYPE_HOTI 1 /* HOTI Message */ +#define IP6_MH_TYPE_COTI 2 /* COTI Message */ +#define IP6_MH_TYPE_HOT 3 /* HOT Message */ +#define IP6_MH_TYPE_COT 4 /* COT Message */ +#define IP6_MH_TYPE_BU 5 /* Binding Update */ +#define IP6_MH_TYPE_BACK 6 /* Binding ACK */ +#define IP6_MH_TYPE_BERROR 7 /* Binding Error */ +#define IP6_MH_TYPE_MAX IP6_MH_TYPE_BERROR + extern int mip6_init(void); extern void mip6_fini(void); -- cgit v1.3-8-gc7d7 From 7be96f7628469e56f91d51f13b03e9bcff113c7f Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 20:35:31 -0700 Subject: [IPV6] MIP6: Add receiving mobility header functions through raw socket. Like ICMPv6, mobility header is handled through raw socket. In inbound case, check only whether ICMPv6 error should be sent as a reply or not by kernel. Based on MIPL2 kernel patch. This patch was also written by: Ville Nuorvala This patch was also written by: Antti Tuominen Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/mip6.h | 4 +++ net/ipv6/mip6.c | 83 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/raw.c | 29 ++++++++++++++++++- 3 files changed, 115 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/mip6.h b/include/net/mip6.h index fd43178faace..68263c6d9996 100644 --- a/include/net/mip6.h +++ b/include/net/mip6.h @@ -25,6 +25,9 @@ #ifndef _NET_MIP6_H #define _NET_MIP6_H +#include +#include + #define MIP6_OPT_PAD_1 0 #define MIP6_OPT_PAD_N 1 @@ -53,5 +56,6 @@ struct ip6_mh { extern int mip6_init(void); extern void mip6_fini(void); +extern int mip6_mh_filter(struct sock *sk, struct sk_buff *skb); #endif diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index a8adf891fe0e..7b5f89321482 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -26,7 +26,10 @@ #include #include #include +#include +#include #include +#include #include #include @@ -55,6 +58,86 @@ static inline void *mip6_padn(__u8 *data, __u8 padlen) return data + padlen; } +static inline void mip6_param_prob(struct sk_buff *skb, int code, int pos) +{ + icmpv6_send(skb, ICMPV6_PARAMPROB, code, pos, skb->dev); +} + +static int mip6_mh_len(int type) +{ + int len = 0; + + switch (type) { + case IP6_MH_TYPE_BRR: + len = 0; + break; + case IP6_MH_TYPE_HOTI: + case IP6_MH_TYPE_COTI: + case IP6_MH_TYPE_BU: + case IP6_MH_TYPE_BACK: + len = 1; + break; + case IP6_MH_TYPE_HOT: + case IP6_MH_TYPE_COT: + case IP6_MH_TYPE_BERROR: + len = 2; + break; + } + return len; +} + +int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) +{ + struct ip6_mh *mh; + int mhlen; + + if (!pskb_may_pull(skb, (skb->h.raw - skb->data) + 8) || + !pskb_may_pull(skb, (skb->h.raw - skb->data) + ((skb->h.raw[1] + 1) << 3))) + return -1; + + mh = (struct ip6_mh *)skb->h.raw; + + if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) { + LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n", + mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type)); + mip6_param_prob(skb, 0, (&mh->ip6mh_hdrlen) - skb->nh.raw); + return -1; + } + mhlen = (mh->ip6mh_hdrlen + 1) << 3; + + if (skb->ip_summed == CHECKSUM_COMPLETE) { + skb->ip_summed = CHECKSUM_UNNECESSARY; + if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, + &skb->nh.ipv6h->daddr, + mhlen, IPPROTO_MH, + skb->csum)) { + LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH hw checksum failed\n"); + skb->ip_summed = CHECKSUM_NONE; + } + } + if (skb->ip_summed == CHECKSUM_NONE) { + if (csum_ipv6_magic(&skb->nh.ipv6h->saddr, + &skb->nh.ipv6h->daddr, + mhlen, IPPROTO_MH, + skb_checksum(skb, 0, mhlen, 0))) { + LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH checksum failed [%04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x > %04x:%04x:%04x:%04x:%04x:%04x:%04x:%04x]\n", + NIP6(skb->nh.ipv6h->saddr), + NIP6(skb->nh.ipv6h->daddr)); + return -1; + } + skb->ip_summed = CHECKSUM_UNNECESSARY; + } + + if (mh->ip6mh_proto != IPPROTO_NONE) { + LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n", + mh->ip6mh_proto); + mip6_param_prob(skb, 0, (&mh->ip6mh_proto) - skb->nh.raw); + return -1; + } + + return 0; +} + static int mip6_destopt_input(struct xfrm_state *x, struct sk_buff *skb) { struct ipv6hdr *iph = skb->nh.ipv6h; diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index d4af1cb5e19f..ecca8aae3c4b 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -50,6 +50,9 @@ #include #include #include +#ifdef CONFIG_IPV6_MIP6 +#include +#endif #include #include @@ -169,8 +172,32 @@ int ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) sk = __raw_v6_lookup(sk, nexthdr, daddr, saddr, IP6CB(skb)->iif); while (sk) { + int filtered; + delivered = 1; - if (nexthdr != IPPROTO_ICMPV6 || !icmpv6_filter(sk, skb)) { + switch (nexthdr) { + case IPPROTO_ICMPV6: + filtered = icmpv6_filter(sk, skb); + break; +#ifdef CONFIG_IPV6_MIP6 + case IPPROTO_MH: + /* XXX: To validate MH only once for each packet, + * this is placed here. It should be after checking + * xfrm policy, however it doesn't. The checking xfrm + * policy is placed in rawv6_rcv() because it is + * required for each socket. + */ + filtered = mip6_mh_filter(sk, skb); + break; +#endif + default: + filtered = 0; + break; + } + + if (filtered < 0) + break; + if (filtered == 0) { struct sk_buff *clone = skb_clone(skb, GFP_ATOMIC); /* Not releasing hash table! */ -- cgit v1.3-8-gc7d7 From 2ce4272a699c731b9736d76126dc742353e381db Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 20:39:03 -0700 Subject: [IPV6] MIP6: Transformation support mobility header. Transformation support mobility header. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/xfrm.h | 5 +++++ net/ipv6/xfrm6_policy.c | 15 +++++++++++++++ 2 files changed, 20 insertions(+) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 7f1630630dcf..13488e7ba68c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -546,6 +546,11 @@ u16 xfrm_flowi_sport(struct flowi *fl) case IPPROTO_ICMPV6: port = htons(fl->fl_icmp_type); break; +#ifdef CONFIG_IPV6_MIP6 + case IPPROTO_MH: + port = htons(fl->fl_mh_type); + break; +#endif default: port = 0; /*XXX*/ } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 729b4748d6d3..98c2fe449b3f 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -18,6 +18,9 @@ #include #include #include +#ifdef CONFIG_IPV6_MIP6 +#include +#endif static struct dst_ops xfrm6_dst_ops; static struct xfrm_policy_afinfo xfrm6_policy_afinfo; @@ -270,6 +273,18 @@ _decode_session6(struct sk_buff *skb, struct flowi *fl) fl->proto = nexthdr; return; +#ifdef CONFIG_IPV6_MIP6 + case IPPROTO_MH: + if (pskb_may_pull(skb, skb->nh.raw + offset + 3 - skb->data)) { + struct ip6_mh *mh; + mh = (struct ip6_mh *)exthdr; + + fl->fl_mh_type = mh->ip6mh_type; + } + fl->proto = nexthdr; + return; +#endif + /* XXX Why are there these headers? */ case IPPROTO_AH: case IPPROTO_ESP: -- cgit v1.3-8-gc7d7 From df0ba92a99ca757039dfa84a929281ea3f7a50e8 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 20:41:00 -0700 Subject: [XFRM]: Trace which secpath state is reject factor. For Mobile IPv6 usage, it is required to trace which secpath state is reject factor in order to notify it to user space (to know the address which cannot be used route optimized communication). Based on MIPL2 kernel patch. This patch was also written by: Henrik Petander Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/xfrm.h | 1 + net/xfrm/xfrm_policy.c | 55 +++++++++++++++++++++++++++++++++++++++++++------- 2 files changed, 49 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 13488e7ba68c..9ebbdc1dd471 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -273,6 +273,7 @@ struct xfrm_type void (*destructor)(struct xfrm_state *); int (*input)(struct xfrm_state *, struct sk_buff *skb); int (*output)(struct xfrm_state *, struct sk_buff *pskb); + int (*reject)(struct xfrm_state *, struct sk_buff *, struct flowi *); int (*hdr_offset)(struct xfrm_state *, struct sk_buff *, u8 **); xfrm_address_t *(*local_addr)(struct xfrm_state *, xfrm_address_t *); xfrm_address_t *(*remote_addr)(struct xfrm_state *, xfrm_address_t *); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index ad2a5cba1f5b..d125a2649037 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -988,6 +988,23 @@ error: } EXPORT_SYMBOL(xfrm_lookup); +static inline int +xfrm_secpath_reject(int idx, struct sk_buff *skb, struct flowi *fl) +{ + struct xfrm_state *x; + int err; + + if (!skb->sp || idx < 0 || idx >= skb->sp->len) + return 0; + x = skb->sp->xvec[idx]; + if (!x->type->reject) + return 0; + xfrm_state_hold(x); + err = x->type->reject(x, skb, fl); + xfrm_state_put(x); + return err; +} + /* When skb is transformed back to its "native" form, we have to * check policy restrictions. At the moment we make this in maximally * stupid way. Shame on me. :-) Of course, connected sockets must @@ -1010,6 +1027,13 @@ xfrm_state_ok(struct xfrm_tmpl *tmpl, struct xfrm_state *x, xfrm_state_addr_cmp(tmpl, x, family)); } +/* + * 0 or more than 0 is returned when validation is succeeded (either bypass + * because of optional transport mode, or next index of the mathced secpath + * state with the template. + * -1 is returned when no matching template is found. + * Otherwise "-2 - errored_index" is returned. + */ static inline int xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, unsigned short family) @@ -1024,8 +1048,11 @@ xfrm_policy_ok(struct xfrm_tmpl *tmpl, struct sec_path *sp, int start, for (; idx < sp->len; idx++) { if (xfrm_state_ok(tmpl, sp->xvec[idx], family)) return ++idx; - if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) + if (sp->xvec[idx]->props.mode != XFRM_MODE_TRANSPORT) { + if (start == -1) + start = -2-idx; break; + } } return start; } @@ -1046,11 +1073,14 @@ xfrm_decode_session(struct sk_buff *skb, struct flowi *fl, unsigned short family } EXPORT_SYMBOL(xfrm_decode_session); -static inline int secpath_has_nontransport(struct sec_path *sp, int k) +static inline int secpath_has_nontransport(struct sec_path *sp, int k, int *idxp) { for (; k < sp->len; k++) { - if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) + if (sp->xvec[k]->props.mode != XFRM_MODE_TRANSPORT) { + if (idxp) + *idxp = k; return 1; + } } return 0; @@ -1062,6 +1092,8 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, struct xfrm_policy *pol; struct flowi fl; u8 fl_dir = policy_to_flow_dir(dir); + int xerr_idx = -1; + int *xerr_idxp = &xerr_idx; if (xfrm_decode_session(skb, &fl, family) < 0) return 0; @@ -1086,8 +1118,13 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, pol = flow_cache_lookup(&fl, family, fl_dir, xfrm_policy_lookup); - if (!pol) - return !skb->sp || !secpath_has_nontransport(skb->sp, 0); + if (!pol) { + if (skb->sp && secpath_has_nontransport(skb->sp, 0, xerr_idxp)) { + xfrm_secpath_reject(xerr_idx, skb, &fl); + return 0; + } + return 1; + } pol->curlft.use_time = (unsigned long)xtime.tv_sec; @@ -1107,11 +1144,14 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, */ for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) { k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family); - if (k < 0) + if (k < 0) { + if (k < -1 && xerr_idxp) + *xerr_idxp = -(2+k); goto reject; + } } - if (secpath_has_nontransport(sp, k)) + if (secpath_has_nontransport(sp, k, xerr_idxp)) goto reject; xfrm_pol_put(pol); @@ -1119,6 +1159,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, } reject: + xfrm_secpath_reject(xerr_idx, skb, &fl); xfrm_pol_put(pol); return 0; } -- cgit v1.3-8-gc7d7 From 97a64b4577ae2bc5599dbd008a3cd9e25de9b9f5 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 20:44:06 -0700 Subject: [XFRM]: Introduce XFRM_MSG_REPORT. XFRM_MSG_REPORT is a message as notification of state protocol and selector from kernel to user-space. Mobile IPv6 will use it when inbound reject is occurred at route optimization to make user-space know a binding error requirement. Based on MIPL2 kernel patch. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/xfrm.h | 12 ++++++++++++ include/net/xfrm.h | 2 ++ net/xfrm/xfrm_state.c | 19 +++++++++++++++++++ net/xfrm/xfrm_user.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 79 insertions(+) (limited to 'include/net') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 1d8c1f22c12d..4009f4445fa9 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -166,6 +166,10 @@ enum { #define XFRM_MSG_NEWAE XFRM_MSG_NEWAE XFRM_MSG_GETAE, #define XFRM_MSG_GETAE XFRM_MSG_GETAE + + XFRM_MSG_REPORT, +#define XFRM_MSG_REPORT XFRM_MSG_REPORT + __XFRM_MSG_MAX }; #define XFRM_MSG_MAX (__XFRM_MSG_MAX - 1) @@ -325,12 +329,18 @@ struct xfrm_usersa_flush { __u8 proto; }; +struct xfrm_user_report { + __u8 proto; + struct xfrm_selector sel; +}; + #ifndef __KERNEL__ /* backwards compatibility for userspace */ #define XFRMGRP_ACQUIRE 1 #define XFRMGRP_EXPIRE 2 #define XFRMGRP_SA 4 #define XFRMGRP_POLICY 8 +#define XFRMGRP_REPORT 0x10 #endif enum xfrm_nlgroups { @@ -346,6 +356,8 @@ enum xfrm_nlgroups { #define XFRMNLGRP_POLICY XFRMNLGRP_POLICY XFRMNLGRP_AEVENTS, #define XFRMNLGRP_AEVENTS XFRMNLGRP_AEVENTS + XFRMNLGRP_REPORT, +#define XFRMNLGRP_REPORT XFRMNLGRP_REPORT __XFRMNLGRP_MAX }; #define XFRMNLGRP_MAX (__XFRMNLGRP_MAX - 1) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 9ebbdc1dd471..0b223eed4c9b 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -381,6 +381,7 @@ struct xfrm_mgr struct xfrm_policy *(*compile_policy)(struct sock *sk, int opt, u8 *data, int len, int *dir); int (*new_mapping)(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport); int (*notify_policy)(struct xfrm_policy *x, int dir, struct km_event *c); + int (*report)(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); }; extern int xfrm_register_km(struct xfrm_mgr *km); @@ -1043,6 +1044,7 @@ extern void xfrm_init_pmtu(struct dst_entry *dst); extern wait_queue_head_t km_waitq; extern int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, u16 sport); extern void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid); +extern int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); extern void xfrm_input_init(void); extern int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 3da89c01ea71..a26ef6952c30 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1055,6 +1055,25 @@ void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid) } EXPORT_SYMBOL(km_policy_expired); +int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr) +{ + int err = -EINVAL; + int ret; + struct xfrm_mgr *km; + + read_lock(&xfrm_km_lock); + list_for_each_entry(km, &xfrm_km_list, list) { + if (km->report) { + ret = km->report(proto, sel, addr); + if (!ret) + err = ret; + } + } + read_unlock(&xfrm_km_lock); + return err; +} +EXPORT_SYMBOL(km_report); + int xfrm_user_policy(struct sock *sk, int optname, u8 __user *optval, int optlen) { int err; diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 770bd2410749..7303b820bea4 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -1491,6 +1491,7 @@ static const int xfrm_msg_min[XFRM_NR_MSGTYPES] = { [XFRM_MSG_FLUSHPOLICY - XFRM_MSG_BASE] = NLMSG_LENGTH(0), [XFRM_MSG_NEWAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), [XFRM_MSG_GETAE - XFRM_MSG_BASE] = XMSGSIZE(xfrm_aevent_id), + [XFRM_MSG_REPORT - XFRM_MSG_BASE] = XMSGSIZE(xfrm_user_report), }; #undef XMSGSIZE @@ -2058,12 +2059,57 @@ static int xfrm_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_ev } +static int build_report(struct sk_buff *skb, u8 proto, + struct xfrm_selector *sel, xfrm_address_t *addr) +{ + struct xfrm_user_report *ur; + struct nlmsghdr *nlh; + unsigned char *b = skb->tail; + + nlh = NLMSG_PUT(skb, 0, 0, XFRM_MSG_REPORT, sizeof(*ur)); + ur = NLMSG_DATA(nlh); + nlh->nlmsg_flags = 0; + + ur->proto = proto; + memcpy(&ur->sel, sel, sizeof(ur->sel)); + + if (addr) + RTA_PUT(skb, XFRMA_COADDR, sizeof(*addr), addr); + + nlh->nlmsg_len = skb->tail - b; + return skb->len; + +nlmsg_failure: +rtattr_failure: + skb_trim(skb, b - skb->data); + return -1; +} + +static int xfrm_send_report(u8 proto, struct xfrm_selector *sel, + xfrm_address_t *addr) +{ + struct sk_buff *skb; + size_t len; + + len = NLMSG_ALIGN(NLMSG_LENGTH(sizeof(struct xfrm_user_report))); + skb = alloc_skb(len, GFP_ATOMIC); + if (skb == NULL) + return -ENOMEM; + + if (build_report(skb, proto, sel, addr) < 0) + BUG(); + + NETLINK_CB(skb).dst_group = XFRMNLGRP_REPORT; + return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_REPORT, GFP_ATOMIC); +} + static struct xfrm_mgr netlink_mgr = { .id = "netlink", .notify = xfrm_send_state_notify, .acquire = xfrm_send_acquire, .compile_policy = xfrm_compile_policy, .notify_policy = xfrm_send_policy_notify, + .report = xfrm_send_report, }; static int __init xfrm_user_init(void) -- cgit v1.3-8-gc7d7 From 4e81bb8336a0ac50289d4d4c7a55e559b994ee8f Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 22:43:30 -0700 Subject: [XFRM] POLICY: sub policy support. Sub policy is introduced. Main and sub policy are applied the same flow. (Policy that current kernel uses is named as main.) It is required another transformation policy management to keep IPsec and Mobile IPv6 lives separate. Policy which lives shorter time in kernel should be a sub i.e. normally main is for IPsec and sub is for Mobile IPv6. (Such usage as two IPsec policies on different database can be used, too.) Limitation or TODOs: - Sub policy is not supported for per socket one (it is always inserted as main). - Current kernel makes cached outbound with flowi to skip searching database. However this patch makes it disabled only when "two policies are used and the first matched one is bypass case" because neither flowi nor bundle information knows about transformation template size. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki --- include/linux/xfrm.h | 7 ++ include/net/xfrm.h | 45 +++++++-- net/xfrm/xfrm_policy.c | 252 ++++++++++++++++++++++++++++++++++++++++++------- 3 files changed, 260 insertions(+), 44 deletions(-) (limited to 'include/net') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 4009f4445fa9..492fb9818747 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -102,6 +102,13 @@ struct xfrm_stats { __u32 integrity_failed; }; +enum +{ + XFRM_POLICY_TYPE_MAIN = 0, + XFRM_POLICY_TYPE_SUB = 1, + XFRM_POLICY_TYPE_MAX = 2 +}; + enum { XFRM_POLICY_IN = 0, diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 0b223eed4c9b..4655ca25f808 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -341,6 +341,7 @@ struct xfrm_policy atomic_t refcnt; struct timer_list timer; + u8 type; u32 priority; u32 index; struct xfrm_selector selector; @@ -389,6 +390,19 @@ extern int xfrm_unregister_km(struct xfrm_mgr *km); extern struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2]; +#ifdef CONFIG_XFRM_SUB_POLICY +extern struct xfrm_policy *xfrm_policy_list_sub[XFRM_POLICY_MAX*2]; + +static inline int xfrm_policy_lists_empty(int dir) +{ + return (!xfrm_policy_list[dir] && !xfrm_policy_list_sub[dir]); +} +#else +static inline int xfrm_policy_lists_empty(int dir) +{ + return (!xfrm_policy_list[dir]); +} +#endif static inline void xfrm_pol_hold(struct xfrm_policy *policy) { @@ -404,6 +418,20 @@ static inline void xfrm_pol_put(struct xfrm_policy *policy) __xfrm_policy_destroy(policy); } +#ifdef CONFIG_XFRM_SUB_POLICY +static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols) +{ + int i; + for (i = npols - 1; i >= 0; --i) + xfrm_pol_put(pols[i]); +} +#else +static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols) +{ + xfrm_pol_put(pols[0]); +} +#endif + #define XFRM_DST_HSIZE 1024 static __inline__ @@ -737,8 +765,8 @@ static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *sk { if (sk && sk->sk_policy[XFRM_POLICY_IN]) return __xfrm_policy_check(sk, dir, skb, family); - - return (!xfrm_policy_list[dir] && !skb->sp) || + + return (xfrm_policy_lists_empty(dir) && !skb->sp) || (skb->dst->flags & DST_NOPOLICY) || __xfrm_policy_check(sk, dir, skb, family); } @@ -758,7 +786,7 @@ extern int __xfrm_route_forward(struct sk_buff *skb, unsigned short family); static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) { - return !xfrm_policy_list[XFRM_POLICY_OUT] || + return xfrm_policy_lists_empty(XFRM_POLICY_OUT) || (skb->dst->flags & DST_NOXFRM) || __xfrm_route_forward(skb, family); } @@ -1023,18 +1051,19 @@ static inline int xfrm_dst_lookup(struct xfrm_dst **dst, struct flowi *fl, unsig #endif struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp); -extern int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), void *); +extern int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*), void *); int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl); -struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel, +struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, + struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete); -struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete); -void xfrm_policy_flush(void); +struct xfrm_policy *xfrm_policy_byid(u8, int dir, u32 id, int delete); +void xfrm_policy_flush(u8 type); u32 xfrm_get_acqseq(void); void xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi); struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create, unsigned short family); -extern void xfrm_policy_flush(void); +extern void xfrm_policy_flush(u8 type); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); extern int xfrm_flush_bundles(void); extern void xfrm_flush_all_bundles(void); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index d125a2649037..96de6c76ed57 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -32,6 +32,24 @@ static DEFINE_RWLOCK(xfrm_policy_lock); struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2]; EXPORT_SYMBOL(xfrm_policy_list); +#ifdef CONFIG_XFRM_SUB_POLICY +struct xfrm_policy *xfrm_policy_list_sub[XFRM_POLICY_MAX*2]; +EXPORT_SYMBOL(xfrm_policy_list_sub); + +#define XFRM_POLICY_LISTS(type) \ + ((type == XFRM_POLICY_TYPE_SUB) ? xfrm_policy_list_sub : \ + xfrm_policy_list) +#define XFRM_POLICY_LISTHEAD(type, dir) \ + ((type == XFRM_POLICY_TYPE_SUB) ? xfrm_policy_list_sub[dir] : \ + xfrm_policy_list[dir]) +#define XFRM_POLICY_LISTHEADP(type, dir) \ + ((type == XFRM_POLICY_TYPE_SUB) ? &xfrm_policy_list_sub[dir] : \ + &xfrm_policy_list[dir]) +#else +#define XFRM_POLICY_LISTS(type) xfrm_policy_list +#define XFRM_POLICY_LISTHEAD(type, dif) xfrm_policy_list[dir] +#define XFRM_POLICY_LISTHEADP(type, dif) &xfrm_policy_list[dir] +#endif static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; @@ -397,7 +415,7 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) /* Generate new index... KAME seems to generate them ordered by cost * of an absolute inpredictability of ordering of rules. This will not pass. */ -static u32 xfrm_gen_index(int dir) +static u32 xfrm_gen_index(u8 type, int dir) { u32 idx; struct xfrm_policy *p; @@ -408,7 +426,7 @@ static u32 xfrm_gen_index(int dir) idx_generator += 8; if (idx == 0) idx = 8; - for (p = xfrm_policy_list[dir]; p; p = p->next) { + for (p = XFRM_POLICY_LISTHEAD(type, dir); p; p = p->next) { if (p->index == idx) break; } @@ -425,7 +443,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) struct dst_entry *gc_list; write_lock_bh(&xfrm_policy_lock); - for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL;) { + for (p = XFRM_POLICY_LISTHEADP(policy->type, dir); (pol=*p)!=NULL;) { if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 && xfrm_sec_ctx_match(pol->security, policy->security)) { if (excl) { @@ -452,7 +470,7 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) policy->next = *p; *p = policy; atomic_inc(&flow_cache_genid); - policy->index = delpol ? delpol->index : xfrm_gen_index(dir); + policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir); policy->curlft.add_time = (unsigned long)xtime.tv_sec; policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) @@ -493,13 +511,14 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) } EXPORT_SYMBOL(xfrm_policy_insert); -struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel, +struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, + struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete) { struct xfrm_policy *pol, **p; write_lock_bh(&xfrm_policy_lock); - for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { + for (p = XFRM_POLICY_LISTHEADP(type, dir); (pol=*p)!=NULL; p = &pol->next) { if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) && (xfrm_sec_ctx_match(ctx, pol->security))) { xfrm_pol_hold(pol); @@ -518,12 +537,12 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(int dir, struct xfrm_selector *sel, } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); -struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete) +struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete) { struct xfrm_policy *pol, **p; write_lock_bh(&xfrm_policy_lock); - for (p = &xfrm_policy_list[dir]; (pol=*p)!=NULL; p = &pol->next) { + for (p = XFRM_POLICY_LISTHEADP(type, dir); (pol=*p)!=NULL; p = &pol->next) { if (pol->index == id) { xfrm_pol_hold(pol); if (delete) @@ -541,15 +560,16 @@ struct xfrm_policy *xfrm_policy_byid(int dir, u32 id, int delete) } EXPORT_SYMBOL(xfrm_policy_byid); -void xfrm_policy_flush(void) +void xfrm_policy_flush(u8 type) { struct xfrm_policy *xp; + struct xfrm_policy **p_list = XFRM_POLICY_LISTS(type); int dir; write_lock_bh(&xfrm_policy_lock); for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { - while ((xp = xfrm_policy_list[dir]) != NULL) { - xfrm_policy_list[dir] = xp->next; + while ((xp = p_list[dir]) != NULL) { + p_list[dir] = xp->next; write_unlock_bh(&xfrm_policy_lock); xfrm_policy_kill(xp); @@ -562,7 +582,7 @@ void xfrm_policy_flush(void) } EXPORT_SYMBOL(xfrm_policy_flush); -int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), +int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*), void *data) { struct xfrm_policy *xp; @@ -572,7 +592,7 @@ int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), read_lock_bh(&xfrm_policy_lock); for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) + for (xp = XFRM_POLICY_LISTHEAD(type, dir); xp; xp = xp->next) count++; } @@ -582,7 +602,7 @@ int xfrm_policy_walk(int (*func)(struct xfrm_policy *, int, int, void*), } for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - for (xp = xfrm_policy_list[dir]; xp; xp = xp->next) { + for (xp = XFRM_POLICY_LISTHEAD(type, dir); xp; xp = xp->next) { error = func(xp, dir%XFRM_POLICY_MAX, --count, data); if (error) goto out; @@ -597,13 +617,13 @@ EXPORT_SYMBOL(xfrm_policy_walk); /* Find policy to apply to this flow. */ -static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, - void **objp, atomic_t **obj_refp) +static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, + u16 family, u8 dir) { struct xfrm_policy *pol; read_lock_bh(&xfrm_policy_lock); - for (pol = xfrm_policy_list[dir]; pol; pol = pol->next) { + for (pol = XFRM_POLICY_LISTHEAD(type, dir); pol; pol = pol->next) { struct xfrm_selector *sel = &pol->selector; int match; @@ -620,6 +640,25 @@ static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, } } read_unlock_bh(&xfrm_policy_lock); + + return pol; +} + +static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, + void **objp, atomic_t **obj_refp) +{ + struct xfrm_policy *pol; + +#ifdef CONFIG_XFRM_SUB_POLICY + pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_SUB, fl, family, dir); + if (pol) + goto end; +#endif + pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir); + +#ifdef CONFIG_XFRM_SUB_POLICY + end: +#endif if ((*objp = (void *) pol) != NULL) *obj_refp = &pol->refcnt; } @@ -665,8 +704,10 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) { - pol->next = xfrm_policy_list[dir]; - xfrm_policy_list[dir] = pol; + struct xfrm_policy **p_list = XFRM_POLICY_LISTS(pol->type); + + pol->next = p_list[dir]; + p_list[dir] = pol; xfrm_pol_hold(pol); } @@ -675,7 +716,7 @@ static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, { struct xfrm_policy **polp; - for (polp = &xfrm_policy_list[dir]; + for (polp = XFRM_POLICY_LISTHEADP(pol->type, dir); *polp != NULL; polp = &(*polp)->next) { if (*polp == pol) { *polp = pol->next; @@ -704,12 +745,17 @@ int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol) { struct xfrm_policy *old_pol; +#ifdef CONFIG_XFRM_SUB_POLICY + if (pol && pol->type != XFRM_POLICY_TYPE_MAIN) + return -EINVAL; +#endif + write_lock_bh(&xfrm_policy_lock); old_pol = sk->sk_policy[dir]; sk->sk_policy[dir] = pol; if (pol) { pol->curlft.add_time = (unsigned long)xtime.tv_sec; - pol->index = xfrm_gen_index(XFRM_POLICY_MAX+dir); + pol->index = xfrm_gen_index(pol->type, XFRM_POLICY_MAX+dir); __xfrm_policy_link(pol, XFRM_POLICY_MAX+dir); } if (old_pol) @@ -738,6 +784,7 @@ static struct xfrm_policy *clone_policy(struct xfrm_policy *old, int dir) newp->flags = old->flags; newp->xfrm_nr = old->xfrm_nr; newp->index = old->index; + newp->type = old->type; memcpy(newp->xfrm_vec, old->xfrm_vec, newp->xfrm_nr*sizeof(struct xfrm_tmpl)); write_lock_bh(&xfrm_policy_lock); @@ -764,9 +811,9 @@ int __xfrm_sk_clone_policy(struct sock *sk) /* Resolve list of templates for the flow, given policy. */ static int -xfrm_tmpl_resolve(struct xfrm_policy *policy, struct flowi *fl, - struct xfrm_state **xfrm, - unsigned short family) +xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, + struct xfrm_state **xfrm, + unsigned short family) { int nx; int i, error; @@ -809,6 +856,38 @@ fail: return error; } +static int +xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, + struct xfrm_state **xfrm, + unsigned short family) +{ + int cnx = 0; + int error; + int ret; + int i; + + for (i = 0; i < npols; i++) { + if (cnx + pols[i]->xfrm_nr >= XFRM_MAX_DEPTH) { + error = -ENOBUFS; + goto fail; + } + ret = xfrm_tmpl_resolve_one(pols[i], fl, &xfrm[cnx], family); + if (ret < 0) { + error = ret; + goto fail; + } else + cnx += ret; + } + + return cnx; + + fail: + for (cnx--; cnx>=0; cnx--) + xfrm_state_put(xfrm[cnx]); + return error; + +} + /* Check that the bundle accepts the flow and its components are * still valid. */ @@ -855,6 +934,11 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, struct sock *sk, int flags) { struct xfrm_policy *policy; + struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; + int npols; + int pol_dead; + int xfrm_nr; + int pi; struct xfrm_state *xfrm[XFRM_MAX_DEPTH]; struct dst_entry *dst, *dst_orig = *dst_p; int nx = 0; @@ -866,12 +950,18 @@ int xfrm_lookup(struct dst_entry **dst_p, struct flowi *fl, restart: genid = atomic_read(&flow_cache_genid); policy = NULL; + for (pi = 0; pi < ARRAY_SIZE(pols); pi++) + pols[pi] = NULL; + npols = 0; + pol_dead = 0; + xfrm_nr = 0; + if (sk && sk->sk_policy[1]) policy = xfrm_sk_policy_lookup(sk, XFRM_POLICY_OUT, fl); if (!policy) { /* To accelerate a bit... */ - if ((dst_orig->flags & DST_NOXFRM) || !xfrm_policy_list[XFRM_POLICY_OUT]) + if ((dst_orig->flags & DST_NOXFRM) || xfrm_policy_lists_empty(XFRM_POLICY_OUT)) return 0; policy = flow_cache_lookup(fl, dst_orig->ops->family, @@ -883,6 +973,9 @@ restart: family = dst_orig->ops->family; policy->curlft.use_time = (unsigned long)xtime.tv_sec; + pols[0] = policy; + npols ++; + xfrm_nr += pols[0]->xfrm_nr; switch (policy->action) { case XFRM_POLICY_BLOCK: @@ -891,11 +984,13 @@ restart: goto error; case XFRM_POLICY_ALLOW: +#ifndef CONFIG_XFRM_SUB_POLICY if (policy->xfrm_nr == 0) { /* Flow passes not transformed. */ xfrm_pol_put(policy); return 0; } +#endif /* Try to find matching bundle. * @@ -911,7 +1006,36 @@ restart: if (dst) break; - nx = xfrm_tmpl_resolve(policy, fl, xfrm, family); +#ifdef CONFIG_XFRM_SUB_POLICY + if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { + pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, + fl, family, + XFRM_POLICY_OUT); + if (pols[1]) { + if (pols[1]->action == XFRM_POLICY_BLOCK) { + err = -EPERM; + goto error; + } + npols ++; + xfrm_nr += pols[1]->xfrm_nr; + } + } + + /* + * Because neither flowi nor bundle information knows about + * transformation template size. On more than one policy usage + * we can realize whether all of them is bypass or not after + * they are searched. See above not-transformed bypass + * is surrounded by non-sub policy configuration, too. + */ + if (xfrm_nr == 0) { + /* Flow passes not transformed. */ + xfrm_pols_put(pols, npols); + return 0; + } + +#endif + nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); if (unlikely(nx<0)) { err = nx; @@ -924,7 +1048,7 @@ restart: set_current_state(TASK_RUNNING); remove_wait_queue(&km_waitq, &wait); - nx = xfrm_tmpl_resolve(policy, fl, xfrm, family); + nx = xfrm_tmpl_resolve(pols, npols, fl, xfrm, family); if (nx == -EAGAIN && signal_pending(current)) { err = -ERESTART; @@ -932,7 +1056,7 @@ restart: } if (nx == -EAGAIN || genid != atomic_read(&flow_cache_genid)) { - xfrm_pol_put(policy); + xfrm_pols_put(pols, npols); goto restart; } err = nx; @@ -942,7 +1066,7 @@ restart: } if (nx == 0) { /* Flow passes not transformed. */ - xfrm_pol_put(policy); + xfrm_pols_put(pols, npols); return 0; } @@ -956,8 +1080,14 @@ restart: goto error; } + for (pi = 0; pi < npols; pi++) { + read_lock_bh(&pols[pi]->lock); + pol_dead |= pols[pi]->dead; + read_unlock_bh(&pols[pi]->lock); + } + write_lock_bh(&policy->lock); - if (unlikely(policy->dead || stale_bundle(dst))) { + if (unlikely(pol_dead || stale_bundle(dst))) { /* Wow! While we worked on resolving, this * policy has gone. Retry. It is not paranoia, * we just cannot enlist new bundle to dead object. @@ -977,12 +1107,12 @@ restart: } *dst_p = dst; dst_release(dst_orig); - xfrm_pol_put(policy); + xfrm_pols_put(pols, npols); return 0; error: dst_release(dst_orig); - xfrm_pol_put(policy); + xfrm_pols_put(pols, npols); *dst_p = NULL; return err; } @@ -1090,6 +1220,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, unsigned short family) { struct xfrm_policy *pol; + struct xfrm_policy *pols[XFRM_POLICY_TYPE_MAX]; + int npols = 0; + int xfrm_nr; + int pi; struct flowi fl; u8 fl_dir = policy_to_flow_dir(dir); int xerr_idx = -1; @@ -1128,22 +1262,50 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, pol->curlft.use_time = (unsigned long)xtime.tv_sec; + pols[0] = pol; + npols ++; +#ifdef CONFIG_XFRM_SUB_POLICY + if (pols[0]->type != XFRM_POLICY_TYPE_MAIN) { + pols[1] = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, + &fl, family, + XFRM_POLICY_IN); + if (pols[1]) { + pols[1]->curlft.use_time = (unsigned long)xtime.tv_sec; + npols ++; + } + } +#endif + if (pol->action == XFRM_POLICY_ALLOW) { struct sec_path *sp; static struct sec_path dummy; + struct xfrm_tmpl *tp[XFRM_MAX_DEPTH]; + struct xfrm_tmpl **tpp = tp; + int ti = 0; int i, k; if ((sp = skb->sp) == NULL) sp = &dummy; + for (pi = 0; pi < npols; pi++) { + if (pols[pi] != pol && + pols[pi]->action != XFRM_POLICY_ALLOW) + goto reject; + if (ti + pols[pi]->xfrm_nr >= XFRM_MAX_DEPTH) + goto reject_error; + for (i = 0; i < pols[pi]->xfrm_nr; i++) + tpp[ti++] = &pols[pi]->xfrm_vec[i]; + } + xfrm_nr = ti; + /* For each tunnel xfrm, find the first matching tmpl. * For each tmpl before that, find corresponding xfrm. * Order is _important_. Later we will implement * some barriers, but at the moment barriers * are implied between each two transformations. */ - for (i = pol->xfrm_nr-1, k = 0; i >= 0; i--) { - k = xfrm_policy_ok(pol->xfrm_vec+i, sp, k, family); + for (i = xfrm_nr-1, k = 0; i >= 0; i--) { + k = xfrm_policy_ok(tpp[i], sp, k, family); if (k < 0) { if (k < -1 && xerr_idxp) *xerr_idxp = -(2+k); @@ -1154,13 +1316,14 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, if (secpath_has_nontransport(sp, k, xerr_idxp)) goto reject; - xfrm_pol_put(pol); + xfrm_pols_put(pols, npols); return 1; } reject: xfrm_secpath_reject(xerr_idx, skb, &fl); - xfrm_pol_put(pol); +reject_error: + xfrm_pols_put(pols, npols); return 0; } EXPORT_SYMBOL(__xfrm_policy_check); @@ -1246,6 +1409,23 @@ static void xfrm_prune_bundles(int (*func)(struct dst_entry *)) read_lock_bh(&xfrm_policy_lock); for (i=0; i<2*XFRM_POLICY_MAX; i++) { +#ifdef CONFIG_XFRM_SUB_POLICY + for (pol = xfrm_policy_list_sub[i]; pol; pol = pol->next) { + write_lock(&pol->lock); + dstp = &pol->bundles; + while ((dst=*dstp) != NULL) { + if (func(dst)) { + *dstp = dst->next; + dst->next = gc_list; + gc_list = dst; + } else { + dstp = &dst->next; + } + } + write_unlock(&pol->lock); + } + +#endif for (pol = xfrm_policy_list[i]; pol; pol = pol->next) { write_lock(&pol->lock); dstp = &pol->bundles; -- cgit v1.3-8-gc7d7 From 41a49cc3c02ace59d4dddae91ea211c330970ee3 Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 22:48:31 -0700 Subject: [XFRM]: Add sorting interface for state and template. Under two transformation policies it is required to merge them. This is a platform to sort state for outbound and templates for inbound respectively. It will be used when Mobile IPv6 and IPsec are used at the same time. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/xfrm.h | 20 ++++++++++++++++++++ net/xfrm/xfrm_policy.c | 16 ++++++++++++++-- net/xfrm/xfrm_state.c | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 72 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 4655ca25f808..d341603e4ba8 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -254,6 +254,8 @@ struct xfrm_state_afinfo { struct xfrm_state *(*find_acq)(u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create); + int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n); + int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n); }; extern int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo); @@ -1002,6 +1004,24 @@ extern int xfrm_state_add(struct xfrm_state *x); extern int xfrm_state_update(struct xfrm_state *x); extern struct xfrm_state *xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family); extern struct xfrm_state *xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family); +#ifdef CONFIG_XFRM_SUB_POLICY +extern int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, + int n, unsigned short family); +extern int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, + int n, unsigned short family); +#else +static inline int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, + int n, unsigned short family) +{ + return -ENOSYS; +} + +static inline int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, + int n, unsigned short family) +{ + return -ENOSYS; +} +#endif extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq); extern int xfrm_state_delete(struct xfrm_state *x); extern void xfrm_state_flush(u8 proto); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 96de6c76ed57..1732159ffd01 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -861,6 +861,8 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, struct xfrm_state **xfrm, unsigned short family) { + struct xfrm_state *tp[XFRM_MAX_DEPTH]; + struct xfrm_state **tpp = (npols > 1) ? tp : xfrm; int cnx = 0; int error; int ret; @@ -871,7 +873,8 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, error = -ENOBUFS; goto fail; } - ret = xfrm_tmpl_resolve_one(pols[i], fl, &xfrm[cnx], family); + + ret = xfrm_tmpl_resolve_one(pols[i], fl, &tpp[cnx], family); if (ret < 0) { error = ret; goto fail; @@ -879,11 +882,15 @@ xfrm_tmpl_resolve(struct xfrm_policy **pols, int npols, struct flowi *fl, cnx += ret; } + /* found states are sorted for outbound processing */ + if (npols > 1) + xfrm_state_sort(xfrm, tpp, cnx, family); + return cnx; fail: for (cnx--; cnx>=0; cnx--) - xfrm_state_put(xfrm[cnx]); + xfrm_state_put(tpp[cnx]); return error; } @@ -1280,6 +1287,7 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, struct sec_path *sp; static struct sec_path dummy; struct xfrm_tmpl *tp[XFRM_MAX_DEPTH]; + struct xfrm_tmpl *stp[XFRM_MAX_DEPTH]; struct xfrm_tmpl **tpp = tp; int ti = 0; int i, k; @@ -1297,6 +1305,10 @@ int __xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *skb, tpp[ti++] = &pols[pi]->xfrm_vec[i]; } xfrm_nr = ti; + if (npols > 1) { + xfrm_tmpl_sort(stp, tpp, xfrm_nr, family); + tpp = stp; + } /* For each tunnel xfrm, find the first matching tmpl. * For each tmpl before that, find corresponding xfrm. diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index a26ef6952c30..622e92a08d0b 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -728,6 +728,44 @@ xfrm_find_acq(u8 mode, u32 reqid, u8 proto, } EXPORT_SYMBOL(xfrm_find_acq); +#ifdef CONFIG_XFRM_SUB_POLICY +int +xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n, + unsigned short family) +{ + int err = 0; + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + if (!afinfo) + return -EAFNOSUPPORT; + + spin_lock_bh(&xfrm_state_lock); + if (afinfo->tmpl_sort) + err = afinfo->tmpl_sort(dst, src, n); + spin_unlock_bh(&xfrm_state_lock); + xfrm_state_put_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_tmpl_sort); + +int +xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n, + unsigned short family) +{ + int err = 0; + struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); + if (!afinfo) + return -EAFNOSUPPORT; + + spin_lock_bh(&xfrm_state_lock); + if (afinfo->state_sort) + err = afinfo->state_sort(dst, src, n); + spin_unlock_bh(&xfrm_state_lock); + xfrm_state_put_afinfo(afinfo); + return err; +} +EXPORT_SYMBOL(xfrm_state_sort); +#endif + /* Silly enough, but I'm lazy to build resolution list */ static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) -- cgit v1.3-8-gc7d7 From f7b6983f0feeefcd2a594138adcffe640593d8de Mon Sep 17 00:00:00 2001 From: Masahide NAKAMURA Date: Wed, 23 Aug 2006 22:49:28 -0700 Subject: [XFRM] POLICY: Support netlink socket interface for sub policy. Sub policy can be used through netlink socket. PF_KEY uses main only and it is TODO to support sub. Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/xfrm.h | 7 +++ include/net/xfrm.h | 1 + net/key/af_key.c | 18 +++++-- net/xfrm/xfrm_user.c | 134 ++++++++++++++++++++++++++++++++++++++++++++++----- 4 files changed, 142 insertions(+), 18 deletions(-) (limited to 'include/net') diff --git a/include/linux/xfrm.h b/include/linux/xfrm.h index 492fb9818747..14ecd19f4cdc 100644 --- a/include/linux/xfrm.h +++ b/include/linux/xfrm.h @@ -230,6 +230,12 @@ enum xfrm_ae_ftype_t { #define XFRM_AE_MAX (__XFRM_AE_MAX - 1) }; +struct xfrm_userpolicy_type { + __u8 type; + __u16 reserved1; + __u8 reserved2; +}; + /* Netlink message attributes. */ enum xfrm_attr_type_t { XFRMA_UNSPEC, @@ -248,6 +254,7 @@ enum xfrm_attr_type_t { XFRMA_SRCADDR, /* xfrm_address_t */ XFRMA_COADDR, /* xfrm_address_t */ XFRMA_LASTUSED, + XFRMA_POLICY_TYPE, /* struct xfrm_userpolicy_type */ __XFRMA_MAX #define XFRMA_MAX (__XFRMA_MAX - 1) diff --git a/include/net/xfrm.h b/include/net/xfrm.h index d341603e4ba8..c75b3287d8f8 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -203,6 +203,7 @@ struct km_event u32 proto; u32 byid; u32 aevent; + u32 type; } data; u32 seq; diff --git a/net/key/af_key.c b/net/key/af_key.c index 19e047b0e678..83b443ddc72f 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -1731,7 +1731,8 @@ static u32 gen_reqid(void) ++reqid; if (reqid == 0) reqid = IPSEC_MANUAL_REQID_MAX+1; - if (xfrm_policy_walk(check_reqid, (void*)&reqid) != -EEXIST) + if (xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, check_reqid, + (void*)&reqid) != -EEXIST) return reqid; } while (reqid != start); return 0; @@ -2268,7 +2269,8 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, struct sadb_msg return err; } - xp = xfrm_policy_bysel_ctx(pol->sadb_x_policy_dir-1, &sel, tmp.security, 1); + xp = xfrm_policy_bysel_ctx(XFRM_POLICY_TYPE_MAIN, pol->sadb_x_policy_dir-1, + &sel, tmp.security, 1); security_xfrm_policy_free(&tmp); if (xp == NULL) return -ENOENT; @@ -2330,7 +2332,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, struct sadb_msg *h if (dir >= XFRM_POLICY_MAX) return -EINVAL; - xp = xfrm_policy_byid(dir, pol->sadb_x_policy_id, + xp = xfrm_policy_byid(XFRM_POLICY_TYPE_MAIN, dir, pol->sadb_x_policy_id, hdr->sadb_msg_type == SADB_X_SPDDELETE2); if (xp == NULL) return -ENOENT; @@ -2378,7 +2380,7 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, struct sadb_msg * { struct pfkey_dump_data data = { .skb = skb, .hdr = hdr, .sk = sk }; - return xfrm_policy_walk(dump_sp, &data); + return xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, dump_sp, &data); } static int key_notify_policy_flush(struct km_event *c) @@ -2405,7 +2407,8 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, struct sadb_msg { struct km_event c; - xfrm_policy_flush(); + xfrm_policy_flush(XFRM_POLICY_TYPE_MAIN); + c.data.type = XFRM_POLICY_TYPE_MAIN; c.event = XFRM_MSG_FLUSHPOLICY; c.pid = hdr->sadb_msg_pid; c.seq = hdr->sadb_msg_seq; @@ -2667,6 +2670,9 @@ static int pfkey_send_notify(struct xfrm_state *x, struct km_event *c) static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_event *c) { + if (xp && xp->type != XFRM_POLICY_TYPE_MAIN) + return 0; + switch (c->event) { case XFRM_MSG_POLEXPIRE: return key_notify_policy_expire(xp, c); @@ -2675,6 +2681,8 @@ static int pfkey_send_policy_notify(struct xfrm_policy *xp, int dir, struct km_e case XFRM_MSG_UPDPOLICY: return key_notify_policy(xp, dir, c); case XFRM_MSG_FLUSHPOLICY: + if (c->data.type != XFRM_POLICY_TYPE_MAIN) + break; return key_notify_policy_flush(c); default: printk("pfkey: Unknown policy event %d\n", c->event); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 7303b820bea4..c59a78d2923a 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -786,6 +786,22 @@ static int verify_policy_dir(__u8 dir) return 0; } +static int verify_policy_type(__u8 type) +{ + switch (type) { + case XFRM_POLICY_TYPE_MAIN: +#ifdef CONFIG_XFRM_SUB_POLICY + case XFRM_POLICY_TYPE_SUB: +#endif + break; + + default: + return -EINVAL; + }; + + return 0; +} + static int verify_newpolicy_info(struct xfrm_userpolicy_info *p) { switch (p->share) { @@ -879,6 +895,29 @@ static int copy_from_user_tmpl(struct xfrm_policy *pol, struct rtattr **xfrma) return 0; } +static int copy_from_user_policy_type(u8 *tp, struct rtattr **xfrma) +{ + struct rtattr *rt = xfrma[XFRMA_POLICY_TYPE-1]; + struct xfrm_userpolicy_type *upt; + __u8 type = XFRM_POLICY_TYPE_MAIN; + int err; + + if (rt) { + if (rt->rta_len < sizeof(*upt)) + return -EINVAL; + + upt = RTA_DATA(rt); + type = upt->type; + } + + err = verify_policy_type(type); + if (err) + return err; + + *tp = type; + return 0; +} + static void copy_from_user_policy(struct xfrm_policy *xp, struct xfrm_userpolicy_info *p) { xp->priority = p->priority; @@ -917,16 +956,20 @@ static struct xfrm_policy *xfrm_policy_construct(struct xfrm_userpolicy_info *p, copy_from_user_policy(xp, p); + err = copy_from_user_policy_type(&xp->type, xfrma); + if (err) + goto error; + if (!(err = copy_from_user_tmpl(xp, xfrma))) err = copy_from_user_sec_ctx(xp, xfrma); - - if (err) { - *errp = err; - kfree(xp); - xp = NULL; - } + if (err) + goto error; return xp; + error: + *errp = err; + kfree(xp); + return NULL; } static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) @@ -1037,6 +1080,29 @@ static inline int copy_to_user_sec_ctx(struct xfrm_policy *xp, struct sk_buff *s return 0; } +#ifdef CONFIG_XFRM_SUB_POLICY +static int copy_to_user_policy_type(struct xfrm_policy *xp, struct sk_buff *skb) +{ + struct xfrm_userpolicy_type upt; + + memset(&upt, 0, sizeof(upt)); + upt.type = xp->type; + + RTA_PUT(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt); + + return 0; + +rtattr_failure: + return -1; +} + +#else +static inline int copy_to_user_policy_type(struct xfrm_policy *xp, struct sk_buff *skb) +{ + return 0; +} +#endif + static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr) { struct xfrm_dump_info *sp = ptr; @@ -1060,6 +1126,8 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr goto nlmsg_failure; if (copy_to_user_sec_ctx(xp, skb)) goto nlmsg_failure; + if (copy_to_user_policy_type(xp, skb) < 0) + goto nlmsg_failure; nlh->nlmsg_len = skb->tail - b; out: @@ -1081,7 +1149,10 @@ static int xfrm_dump_policy(struct sk_buff *skb, struct netlink_callback *cb) info.nlmsg_flags = NLM_F_MULTI; info.this_idx = 0; info.start_idx = cb->args[0]; - (void) xfrm_policy_walk(dump_one_policy, &info); + (void) xfrm_policy_walk(XFRM_POLICY_TYPE_MAIN, dump_one_policy, &info); +#ifdef CONFIG_XFRM_SUB_POLICY + (void) xfrm_policy_walk(XFRM_POLICY_TYPE_SUB, dump_one_policy, &info); +#endif cb->args[0] = info.this_idx; return skb->len; @@ -1117,6 +1188,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr { struct xfrm_policy *xp; struct xfrm_userpolicy_id *p; + __u8 type = XFRM_POLICY_TYPE_MAIN; int err; struct km_event c; int delete; @@ -1124,12 +1196,16 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr p = NLMSG_DATA(nlh); delete = nlh->nlmsg_type == XFRM_MSG_DELPOLICY; + err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); + if (err) + return err; + err = verify_policy_dir(p->dir); if (err) return err; if (p->index) - xp = xfrm_policy_byid(p->dir, p->index, delete); + xp = xfrm_policy_byid(type, p->dir, p->index, delete); else { struct rtattr **rtattrs = (struct rtattr **)xfrma; struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1]; @@ -1146,7 +1222,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfr if ((err = security_xfrm_policy_alloc(&tmp, uctx))) return err; } - xp = xfrm_policy_bysel_ctx(p->dir, &p->sel, tmp.security, delete); + xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, delete); security_xfrm_policy_free(&tmp); } if (xp == NULL) @@ -1329,9 +1405,16 @@ out: static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, void **xfrma) { -struct km_event c; + struct km_event c; + __u8 type = XFRM_POLICY_TYPE_MAIN; + int err; + + err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); + if (err) + return err; - xfrm_policy_flush(); + xfrm_policy_flush(type); + c.data.type = type; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; c.pid = nlh->nlmsg_pid; @@ -1344,10 +1427,15 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void * struct xfrm_policy *xp; struct xfrm_user_polexpire *up = NLMSG_DATA(nlh); struct xfrm_userpolicy_info *p = &up->pol; + __u8 type = XFRM_POLICY_TYPE_MAIN; int err = -ENOENT; + err = copy_from_user_policy_type(&type, (struct rtattr **)xfrma); + if (err) + return err; + if (p->index) - xp = xfrm_policy_byid(p->dir, p->index, 0); + xp = xfrm_policy_byid(type, p->dir, p->index, 0); else { struct rtattr **rtattrs = (struct rtattr **)xfrma; struct rtattr *rt = rtattrs[XFRMA_SEC_CTX-1]; @@ -1364,7 +1452,7 @@ static int xfrm_add_pol_expire(struct sk_buff *skb, struct nlmsghdr *nlh, void * if ((err = security_xfrm_policy_alloc(&tmp, uctx))) return err; } - xp = xfrm_policy_bysel_ctx(p->dir, &p->sel, tmp.security, 0); + xp = xfrm_policy_bysel_ctx(type, p->dir, &p->sel, tmp.security, 0); security_xfrm_policy_free(&tmp); } @@ -1818,6 +1906,8 @@ static int build_acquire(struct sk_buff *skb, struct xfrm_state *x, goto nlmsg_failure; if (copy_to_user_state_sec_ctx(x, skb)) goto nlmsg_failure; + if (copy_to_user_policy_type(xp, skb) < 0) + goto nlmsg_failure; nlh->nlmsg_len = skb->tail - b; return skb->len; @@ -1898,6 +1988,7 @@ static struct xfrm_policy *xfrm_compile_policy(struct sock *sk, int opt, } copy_from_user_policy(xp, p); + xp->type = XFRM_POLICY_TYPE_MAIN; copy_templates(xp, ut, nr); if (!xp->security) { @@ -1931,6 +2022,8 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, goto nlmsg_failure; if (copy_to_user_sec_ctx(xp, skb)) goto nlmsg_failure; + if (copy_to_user_policy_type(xp, skb) < 0) + goto nlmsg_failure; upe->hard = !!hard; nlh->nlmsg_len = skb->tail - b; @@ -2002,6 +2095,8 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, struct km_event * copy_to_user_policy(xp, p, dir); if (copy_to_user_tmpl(xp, skb) < 0) goto nlmsg_failure; + if (copy_to_user_policy_type(xp, skb) < 0) + goto nlmsg_failure; nlh->nlmsg_len = skb->tail - b; @@ -2019,6 +2114,9 @@ static int xfrm_notify_policy_flush(struct km_event *c) struct nlmsghdr *nlh; struct sk_buff *skb; unsigned char *b; +#ifdef CONFIG_XFRM_SUB_POLICY + struct xfrm_userpolicy_type upt; +#endif int len = NLMSG_LENGTH(0); skb = alloc_skb(len, GFP_ATOMIC); @@ -2028,6 +2126,13 @@ static int xfrm_notify_policy_flush(struct km_event *c) nlh = NLMSG_PUT(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0); + nlh->nlmsg_flags = 0; + +#ifdef CONFIG_XFRM_SUB_POLICY + memset(&upt, 0, sizeof(upt)); + upt.type = c->data.type; + RTA_PUT(skb, XFRMA_POLICY_TYPE, sizeof(upt), &upt); +#endif nlh->nlmsg_len = skb->tail - b; @@ -2035,6 +2140,9 @@ static int xfrm_notify_policy_flush(struct km_event *c) return netlink_broadcast(xfrm_nl, skb, 0, XFRMNLGRP_POLICY, GFP_ATOMIC); nlmsg_failure: +#ifdef CONFIG_XFRM_SUB_POLICY +rtattr_failure: +#endif kfree_skb(skb); return -1; } -- cgit v1.3-8-gc7d7 From 2770834c9f44afd1bfa13914c7285470775af657 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 00:13:10 -0700 Subject: [XFRM]: Pull xfrm_state_bydst hash table knowledge out of afinfo. Signed-off-by: David S. Miller --- include/net/xfrm.h | 16 ------- net/ipv4/xfrm4_state.c | 53 ------------------------ net/ipv6/xfrm6_state.c | 56 ------------------------- net/xfrm/xfrm_state.c | 110 ++++++++++++++++++++++++++++++++++++++++++++----- 4 files changed, 100 insertions(+), 135 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index c75b3287d8f8..cc83443f301e 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -243,7 +243,6 @@ extern int __xfrm_state_delete(struct xfrm_state *x); struct xfrm_state_afinfo { unsigned short family; - struct list_head *state_bydst; struct list_head *state_bysrc; struct list_head *state_byspi; int (*init_flags)(struct xfrm_state *x); @@ -252,9 +251,6 @@ struct xfrm_state_afinfo { xfrm_address_t *daddr, xfrm_address_t *saddr); struct xfrm_state *(*state_lookup)(xfrm_address_t *daddr, u32 spi, u8 proto); struct xfrm_state *(*state_lookup_byaddr)(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto); - struct xfrm_state *(*find_acq)(u8 mode, u32 reqid, u8 proto, - xfrm_address_t *daddr, xfrm_address_t *saddr, - int create); int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n); int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n); }; @@ -455,18 +451,6 @@ unsigned __xfrm6_dst_hash(xfrm_address_t *addr) return h; } -static __inline__ -unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) -{ - switch (family) { - case AF_INET: - return __xfrm4_dst_hash(addr); - case AF_INET6: - return __xfrm6_dst_hash(addr); - } - return 0; -} - static __inline__ unsigned __xfrm4_src_hash(xfrm_address_t *addr) { diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 616be131b4e3..9dc1afc17b6d 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -88,65 +88,12 @@ __xfrm4_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, return NULL; } -static struct xfrm_state * -__xfrm4_find_acq(u8 mode, u32 reqid, u8 proto, - xfrm_address_t *daddr, xfrm_address_t *saddr, - int create) -{ - struct xfrm_state *x, *x0; - unsigned h = __xfrm4_dst_hash(daddr); - - x0 = NULL; - - list_for_each_entry(x, xfrm4_state_afinfo.state_bydst+h, bydst) { - if (x->props.family == AF_INET && - daddr->a4 == x->id.daddr.a4 && - mode == x->props.mode && - proto == x->id.proto && - saddr->a4 == x->props.saddr.a4 && - reqid == x->props.reqid && - x->km.state == XFRM_STATE_ACQ && - !x->id.spi) { - x0 = x; - break; - } - } - if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) { - x0->sel.daddr.a4 = daddr->a4; - x0->sel.saddr.a4 = saddr->a4; - x0->sel.prefixlen_d = 32; - x0->sel.prefixlen_s = 32; - x0->props.saddr.a4 = saddr->a4; - x0->km.state = XFRM_STATE_ACQ; - x0->id.daddr.a4 = daddr->a4; - x0->id.proto = proto; - x0->props.family = AF_INET; - x0->props.mode = mode; - x0->props.reqid = reqid; - x0->props.family = AF_INET; - x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; - xfrm_state_hold(x0); - x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; - add_timer(&x0->timer); - xfrm_state_hold(x0); - list_add_tail(&x0->bydst, xfrm4_state_afinfo.state_bydst+h); - h = __xfrm4_src_hash(saddr); - xfrm_state_hold(x0); - list_add_tail(&x0->bysrc, xfrm4_state_afinfo.state_bysrc+h); - wake_up(&km_waitq); - } - if (x0) - xfrm_state_hold(x0); - return x0; -} - static struct xfrm_state_afinfo xfrm4_state_afinfo = { .family = AF_INET, .init_flags = xfrm4_init_flags, .init_tempsel = __xfrm4_init_tempsel, .state_lookup = __xfrm4_state_lookup, .state_lookup_byaddr = __xfrm4_state_lookup_byaddr, - .find_acq = __xfrm4_find_acq, }; void __init xfrm4_state_init(void) diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 6269584e610e..40fcaab7e028 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -101,61 +101,6 @@ __xfrm6_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto) return NULL; } -static struct xfrm_state * -__xfrm6_find_acq(u8 mode, u32 reqid, u8 proto, - xfrm_address_t *daddr, xfrm_address_t *saddr, - int create) -{ - struct xfrm_state *x, *x0; - unsigned h = __xfrm6_dst_hash(daddr); - - x0 = NULL; - - list_for_each_entry(x, xfrm6_state_afinfo.state_bydst+h, bydst) { - if (x->props.family == AF_INET6 && - ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) && - mode == x->props.mode && - proto == x->id.proto && - ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)x->props.saddr.a6) && - reqid == x->props.reqid && - x->km.state == XFRM_STATE_ACQ && - !x->id.spi) { - x0 = x; - break; - } - } - if (!x0 && create && (x0 = xfrm_state_alloc()) != NULL) { - ipv6_addr_copy((struct in6_addr *)x0->sel.daddr.a6, - (struct in6_addr *)daddr); - ipv6_addr_copy((struct in6_addr *)x0->sel.saddr.a6, - (struct in6_addr *)saddr); - x0->sel.prefixlen_d = 128; - x0->sel.prefixlen_s = 128; - ipv6_addr_copy((struct in6_addr *)x0->props.saddr.a6, - (struct in6_addr *)saddr); - x0->km.state = XFRM_STATE_ACQ; - ipv6_addr_copy((struct in6_addr *)x0->id.daddr.a6, - (struct in6_addr *)daddr); - x0->id.proto = proto; - x0->props.family = AF_INET6; - x0->props.mode = mode; - x0->props.reqid = reqid; - x0->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; - xfrm_state_hold(x0); - x0->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; - add_timer(&x0->timer); - xfrm_state_hold(x0); - list_add_tail(&x0->bydst, xfrm6_state_afinfo.state_bydst+h); - h = __xfrm6_src_hash(saddr); - xfrm_state_hold(x0); - list_add_tail(&x0->bysrc, xfrm6_state_afinfo.state_bysrc+h); - wake_up(&km_waitq); - } - if (x0) - xfrm_state_hold(x0); - return x0; -} - static int __xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n) { @@ -280,7 +225,6 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = { .init_tempsel = __xfrm6_init_tempsel, .state_lookup = __xfrm6_state_lookup, .state_lookup_byaddr = __xfrm6_state_lookup_byaddr, - .find_acq = __xfrm6_find_acq, .tmpl_sort = __xfrm6_tmpl_sort, .state_sort = __xfrm6_state_sort, }; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 622e92a08d0b..80f5f9dc2b9e 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -48,6 +48,18 @@ static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE]; static struct list_head xfrm_state_bysrc[XFRM_DST_HSIZE]; static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE]; +static __inline__ +unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) +{ + switch (family) { + case AF_INET: + return __xfrm4_dst_hash(addr); + case AF_INET6: + return __xfrm6_dst_hash(addr); + } + return 0; +} + DECLARE_WAIT_QUEUE_HEAD(km_waitq); EXPORT_SYMBOL(km_waitq); @@ -489,6 +501,89 @@ void xfrm_state_insert(struct xfrm_state *x) } EXPORT_SYMBOL(xfrm_state_insert); +/* xfrm_state_lock is held */ +static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) +{ + unsigned int h = xfrm_dst_hash(daddr, family); + struct xfrm_state *x; + + list_for_each_entry(x, xfrm_state_bydst+h, bydst) { + if (x->props.reqid != reqid || + x->props.mode != mode || + x->props.family != family || + x->km.state != XFRM_STATE_ACQ || + x->id.spi != 0) + continue; + + switch (family) { + case AF_INET: + if (x->id.daddr.a4 != daddr->a4 || + x->props.saddr.a4 != saddr->a4) + continue; + break; + case AF_INET6: + if (!ipv6_addr_equal((struct in6_addr *)x->id.daddr.a6, + (struct in6_addr *)daddr) || + !ipv6_addr_equal((struct in6_addr *) + x->props.saddr.a6, + (struct in6_addr *)saddr)) + continue; + break; + }; + + xfrm_state_hold(x); + return x; + } + + if (!create) + return NULL; + + x = xfrm_state_alloc(); + if (likely(x)) { + switch (family) { + case AF_INET: + x->sel.daddr.a4 = daddr->a4; + x->sel.saddr.a4 = saddr->a4; + x->sel.prefixlen_d = 32; + x->sel.prefixlen_s = 32; + x->props.saddr.a4 = saddr->a4; + x->id.daddr.a4 = daddr->a4; + break; + + case AF_INET6: + ipv6_addr_copy((struct in6_addr *)x->sel.daddr.a6, + (struct in6_addr *)daddr); + ipv6_addr_copy((struct in6_addr *)x->sel.saddr.a6, + (struct in6_addr *)saddr); + x->sel.prefixlen_d = 128; + x->sel.prefixlen_s = 128; + ipv6_addr_copy((struct in6_addr *)x->props.saddr.a6, + (struct in6_addr *)saddr); + ipv6_addr_copy((struct in6_addr *)x->id.daddr.a6, + (struct in6_addr *)daddr); + break; + }; + + x->km.state = XFRM_STATE_ACQ; + x->id.proto = proto; + x->props.family = family; + x->props.mode = mode; + x->props.reqid = reqid; + x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; + xfrm_state_hold(x); + x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; + add_timer(&x->timer); + xfrm_state_hold(x); + list_add_tail(&x->bydst, xfrm_state_bydst+h); + h = xfrm_src_hash(saddr, family); + xfrm_state_hold(x); + list_add_tail(&x->bysrc, xfrm_state_bysrc+h); + wake_up(&km_waitq); + } + + return x; +} + static inline struct xfrm_state * __xfrm_state_locate(struct xfrm_state_afinfo *afinfo, struct xfrm_state *x, int use_spi) @@ -533,9 +628,9 @@ int xfrm_state_add(struct xfrm_state *x) } if (use_spi && !x1) - x1 = afinfo->find_acq( - x->props.mode, x->props.reqid, x->id.proto, - &x->id.daddr, &x->props.saddr, 0); + x1 = __find_acq_core(family, x->props.mode, x->props.reqid, + x->id.proto, + &x->id.daddr, &x->props.saddr, 0); __xfrm_state_insert(x); err = 0; @@ -716,14 +811,11 @@ xfrm_find_acq(u8 mode, u32 reqid, u8 proto, int create, unsigned short family) { struct xfrm_state *x; - struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); - if (!afinfo) - return NULL; spin_lock_bh(&xfrm_state_lock); - x = afinfo->find_acq(mode, reqid, proto, daddr, saddr, create); + x = __find_acq_core(family, mode, reqid, proto, daddr, saddr, create); spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); + return x; } EXPORT_SYMBOL(xfrm_find_acq); @@ -1181,7 +1273,6 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo) if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL)) err = -ENOBUFS; else { - afinfo->state_bydst = xfrm_state_bydst; afinfo->state_bysrc = xfrm_state_bysrc; afinfo->state_byspi = xfrm_state_byspi; xfrm_state_afinfo[afinfo->family] = afinfo; @@ -1206,7 +1297,6 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) xfrm_state_afinfo[afinfo->family] = NULL; afinfo->state_byspi = NULL; afinfo->state_bysrc = NULL; - afinfo->state_bydst = NULL; } } write_unlock_bh(&xfrm_state_afinfo_lock); -- cgit v1.3-8-gc7d7 From edcd582152090bfb0ccb4ad444c151798a73eda8 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 00:42:45 -0700 Subject: [XFRM]: Pull xfrm_state_by{spi,src} hash table knowledge out of afinfo. Signed-off-by: David S. Miller --- include/net/xfrm.h | 78 ------------------ net/ipv4/xfrm4_state.c | 28 ------- net/ipv6/xfrm6_state.c | 40 ---------- net/xfrm/xfrm_state.c | 210 +++++++++++++++++++++++++++++++++++++------------ 4 files changed, 159 insertions(+), 197 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index cc83443f301e..dd3b84b9c04e 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -243,14 +243,10 @@ extern int __xfrm_state_delete(struct xfrm_state *x); struct xfrm_state_afinfo { unsigned short family; - struct list_head *state_bysrc; - struct list_head *state_byspi; int (*init_flags)(struct xfrm_state *x); void (*init_tempsel)(struct xfrm_state *x, struct flowi *fl, struct xfrm_tmpl *tmpl, xfrm_address_t *daddr, xfrm_address_t *saddr); - struct xfrm_state *(*state_lookup)(xfrm_address_t *daddr, u32 spi, u8 proto); - struct xfrm_state *(*state_lookup_byaddr)(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto); int (*tmpl_sort)(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n); int (*state_sort)(struct xfrm_state **dst, struct xfrm_state **src, int n); }; @@ -431,80 +427,6 @@ static inline void xfrm_pols_put(struct xfrm_policy **pols, int npols) } #endif -#define XFRM_DST_HSIZE 1024 - -static __inline__ -unsigned __xfrm4_dst_hash(xfrm_address_t *addr) -{ - unsigned h; - h = ntohl(addr->a4); - h = (h ^ (h>>16)) % XFRM_DST_HSIZE; - return h; -} - -static __inline__ -unsigned __xfrm6_dst_hash(xfrm_address_t *addr) -{ - unsigned h; - h = ntohl(addr->a6[2]^addr->a6[3]); - h = (h ^ (h>>16)) % XFRM_DST_HSIZE; - return h; -} - -static __inline__ -unsigned __xfrm4_src_hash(xfrm_address_t *addr) -{ - return __xfrm4_dst_hash(addr); -} - -static __inline__ -unsigned __xfrm6_src_hash(xfrm_address_t *addr) -{ - return __xfrm6_dst_hash(addr); -} - -static __inline__ -unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) -{ - switch (family) { - case AF_INET: - return __xfrm4_src_hash(addr); - case AF_INET6: - return __xfrm6_src_hash(addr); - } - return 0; -} - -static __inline__ -unsigned __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) -{ - unsigned h; - h = ntohl(addr->a4^spi^proto); - h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; - return h; -} - -static __inline__ -unsigned __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) -{ - unsigned h; - h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto); - h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; - return h; -} - -static __inline__ -unsigned xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family) -{ - switch (family) { - case AF_INET: - return __xfrm4_spi_hash(addr, spi, proto); - case AF_INET6: - return __xfrm6_spi_hash(addr, spi, proto); - } - return 0; /*XXX*/ -} - extern void __xfrm_state_destroy(struct xfrm_state *); static inline void __xfrm_state_put(struct xfrm_state *x) diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 9dc1afc17b6d..6a2a4ab42772 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -62,38 +62,10 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->props.family = AF_INET; } -static struct xfrm_state * -__xfrm4_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto) -{ - unsigned h = __xfrm4_spi_hash(daddr, spi, proto); - struct xfrm_state *x; - - list_for_each_entry(x, xfrm4_state_afinfo.state_byspi+h, byspi) { - if (x->props.family == AF_INET && - spi == x->id.spi && - daddr->a4 == x->id.daddr.a4 && - proto == x->id.proto) { - xfrm_state_hold(x); - return x; - } - } - return NULL; -} - -/* placeholder until ipv4's code is written */ -static struct xfrm_state * -__xfrm4_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, - u8 proto) -{ - return NULL; -} - static struct xfrm_state_afinfo xfrm4_state_afinfo = { .family = AF_INET, .init_flags = xfrm4_init_flags, .init_tempsel = __xfrm4_init_tempsel, - .state_lookup = __xfrm4_state_lookup, - .state_lookup_byaddr = __xfrm4_state_lookup_byaddr, }; void __init xfrm4_state_init(void) diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index 40fcaab7e028..d88cd92c864e 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -63,44 +63,6 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->props.family = AF_INET6; } -static struct xfrm_state * -__xfrm6_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, - u8 proto) -{ - struct xfrm_state *x = NULL; - unsigned h; - - h = __xfrm6_src_hash(saddr); - list_for_each_entry(x, xfrm6_state_afinfo.state_bysrc+h, bysrc) { - if (x->props.family == AF_INET6 && - ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) && - ipv6_addr_equal((struct in6_addr *)saddr, (struct in6_addr *)x->props.saddr.a6) && - proto == x->id.proto) { - xfrm_state_hold(x); - return x; - } - } - return NULL; -} - -static struct xfrm_state * -__xfrm6_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto) -{ - unsigned h = __xfrm6_spi_hash(daddr, spi, proto); - struct xfrm_state *x; - - list_for_each_entry(x, xfrm6_state_afinfo.state_byspi+h, byspi) { - if (x->props.family == AF_INET6 && - spi == x->id.spi && - ipv6_addr_equal((struct in6_addr *)daddr, (struct in6_addr *)x->id.daddr.a6) && - proto == x->id.proto) { - xfrm_state_hold(x); - return x; - } - } - return NULL; -} - static int __xfrm6_state_sort(struct xfrm_state **dst, struct xfrm_state **src, int n) { @@ -223,8 +185,6 @@ __xfrm6_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, int n) static struct xfrm_state_afinfo xfrm6_state_afinfo = { .family = AF_INET6, .init_tempsel = __xfrm6_init_tempsel, - .state_lookup = __xfrm6_state_lookup, - .state_lookup_byaddr = __xfrm6_state_lookup_byaddr, .tmpl_sort = __xfrm6_tmpl_sort, .state_sort = __xfrm6_state_sort, }; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 80f5f9dc2b9e..4a3832f81c37 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -38,6 +38,8 @@ EXPORT_SYMBOL(sysctl_xfrm_aevent_rseqth); static DEFINE_SPINLOCK(xfrm_state_lock); +#define XFRM_DST_HSIZE 1024 + /* Hash table to find appropriate SA towards given target (endpoint * of tunnel or destination of transport mode) allowed by selector. * @@ -48,6 +50,48 @@ static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE]; static struct list_head xfrm_state_bysrc[XFRM_DST_HSIZE]; static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE]; +static __inline__ +unsigned __xfrm4_dst_hash(xfrm_address_t *addr) +{ + unsigned h; + h = ntohl(addr->a4); + h = (h ^ (h>>16)) % XFRM_DST_HSIZE; + return h; +} + +static __inline__ +unsigned __xfrm6_dst_hash(xfrm_address_t *addr) +{ + unsigned h; + h = ntohl(addr->a6[2]^addr->a6[3]); + h = (h ^ (h>>16)) % XFRM_DST_HSIZE; + return h; +} + +static __inline__ +unsigned __xfrm4_src_hash(xfrm_address_t *addr) +{ + return __xfrm4_dst_hash(addr); +} + +static __inline__ +unsigned __xfrm6_src_hash(xfrm_address_t *addr) +{ + return __xfrm6_dst_hash(addr); +} + +static __inline__ +unsigned xfrm_src_hash(xfrm_address_t *addr, unsigned short family) +{ + switch (family) { + case AF_INET: + return __xfrm4_src_hash(addr); + case AF_INET6: + return __xfrm6_src_hash(addr); + } + return 0; +} + static __inline__ unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) { @@ -60,6 +104,36 @@ unsigned xfrm_dst_hash(xfrm_address_t *addr, unsigned short family) return 0; } +static __inline__ +unsigned __xfrm4_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) +{ + unsigned h; + h = ntohl(addr->a4^spi^proto); + h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; + return h; +} + +static __inline__ +unsigned __xfrm6_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto) +{ + unsigned h; + h = ntohl(addr->a6[2]^addr->a6[3]^spi^proto); + h = (h ^ (h>>10) ^ (h>>20)) % XFRM_DST_HSIZE; + return h; +} + +static __inline__ +unsigned xfrm_spi_hash(xfrm_address_t *addr, u32 spi, u8 proto, unsigned short family) +{ + switch (family) { + case AF_INET: + return __xfrm4_spi_hash(addr, spi, proto); + case AF_INET6: + return __xfrm6_spi_hash(addr, spi, proto); + } + return 0; /*XXX*/ +} + DECLARE_WAIT_QUEUE_HEAD(km_waitq); EXPORT_SYMBOL(km_waitq); @@ -342,6 +416,83 @@ xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl, return 0; } +static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family) +{ + unsigned int h = xfrm_spi_hash(daddr, spi, proto, family); + struct xfrm_state *x; + + list_for_each_entry(x, xfrm_state_byspi+h, byspi) { + if (x->props.family != family || + x->id.spi != spi || + x->id.proto != proto) + continue; + + switch (family) { + case AF_INET: + if (x->id.daddr.a4 != daddr->a4) + continue; + break; + case AF_INET6: + if (!ipv6_addr_equal((struct in6_addr *)daddr, + (struct in6_addr *) + x->id.daddr.a6)) + continue; + break; + }; + + xfrm_state_hold(x); + return x; + } + + return NULL; +} + +static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) +{ + unsigned int h = xfrm_src_hash(saddr, family); + struct xfrm_state *x; + + list_for_each_entry(x, xfrm_state_bysrc+h, bysrc) { + if (x->props.family != family || + x->id.proto != proto) + continue; + + switch (family) { + case AF_INET: + if (x->id.daddr.a4 != daddr->a4 || + x->props.saddr.a4 != saddr->a4) + continue; + break; + case AF_INET6: + if (!ipv6_addr_equal((struct in6_addr *)daddr, + (struct in6_addr *) + x->id.daddr.a6) || + !ipv6_addr_equal((struct in6_addr *)saddr, + (struct in6_addr *) + x->props.saddr.a6)) + continue; + break; + }; + + xfrm_state_hold(x); + return x; + } + + return NULL; +} + +static inline struct xfrm_state * +__xfrm_state_locate(struct xfrm_state *x, int use_spi, int family) +{ + if (use_spi) + return __xfrm_state_lookup(&x->id.daddr, x->id.spi, + x->id.proto, family); + else + return __xfrm_state_lookup_byaddr(&x->id.daddr, + &x->props.saddr, + x->id.proto, family); +} + struct xfrm_state * xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct flowi *fl, struct xfrm_tmpl *tmpl, @@ -353,14 +504,7 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, int acquire_in_progress = 0; int error = 0; struct xfrm_state *best = NULL; - struct xfrm_state_afinfo *afinfo; - afinfo = xfrm_state_get_afinfo(family); - if (afinfo == NULL) { - *err = -EAFNOSUPPORT; - return NULL; - } - spin_lock_bh(&xfrm_state_lock); list_for_each_entry(x, xfrm_state_bydst+h, bydst) { if (x->props.family == family && @@ -406,8 +550,8 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, x = best; if (!x && !error && !acquire_in_progress) { if (tmpl->id.spi && - (x0 = afinfo->state_lookup(daddr, tmpl->id.spi, - tmpl->id.proto)) != NULL) { + (x0 = __xfrm_state_lookup(daddr, tmpl->id.spi, + tmpl->id.proto, family)) != NULL) { xfrm_state_put(x0); error = -EEXIST; goto out; @@ -457,7 +601,6 @@ out: else *err = acquire_in_progress ? -EAGAIN : error; spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); return x; } @@ -584,34 +727,20 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re return x; } -static inline struct xfrm_state * -__xfrm_state_locate(struct xfrm_state_afinfo *afinfo, struct xfrm_state *x, - int use_spi) -{ - if (use_spi) - return afinfo->state_lookup(&x->id.daddr, x->id.spi, x->id.proto); - else - return afinfo->state_lookup_byaddr(&x->id.daddr, &x->props.saddr, x->id.proto); -} - static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq); int xfrm_state_add(struct xfrm_state *x) { - struct xfrm_state_afinfo *afinfo; struct xfrm_state *x1; int family; int err; int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); family = x->props.family; - afinfo = xfrm_state_get_afinfo(family); - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; spin_lock_bh(&xfrm_state_lock); - x1 = __xfrm_state_locate(afinfo, x, use_spi); + x1 = __xfrm_state_locate(x, use_spi, family); if (x1) { xfrm_state_put(x1); x1 = NULL; @@ -637,7 +766,6 @@ int xfrm_state_add(struct xfrm_state *x) out: spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); if (!err) xfrm_flush_all_bundles(); @@ -653,17 +781,12 @@ EXPORT_SYMBOL(xfrm_state_add); int xfrm_state_update(struct xfrm_state *x) { - struct xfrm_state_afinfo *afinfo; struct xfrm_state *x1; int err; int use_spi = xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY); - afinfo = xfrm_state_get_afinfo(x->props.family); - if (unlikely(afinfo == NULL)) - return -EAFNOSUPPORT; - spin_lock_bh(&xfrm_state_lock); - x1 = __xfrm_state_locate(afinfo, x, use_spi); + x1 = __xfrm_state_locate(x, use_spi, x->props.family); err = -ESRCH; if (!x1) @@ -683,7 +806,6 @@ int xfrm_state_update(struct xfrm_state *x) out: spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); if (err) return err; @@ -776,14 +898,10 @@ xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family) { struct xfrm_state *x; - struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); - if (!afinfo) - return NULL; spin_lock_bh(&xfrm_state_lock); - x = afinfo->state_lookup(daddr, spi, proto); + x = __xfrm_state_lookup(daddr, spi, proto, family); spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); return x; } EXPORT_SYMBOL(xfrm_state_lookup); @@ -793,14 +911,10 @@ xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family) { struct xfrm_state *x; - struct xfrm_state_afinfo *afinfo = xfrm_state_get_afinfo(family); - if (!afinfo) - return NULL; spin_lock_bh(&xfrm_state_lock); - x = afinfo->state_lookup_byaddr(daddr, saddr, proto); + x = __xfrm_state_lookup_byaddr(daddr, saddr, proto, family); spin_unlock_bh(&xfrm_state_lock); - xfrm_state_put_afinfo(afinfo); return x; } EXPORT_SYMBOL(xfrm_state_lookup_byaddr); @@ -1272,11 +1386,8 @@ int xfrm_state_register_afinfo(struct xfrm_state_afinfo *afinfo) write_lock_bh(&xfrm_state_afinfo_lock); if (unlikely(xfrm_state_afinfo[afinfo->family] != NULL)) err = -ENOBUFS; - else { - afinfo->state_bysrc = xfrm_state_bysrc; - afinfo->state_byspi = xfrm_state_byspi; + else xfrm_state_afinfo[afinfo->family] = afinfo; - } write_unlock_bh(&xfrm_state_afinfo_lock); return err; } @@ -1293,11 +1404,8 @@ int xfrm_state_unregister_afinfo(struct xfrm_state_afinfo *afinfo) if (likely(xfrm_state_afinfo[afinfo->family] != NULL)) { if (unlikely(xfrm_state_afinfo[afinfo->family] != afinfo)) err = -EINVAL; - else { + else xfrm_state_afinfo[afinfo->family] = NULL; - afinfo->state_byspi = NULL; - afinfo->state_bysrc = NULL; - } } write_unlock_bh(&xfrm_state_afinfo_lock); return err; -- cgit v1.3-8-gc7d7 From 8f126e37c0b250310a48a609bedf92a19a5559ec Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 02:45:07 -0700 Subject: [XFRM]: Convert xfrm_state hash linkage to hlists. Signed-off-by: David S. Miller --- include/net/xfrm.h | 6 ++-- net/xfrm/xfrm_state.c | 92 ++++++++++++++++++++++++++++----------------------- 2 files changed, 54 insertions(+), 44 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index dd3b84b9c04e..3405e5d9d51c 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -94,9 +94,9 @@ extern struct mutex xfrm_cfg_mutex; struct xfrm_state { /* Note: bydst is re-used during gc */ - struct list_head bydst; - struct list_head bysrc; - struct list_head byspi; + struct hlist_node bydst; + struct hlist_node bysrc; + struct hlist_node byspi; atomic_t refcnt; spinlock_t lock; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 4a3832f81c37..fe3c8c38d5e1 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -46,9 +46,9 @@ static DEFINE_SPINLOCK(xfrm_state_lock); * Main use is finding SA after policy selected tunnel or transport mode. * Also, it can be used by ah/esp icmp error handler to find offending SA. */ -static struct list_head xfrm_state_bydst[XFRM_DST_HSIZE]; -static struct list_head xfrm_state_bysrc[XFRM_DST_HSIZE]; -static struct list_head xfrm_state_byspi[XFRM_DST_HSIZE]; +static struct hlist_head xfrm_state_bydst[XFRM_DST_HSIZE]; +static struct hlist_head xfrm_state_bysrc[XFRM_DST_HSIZE]; +static struct hlist_head xfrm_state_byspi[XFRM_DST_HSIZE]; static __inline__ unsigned __xfrm4_dst_hash(xfrm_address_t *addr) @@ -141,7 +141,7 @@ static DEFINE_RWLOCK(xfrm_state_afinfo_lock); static struct xfrm_state_afinfo *xfrm_state_afinfo[NPROTO]; static struct work_struct xfrm_state_gc_work; -static struct list_head xfrm_state_gc_list = LIST_HEAD_INIT(xfrm_state_gc_list); +static HLIST_HEAD(xfrm_state_gc_list); static DEFINE_SPINLOCK(xfrm_state_gc_lock); static int xfrm_state_gc_flush_bundles; @@ -178,8 +178,8 @@ static void xfrm_state_gc_destroy(struct xfrm_state *x) static void xfrm_state_gc_task(void *data) { struct xfrm_state *x; - struct list_head *entry, *tmp; - struct list_head gc_list = LIST_HEAD_INIT(gc_list); + struct hlist_node *entry, *tmp; + struct hlist_head gc_list; if (xfrm_state_gc_flush_bundles) { xfrm_state_gc_flush_bundles = 0; @@ -187,13 +187,13 @@ static void xfrm_state_gc_task(void *data) } spin_lock_bh(&xfrm_state_gc_lock); - list_splice_init(&xfrm_state_gc_list, &gc_list); + gc_list.first = xfrm_state_gc_list.first; + INIT_HLIST_HEAD(&xfrm_state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); - list_for_each_safe(entry, tmp, &gc_list) { - x = list_entry(entry, struct xfrm_state, bydst); + hlist_for_each_entry_safe(x, entry, tmp, &gc_list, bydst) xfrm_state_gc_destroy(x); - } + wake_up(&km_waitq); } @@ -287,9 +287,9 @@ struct xfrm_state *xfrm_state_alloc(void) if (x) { atomic_set(&x->refcnt, 1); atomic_set(&x->tunnel_users, 0); - INIT_LIST_HEAD(&x->bydst); - INIT_LIST_HEAD(&x->bysrc); - INIT_LIST_HEAD(&x->byspi); + INIT_HLIST_NODE(&x->bydst); + INIT_HLIST_NODE(&x->bysrc); + INIT_HLIST_NODE(&x->byspi); init_timer(&x->timer); x->timer.function = xfrm_timer_handler; x->timer.data = (unsigned long)x; @@ -314,7 +314,7 @@ void __xfrm_state_destroy(struct xfrm_state *x) BUG_TRAP(x->km.state == XFRM_STATE_DEAD); spin_lock_bh(&xfrm_state_gc_lock); - list_add(&x->bydst, &xfrm_state_gc_list); + hlist_add_head(&x->bydst, &xfrm_state_gc_list); spin_unlock_bh(&xfrm_state_gc_lock); schedule_work(&xfrm_state_gc_work); } @@ -327,12 +327,12 @@ int __xfrm_state_delete(struct xfrm_state *x) if (x->km.state != XFRM_STATE_DEAD) { x->km.state = XFRM_STATE_DEAD; spin_lock(&xfrm_state_lock); - list_del(&x->bydst); + hlist_del(&x->bydst); __xfrm_state_put(x); - list_del(&x->bysrc); + hlist_del(&x->bysrc); __xfrm_state_put(x); if (x->id.spi) { - list_del(&x->byspi); + hlist_del(&x->byspi); __xfrm_state_put(x); } spin_unlock(&xfrm_state_lock); @@ -378,12 +378,13 @@ EXPORT_SYMBOL(xfrm_state_delete); void xfrm_state_flush(u8 proto) { int i; - struct xfrm_state *x; spin_lock_bh(&xfrm_state_lock); for (i = 0; i < XFRM_DST_HSIZE; i++) { + struct hlist_node *entry; + struct xfrm_state *x; restart: - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { + hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { if (!xfrm_state_kern(x) && xfrm_id_proto_match(x->id.proto, proto)) { xfrm_state_hold(x); @@ -420,8 +421,9 @@ static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 { unsigned int h = xfrm_spi_hash(daddr, spi, proto, family); struct xfrm_state *x; + struct hlist_node *entry; - list_for_each_entry(x, xfrm_state_byspi+h, byspi) { + hlist_for_each_entry(x, entry, xfrm_state_byspi+h, byspi) { if (x->props.family != family || x->id.spi != spi || x->id.proto != proto) @@ -451,8 +453,9 @@ static struct xfrm_state *__xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm { unsigned int h = xfrm_src_hash(saddr, family); struct xfrm_state *x; + struct hlist_node *entry; - list_for_each_entry(x, xfrm_state_bysrc+h, bysrc) { + hlist_for_each_entry(x, entry, xfrm_state_bysrc+h, bysrc) { if (x->props.family != family || x->id.proto != proto) continue; @@ -499,14 +502,15 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, struct xfrm_policy *pol, int *err, unsigned short family) { - unsigned h = xfrm_dst_hash(daddr, family); + unsigned int h = xfrm_dst_hash(daddr, family); + struct hlist_node *entry; struct xfrm_state *x, *x0; int acquire_in_progress = 0; int error = 0; struct xfrm_state *best = NULL; spin_lock_bh(&xfrm_state_lock); - list_for_each_entry(x, xfrm_state_bydst+h, bydst) { + hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { if (x->props.family == family && x->props.reqid == tmpl->reqid && !(x->props.flags & XFRM_STATE_WILDRECV) && @@ -575,13 +579,14 @@ xfrm_state_find(xfrm_address_t *daddr, xfrm_address_t *saddr, if (km_query(x, tmpl, pol) == 0) { x->km.state = XFRM_STATE_ACQ; - list_add_tail(&x->bydst, xfrm_state_bydst+h); + hlist_add_head(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); - list_add_tail(&x->bysrc, xfrm_state_bysrc+h); + h = xfrm_src_hash(saddr, family); + hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); xfrm_state_hold(x); if (x->id.spi) { h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, family); - list_add(&x->byspi, xfrm_state_byspi+h); + hlist_add_head(&x->byspi, xfrm_state_byspi+h); xfrm_state_hold(x); } x->lft.hard_add_expires_seconds = XFRM_ACQ_EXPIRES; @@ -608,19 +613,19 @@ static void __xfrm_state_insert(struct xfrm_state *x) { unsigned h = xfrm_dst_hash(&x->id.daddr, x->props.family); - list_add(&x->bydst, xfrm_state_bydst+h); + hlist_add_head(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); h = xfrm_src_hash(&x->props.saddr, x->props.family); - list_add(&x->bysrc, xfrm_state_bysrc+h); + hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); xfrm_state_hold(x); if (xfrm_id_proto_match(x->id.proto, IPSEC_PROTO_ANY)) { h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); - list_add(&x->byspi, xfrm_state_byspi+h); + hlist_add_head(&x->byspi, xfrm_state_byspi+h); xfrm_state_hold(x); } @@ -648,9 +653,10 @@ EXPORT_SYMBOL(xfrm_state_insert); static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create) { unsigned int h = xfrm_dst_hash(daddr, family); + struct hlist_node *entry; struct xfrm_state *x; - list_for_each_entry(x, xfrm_state_bydst+h, bydst) { + hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { if (x->props.reqid != reqid || x->props.mode != mode || x->props.family != family || @@ -717,10 +723,10 @@ static struct xfrm_state *__find_acq_core(unsigned short family, u8 mode, u32 re x->timer.expires = jiffies + XFRM_ACQ_EXPIRES*HZ; add_timer(&x->timer); xfrm_state_hold(x); - list_add_tail(&x->bydst, xfrm_state_bydst+h); + hlist_add_head(&x->bydst, xfrm_state_bydst+h); h = xfrm_src_hash(saddr, family); xfrm_state_hold(x); - list_add_tail(&x->bysrc, xfrm_state_bysrc+h); + hlist_add_head(&x->bysrc, xfrm_state_bysrc+h); wake_up(&km_waitq); } @@ -977,11 +983,14 @@ EXPORT_SYMBOL(xfrm_state_sort); static struct xfrm_state *__xfrm_find_acq_byseq(u32 seq) { int i; - struct xfrm_state *x; for (i = 0; i < XFRM_DST_HSIZE; i++) { - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { - if (x->km.seq == seq && x->km.state == XFRM_STATE_ACQ) { + struct hlist_node *entry; + struct xfrm_state *x; + + hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { + if (x->km.seq == seq && + x->km.state == XFRM_STATE_ACQ) { xfrm_state_hold(x); return x; } @@ -1047,7 +1056,7 @@ xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) if (x->id.spi) { spin_lock_bh(&xfrm_state_lock); h = xfrm_spi_hash(&x->id.daddr, x->id.spi, x->id.proto, x->props.family); - list_add(&x->byspi, xfrm_state_byspi+h); + hlist_add_head(&x->byspi, xfrm_state_byspi+h); xfrm_state_hold(x); spin_unlock_bh(&xfrm_state_lock); wake_up(&km_waitq); @@ -1060,12 +1069,13 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), { int i; struct xfrm_state *x; + struct hlist_node *entry; int count = 0; int err = 0; spin_lock_bh(&xfrm_state_lock); for (i = 0; i < XFRM_DST_HSIZE; i++) { - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { + hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { if (xfrm_id_proto_match(x->id.proto, proto)) count++; } @@ -1076,7 +1086,7 @@ int xfrm_state_walk(u8 proto, int (*func)(struct xfrm_state *, int, void*), } for (i = 0; i < XFRM_DST_HSIZE; i++) { - list_for_each_entry(x, xfrm_state_bydst+i, bydst) { + hlist_for_each_entry(x, entry, xfrm_state_bydst+i, bydst) { if (!xfrm_id_proto_match(x->id.proto, proto)) continue; err = func(x, --count, data); @@ -1524,9 +1534,9 @@ void __init xfrm_state_init(void) int i; for (i=0; i Date: Thu, 24 Aug 2006 03:18:09 -0700 Subject: [XFRM]: Add generation count to xfrm_state and xfrm_dst. Each xfrm_state inserted gets a new generation counter value. When a bundle is created, the xfrm_dst objects get the current generation counter of the xfrm_state they will attach to at dst->xfrm. xfrm_bundle_ok() will return false if it sees an xfrm_dst with a generation count different from the generation count of the xfrm_state that dst points to. This provides a facility by which to passively and cheaply invalidate cached IPSEC routes during SA database changes. Signed-off-by: David S. Miller --- include/net/xfrm.h | 3 +++ net/ipv4/xfrm4_policy.c | 1 + net/ipv6/xfrm6_policy.c | 1 + net/xfrm/xfrm_policy.c | 2 ++ net/xfrm/xfrm_state.c | 3 +++ 5 files changed, 10 insertions(+) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 3405e5d9d51c..fd4a300b5baf 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -104,6 +104,8 @@ struct xfrm_state struct xfrm_id id; struct xfrm_selector sel; + u32 genid; + /* Key manger bits */ struct { u8 state; @@ -590,6 +592,7 @@ struct xfrm_dst struct rt6_info rt6; } u; struct dst_entry *route; + u32 genid; u32 route_mtu_cached; u32 child_mtu_cached; u32 route_cookie; diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 42d8ded0f96a..479598566f1d 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -93,6 +93,7 @@ __xfrm4_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int xdst = (struct xfrm_dst *)dst1; xdst->route = &rt->u.dst; + xdst->genid = xfrm[i]->genid; dst1->next = dst_prev; dst_prev = dst1; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 98c2fe449b3f..9391c4c94feb 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -149,6 +149,7 @@ __xfrm6_bundle_create(struct xfrm_policy *policy, struct xfrm_state **xfrm, int xdst = (struct xfrm_dst *)dst1; xdst->route = &rt->u.dst; + xdst->genid = xfrm[i]->genid; if (rt->rt6i_node) xdst->route_cookie = rt->rt6i_node->fn_sernum; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 1732159ffd01..7fc6944ee36f 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1536,6 +1536,8 @@ int xfrm_bundle_ok(struct xfrm_dst *first, struct flowi *fl, int family, int str return 0; if (dst->xfrm->km.state != XFRM_STATE_VALID) return 0; + if (xdst->genid != dst->xfrm->genid) + return 0; if (strict && fl && dst->xfrm->props.mode != XFRM_MODE_TUNNEL && !xfrm_state_addr_flow_check(dst->xfrm, fl, family)) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 445263c54c94..535d43c14720 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -53,6 +53,7 @@ static struct hlist_head *xfrm_state_byspi __read_mostly; static unsigned int xfrm_state_hmask __read_mostly; static unsigned int xfrm_state_hashmax __read_mostly = 1 * 1024 * 1024; static unsigned int xfrm_state_num; +static unsigned int xfrm_state_genid; static inline unsigned int __xfrm4_dst_hash(xfrm_address_t *addr, unsigned int hmask) { @@ -745,6 +746,8 @@ static void __xfrm_state_insert(struct xfrm_state *x) { unsigned int h = xfrm_dst_hash(&x->id.daddr, x->props.family); + x->genid = ++xfrm_state_genid; + hlist_add_head(&x->bydst, xfrm_state_bydst+h); xfrm_state_hold(x); -- cgit v1.3-8-gc7d7 From c7f5ea3a4d1ae6b3b426e113358fdc57494bc754 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 03:29:04 -0700 Subject: [XFRM]: Do not flush all bundles on SA insert. Instead, simply set all potentially aliasing existing xfrm_state objects to have the current generation counter value. This will make routes get relooked up the next time an existing route mentioning these aliased xfrm_state objects gets used, via xfrm_dst_check(). Signed-off-by: David S. Miller --- include/net/xfrm.h | 1 - net/xfrm/xfrm_policy.c | 10 ---------- net/xfrm/xfrm_state.c | 25 ++++++++++++++++++++----- 3 files changed, 20 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index fd4a300b5baf..a620a43c9eeb 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -996,7 +996,6 @@ struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, extern void xfrm_policy_flush(u8 type); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); extern int xfrm_flush_bundles(void); -extern void xfrm_flush_all_bundles(void); extern int xfrm_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl, int family, int strict); extern void xfrm_init_pmtu(struct dst_entry *dst); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 7fc6944ee36f..cfa5c692f2e8 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1478,16 +1478,6 @@ int xfrm_flush_bundles(void) return 0; } -static int always_true(struct dst_entry *dst) -{ - return 1; -} - -void xfrm_flush_all_bundles(void) -{ - xfrm_prune_bundles(always_true); -} - void xfrm_init_pmtu(struct dst_entry *dst) { do { diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 98200397e098..77ef796c9d0d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -761,13 +761,30 @@ static void __xfrm_state_insert(struct xfrm_state *x) schedule_work(&xfrm_hash_work); } +/* xfrm_state_lock is held */ +static void __xfrm_state_bump_genids(struct xfrm_state *xnew) +{ + unsigned short family = xnew->props.family; + u32 reqid = xnew->props.reqid; + struct xfrm_state *x; + struct hlist_node *entry; + unsigned int h; + + h = xfrm_dst_hash(&xnew->id.daddr, reqid, family); + hlist_for_each_entry(x, entry, xfrm_state_bydst+h, bydst) { + if (x->props.family == family && + x->props.reqid == reqid && + !xfrm_addr_cmp(&x->id.daddr, &xnew->id.daddr, family)) + x->genid = xfrm_state_genid; + } +} + void xfrm_state_insert(struct xfrm_state *x) { spin_lock_bh(&xfrm_state_lock); + __xfrm_state_bump_genids(x); __xfrm_state_insert(x); spin_unlock_bh(&xfrm_state_lock); - - xfrm_flush_all_bundles(); } EXPORT_SYMBOL(xfrm_state_insert); @@ -889,15 +906,13 @@ int xfrm_state_add(struct xfrm_state *x) x->id.proto, &x->id.daddr, &x->props.saddr, 0); + __xfrm_state_bump_genids(x); __xfrm_state_insert(x); err = 0; out: spin_unlock_bh(&xfrm_state_lock); - if (!err) - xfrm_flush_all_bundles(); - if (x1) { xfrm_state_delete(x1); xfrm_state_put(x1); -- cgit v1.3-8-gc7d7 From 1c0953997567b22e32fdf85d3b4bc0f2461fd161 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 03:30:28 -0700 Subject: [XFRM]: Purge dst references to deleted SAs passively. Just let GC and other normal mechanisms take care of getting rid of DST cache references to deleted xfrm_state objects instead of walking all the policy bundles. Signed-off-by: David S. Miller --- include/net/xfrm.h | 1 - net/xfrm/xfrm_policy.c | 2 +- net/xfrm/xfrm_state.c | 17 ----------------- 3 files changed, 1 insertion(+), 19 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index a620a43c9eeb..c7870b6eae01 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -995,7 +995,6 @@ struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, int create, unsigned short family); extern void xfrm_policy_flush(u8 type); extern int xfrm_sk_policy_insert(struct sock *sk, int dir, struct xfrm_policy *pol); -extern int xfrm_flush_bundles(void); extern int xfrm_bundle_ok(struct xfrm_dst *xdst, struct flowi *fl, int family, int strict); extern void xfrm_init_pmtu(struct dst_entry *dst); diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index cfa5c692f2e8..1bcaae4adf3a 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1472,7 +1472,7 @@ static void __xfrm_garbage_collect(void) xfrm_prune_bundles(unused_bundle); } -int xfrm_flush_bundles(void) +static int xfrm_flush_bundles(void) { xfrm_prune_bundles(stale_bundle); return 0; diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 77ef796c9d0d..9ff00b7d6ad3 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -256,8 +256,6 @@ static struct work_struct xfrm_state_gc_work; static HLIST_HEAD(xfrm_state_gc_list); static DEFINE_SPINLOCK(xfrm_state_gc_lock); -static int xfrm_state_gc_flush_bundles; - int __xfrm_state_delete(struct xfrm_state *x); static struct xfrm_state_afinfo *xfrm_state_get_afinfo(unsigned short family); @@ -293,11 +291,6 @@ static void xfrm_state_gc_task(void *data) struct hlist_node *entry, *tmp; struct hlist_head gc_list; - if (xfrm_state_gc_flush_bundles) { - xfrm_state_gc_flush_bundles = 0; - xfrm_flush_bundles(); - } - spin_lock_bh(&xfrm_state_gc_lock); gc_list.first = xfrm_state_gc_list.first; INIT_HLIST_HEAD(&xfrm_state_gc_list); @@ -454,16 +447,6 @@ int __xfrm_state_delete(struct xfrm_state *x) if (del_timer(&x->rtimer)) __xfrm_state_put(x); - /* The number two in this test is the reference - * mentioned in the comment below plus the reference - * our caller holds. A larger value means that - * there are DSTs attached to this xfrm_state. - */ - if (atomic_read(&x->refcnt) > 2) { - xfrm_state_gc_flush_bundles = 1; - schedule_work(&xfrm_state_gc_work); - } - /* All xfrm_state objects are created by xfrm_state_alloc. * The xfrm_state_alloc call gives a reference, and that * is what we are dropping here. -- cgit v1.3-8-gc7d7 From 2518c7c2b3d7f0a6b302b4efe17c911f8dd4049f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Thu, 24 Aug 2006 04:45:07 -0700 Subject: [XFRM]: Hash policies when non-prefixed. This idea is from Alexey Kuznetsov. It is common for policies to be non-prefixed. And for that case we can optimize lookups, insert, etc. quite a bit. For each direction, we have a dynamically sized policy hash table for non-prefixed policies. We also have a hash table on policy->index. For prefixed policies, we have a list per-direction which we will consult on lookups when a non-prefix hashtable lookup fails. This still isn't as efficient as I would like it. There are four immediate problems: 1) Lots of excessive refcounting, which can be fixed just like xfrm_state was 2) We do 2 hash probes on insert, one to look for dups and one to allocate a unique policy->index. Althought I wonder how much this matters since xfrm_state inserts do up to 3 hash probes and that seems to perform fine. 3) xfrm_policy_insert() is very complex because of the priority ordering and entry replacement logic. 4) Lots of counter bumping, in addition to policy refcounts, in the form of xfrm_policy_count[]. This is merely used to let code path(s) know that some IPSEC rules exist. So this count is indexed per-direction, maybe that is overkill. Signed-off-by: David S. Miller --- include/net/xfrm.h | 23 +- net/xfrm/xfrm_policy.c | 681 +++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 546 insertions(+), 158 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index c7870b6eae01..0acabf2a0a8f 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -331,7 +331,8 @@ struct xfrm_tmpl struct xfrm_policy { struct xfrm_policy *next; - struct list_head list; + struct hlist_node bydst; + struct hlist_node byidx; /* This lock only affects elements except for entry. */ rwlock_t lock; @@ -385,21 +386,7 @@ struct xfrm_mgr extern int xfrm_register_km(struct xfrm_mgr *km); extern int xfrm_unregister_km(struct xfrm_mgr *km); - -extern struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2]; -#ifdef CONFIG_XFRM_SUB_POLICY -extern struct xfrm_policy *xfrm_policy_list_sub[XFRM_POLICY_MAX*2]; - -static inline int xfrm_policy_lists_empty(int dir) -{ - return (!xfrm_policy_list[dir] && !xfrm_policy_list_sub[dir]); -} -#else -static inline int xfrm_policy_lists_empty(int dir) -{ - return (!xfrm_policy_list[dir]); -} -#endif +extern unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; static inline void xfrm_pol_hold(struct xfrm_policy *policy) { @@ -678,7 +665,7 @@ static inline int xfrm_policy_check(struct sock *sk, int dir, struct sk_buff *sk if (sk && sk->sk_policy[XFRM_POLICY_IN]) return __xfrm_policy_check(sk, dir, skb, family); - return (xfrm_policy_lists_empty(dir) && !skb->sp) || + return (!xfrm_policy_count[dir] && !skb->sp) || (skb->dst->flags & DST_NOPOLICY) || __xfrm_policy_check(sk, dir, skb, family); } @@ -698,7 +685,7 @@ extern int __xfrm_route_forward(struct sk_buff *skb, unsigned short family); static inline int xfrm_route_forward(struct sk_buff *skb, unsigned short family) { - return xfrm_policy_lists_empty(XFRM_POLICY_OUT) || + return !xfrm_policy_count[XFRM_POLICY_OUT] || (skb->dst->flags & DST_NOXFRM) || __xfrm_route_forward(skb, family); } diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 1bcaae4adf3a..087a5443b051 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -22,6 +22,9 @@ #include #include #include +#include +#include +#include #include #include @@ -30,26 +33,8 @@ EXPORT_SYMBOL(xfrm_cfg_mutex); static DEFINE_RWLOCK(xfrm_policy_lock); -struct xfrm_policy *xfrm_policy_list[XFRM_POLICY_MAX*2]; -EXPORT_SYMBOL(xfrm_policy_list); -#ifdef CONFIG_XFRM_SUB_POLICY -struct xfrm_policy *xfrm_policy_list_sub[XFRM_POLICY_MAX*2]; -EXPORT_SYMBOL(xfrm_policy_list_sub); - -#define XFRM_POLICY_LISTS(type) \ - ((type == XFRM_POLICY_TYPE_SUB) ? xfrm_policy_list_sub : \ - xfrm_policy_list) -#define XFRM_POLICY_LISTHEAD(type, dir) \ - ((type == XFRM_POLICY_TYPE_SUB) ? xfrm_policy_list_sub[dir] : \ - xfrm_policy_list[dir]) -#define XFRM_POLICY_LISTHEADP(type, dir) \ - ((type == XFRM_POLICY_TYPE_SUB) ? &xfrm_policy_list_sub[dir] : \ - &xfrm_policy_list[dir]) -#else -#define XFRM_POLICY_LISTS(type) xfrm_policy_list -#define XFRM_POLICY_LISTHEAD(type, dif) xfrm_policy_list[dir] -#define XFRM_POLICY_LISTHEADP(type, dif) &xfrm_policy_list[dir] -#endif +unsigned int xfrm_policy_count[XFRM_POLICY_MAX*2]; +EXPORT_SYMBOL(xfrm_policy_count); static DEFINE_RWLOCK(xfrm_policy_afinfo_lock); static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; @@ -57,8 +42,7 @@ static struct xfrm_policy_afinfo *xfrm_policy_afinfo[NPROTO]; static kmem_cache_t *xfrm_dst_cache __read_mostly; static struct work_struct xfrm_policy_gc_work; -static struct list_head xfrm_policy_gc_list = - LIST_HEAD_INIT(xfrm_policy_gc_list); +static HLIST_HEAD(xfrm_policy_gc_list); static DEFINE_SPINLOCK(xfrm_policy_gc_lock); static struct xfrm_policy_afinfo *xfrm_policy_get_afinfo(unsigned short family); @@ -328,8 +312,10 @@ struct xfrm_policy *xfrm_policy_alloc(gfp_t gfp) policy = kzalloc(sizeof(struct xfrm_policy), gfp); if (policy) { - atomic_set(&policy->refcnt, 1); + INIT_HLIST_NODE(&policy->bydst); + INIT_HLIST_NODE(&policy->byidx); rwlock_init(&policy->lock); + atomic_set(&policy->refcnt, 1); init_timer(&policy->timer); policy->timer.data = (unsigned long)policy; policy->timer.function = xfrm_policy_timer; @@ -375,17 +361,16 @@ static void xfrm_policy_gc_kill(struct xfrm_policy *policy) static void xfrm_policy_gc_task(void *data) { struct xfrm_policy *policy; - struct list_head *entry, *tmp; - struct list_head gc_list = LIST_HEAD_INIT(gc_list); + struct hlist_node *entry, *tmp; + struct hlist_head gc_list; spin_lock_bh(&xfrm_policy_gc_lock); - list_splice_init(&xfrm_policy_gc_list, &gc_list); + gc_list.first = xfrm_policy_gc_list.first; + INIT_HLIST_HEAD(&xfrm_policy_gc_list); spin_unlock_bh(&xfrm_policy_gc_lock); - list_for_each_safe(entry, tmp, &gc_list) { - policy = list_entry(entry, struct xfrm_policy, list); + hlist_for_each_entry_safe(policy, entry, tmp, &gc_list, bydst) xfrm_policy_gc_kill(policy); - } } /* Rule must be locked. Release descentant resources, announce @@ -407,70 +392,354 @@ static void xfrm_policy_kill(struct xfrm_policy *policy) } spin_lock(&xfrm_policy_gc_lock); - list_add(&policy->list, &xfrm_policy_gc_list); + hlist_add_head(&policy->bydst, &xfrm_policy_gc_list); spin_unlock(&xfrm_policy_gc_lock); schedule_work(&xfrm_policy_gc_work); } +struct xfrm_policy_hash { + struct hlist_head *table; + unsigned int hmask; +}; + +static struct hlist_head xfrm_policy_inexact[XFRM_POLICY_MAX*2]; +static struct xfrm_policy_hash xfrm_policy_bydst[XFRM_POLICY_MAX*2] __read_mostly; +static struct hlist_head *xfrm_policy_byidx __read_mostly; +static unsigned int xfrm_idx_hmask __read_mostly; +static unsigned int xfrm_policy_hashmax __read_mostly = 1 * 1024 * 1024; + +static inline unsigned int __idx_hash(u32 index, unsigned int hmask) +{ + return (index ^ (index >> 8)) & hmask; +} + +static inline unsigned int idx_hash(u32 index) +{ + return __idx_hash(index, xfrm_idx_hmask); +} + +static inline unsigned int __sel_hash(struct xfrm_selector *sel, unsigned short family, unsigned int hmask) +{ + xfrm_address_t *daddr = &sel->daddr; + xfrm_address_t *saddr = &sel->saddr; + unsigned int h = 0; + + switch (family) { + case AF_INET: + if (sel->prefixlen_d != 32 || + sel->prefixlen_s != 32) + return hmask + 1; + + h = ntohl(daddr->a4 ^ saddr->a4); + break; + + case AF_INET6: + if (sel->prefixlen_d != 128 || + sel->prefixlen_s != 128) + return hmask + 1; + + h = ntohl(daddr->a6[2] ^ daddr->a6[3] ^ + saddr->a6[2] ^ saddr->a6[3]); + break; + }; + h ^= (h >> 16); + return h & hmask; +} + +static inline unsigned int __addr_hash(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, unsigned int hmask) +{ + unsigned int h = 0; + + switch (family) { + case AF_INET: + h = ntohl(daddr->a4 ^ saddr->a4); + break; + + case AF_INET6: + h = ntohl(daddr->a6[2] ^ daddr->a6[3] ^ + saddr->a6[2] ^ saddr->a6[3]); + break; + }; + h ^= (h >> 16); + return h & hmask; +} + +static struct hlist_head *policy_hash_bysel(struct xfrm_selector *sel, unsigned short family, int dir) +{ + unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int hash = __sel_hash(sel, family, hmask); + + return (hash == hmask + 1 ? + &xfrm_policy_inexact[dir] : + xfrm_policy_bydst[dir].table + hash); +} + +static struct hlist_head *policy_hash_direct(xfrm_address_t *daddr, xfrm_address_t *saddr, unsigned short family, int dir) +{ + unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int hash = __addr_hash(daddr, saddr, family, hmask); + + return xfrm_policy_bydst[dir].table + hash; +} + +static struct hlist_head *xfrm_policy_hash_alloc(unsigned int sz) +{ + struct hlist_head *n; + + if (sz <= PAGE_SIZE) + n = kmalloc(sz, GFP_KERNEL); + else if (hashdist) + n = __vmalloc(sz, GFP_KERNEL, PAGE_KERNEL); + else + n = (struct hlist_head *) + __get_free_pages(GFP_KERNEL, get_order(sz)); + + if (n) + memset(n, 0, sz); + + return n; +} + +static void xfrm_policy_hash_free(struct hlist_head *n, unsigned int sz) +{ + if (sz <= PAGE_SIZE) + kfree(n); + else if (hashdist) + vfree(n); + else + free_pages((unsigned long)n, get_order(sz)); +} + +static void xfrm_dst_hash_transfer(struct hlist_head *list, + struct hlist_head *ndsttable, + unsigned int nhashmask) +{ + struct hlist_node *entry, *tmp; + struct xfrm_policy *pol; + + hlist_for_each_entry_safe(pol, entry, tmp, list, bydst) { + unsigned int h; + + h = __addr_hash(&pol->selector.daddr, &pol->selector.saddr, + pol->family, nhashmask); + hlist_add_head(&pol->bydst, ndsttable+h); + } +} + +static void xfrm_idx_hash_transfer(struct hlist_head *list, + struct hlist_head *nidxtable, + unsigned int nhashmask) +{ + struct hlist_node *entry, *tmp; + struct xfrm_policy *pol; + + hlist_for_each_entry_safe(pol, entry, tmp, list, byidx) { + unsigned int h; + + h = __idx_hash(pol->index, nhashmask); + hlist_add_head(&pol->byidx, nidxtable+h); + } +} + +static unsigned long xfrm_new_hash_mask(unsigned int old_hmask) +{ + return ((old_hmask + 1) << 1) - 1; +} + +static void xfrm_bydst_resize(int dir) +{ + unsigned int hmask = xfrm_policy_bydst[dir].hmask; + unsigned int nhashmask = xfrm_new_hash_mask(hmask); + unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); + struct hlist_head *odst = xfrm_policy_bydst[dir].table; + struct hlist_head *ndst = xfrm_policy_hash_alloc(nsize); + int i; + + if (!ndst) + return; + + write_lock_bh(&xfrm_policy_lock); + + for (i = hmask; i >= 0; i--) + xfrm_dst_hash_transfer(odst + i, ndst, nhashmask); + + xfrm_policy_bydst[dir].table = ndst; + xfrm_policy_bydst[dir].hmask = nhashmask; + + write_unlock_bh(&xfrm_policy_lock); + + xfrm_policy_hash_free(odst, (hmask + 1) * sizeof(struct hlist_head)); +} + +static void xfrm_byidx_resize(int total) +{ + unsigned int hmask = xfrm_idx_hmask; + unsigned int nhashmask = xfrm_new_hash_mask(hmask); + unsigned int nsize = (nhashmask + 1) * sizeof(struct hlist_head); + struct hlist_head *oidx = xfrm_policy_byidx; + struct hlist_head *nidx = xfrm_policy_hash_alloc(nsize); + int i; + + if (!nidx) + return; + + write_lock_bh(&xfrm_policy_lock); + + for (i = hmask; i >= 0; i--) + xfrm_idx_hash_transfer(oidx + i, nidx, nhashmask); + + xfrm_policy_byidx = nidx; + xfrm_idx_hmask = nhashmask; + + write_unlock_bh(&xfrm_policy_lock); + + xfrm_policy_hash_free(oidx, (hmask + 1) * sizeof(struct hlist_head)); +} + +static inline int xfrm_bydst_should_resize(int dir, int *total) +{ + unsigned int cnt = xfrm_policy_count[dir]; + unsigned int hmask = xfrm_policy_bydst[dir].hmask; + + if (total) + *total += cnt; + + if ((hmask + 1) < xfrm_policy_hashmax && + cnt > hmask) + return 1; + + return 0; +} + +static inline int xfrm_byidx_should_resize(int total) +{ + unsigned int hmask = xfrm_idx_hmask; + + if ((hmask + 1) < xfrm_policy_hashmax && + total > hmask) + return 1; + + return 0; +} + +static DEFINE_MUTEX(hash_resize_mutex); + +static void xfrm_hash_resize(void *__unused) +{ + int dir, total; + + mutex_lock(&hash_resize_mutex); + + total = 0; + for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { + if (xfrm_bydst_should_resize(dir, &total)) + xfrm_bydst_resize(dir); + } + if (xfrm_byidx_should_resize(total)) + xfrm_byidx_resize(total); + + mutex_unlock(&hash_resize_mutex); +} + +static DECLARE_WORK(xfrm_hash_work, xfrm_hash_resize, NULL); + /* Generate new index... KAME seems to generate them ordered by cost * of an absolute inpredictability of ordering of rules. This will not pass. */ static u32 xfrm_gen_index(u8 type, int dir) { - u32 idx; - struct xfrm_policy *p; static u32 idx_generator; for (;;) { + struct hlist_node *entry; + struct hlist_head *list; + struct xfrm_policy *p; + u32 idx; + int found; + idx = (idx_generator | dir); idx_generator += 8; if (idx == 0) idx = 8; - for (p = XFRM_POLICY_LISTHEAD(type, dir); p; p = p->next) { - if (p->index == idx) + list = xfrm_policy_byidx + idx_hash(idx); + found = 0; + hlist_for_each_entry(p, entry, list, byidx) { + if (p->index == idx) { + found = 1; break; + } } - if (!p) + if (!found) return idx; } } +static inline int selector_cmp(struct xfrm_selector *s1, struct xfrm_selector *s2) +{ + u32 *p1 = (u32 *) s1; + u32 *p2 = (u32 *) s2; + int len = sizeof(struct xfrm_selector) / sizeof(u32); + int i; + + for (i = 0; i < len; i++) { + if (p1[i] != p2[i]) + return 1; + } + + return 0; +} + int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) { - struct xfrm_policy *pol, **p; - struct xfrm_policy *delpol = NULL; - struct xfrm_policy **newpos = NULL; + struct xfrm_policy *pol; + struct xfrm_policy *delpol; + struct hlist_head *chain; + struct hlist_node *entry, *newpos, *last; struct dst_entry *gc_list; write_lock_bh(&xfrm_policy_lock); - for (p = XFRM_POLICY_LISTHEADP(policy->type, dir); (pol=*p)!=NULL;) { - if (!delpol && memcmp(&policy->selector, &pol->selector, sizeof(pol->selector)) == 0 && + chain = policy_hash_bysel(&policy->selector, policy->family, dir); + delpol = NULL; + newpos = NULL; + last = NULL; + hlist_for_each_entry(pol, entry, chain, bydst) { + if (!delpol && + pol->type == policy->type && + !selector_cmp(&pol->selector, &policy->selector) && xfrm_sec_ctx_match(pol->security, policy->security)) { if (excl) { write_unlock_bh(&xfrm_policy_lock); return -EEXIST; } - *p = pol->next; delpol = pol; if (policy->priority > pol->priority) continue; } else if (policy->priority >= pol->priority) { - p = &pol->next; + last = &pol->bydst; continue; } if (!newpos) - newpos = p; + newpos = &pol->bydst; if (delpol) break; - p = &pol->next; + last = &pol->bydst; } + if (!newpos) + newpos = last; if (newpos) - p = newpos; + hlist_add_after(newpos, &policy->bydst); + else + hlist_add_head(&policy->bydst, chain); xfrm_pol_hold(policy); - policy->next = *p; - *p = policy; + xfrm_policy_count[dir]++; atomic_inc(&flow_cache_genid); + if (delpol) { + hlist_del(&delpol->bydst); + hlist_del(&delpol->byidx); + xfrm_policy_count[dir]--; + } policy->index = delpol ? delpol->index : xfrm_gen_index(policy->type, dir); + hlist_add_head(&policy->byidx, xfrm_policy_byidx+idx_hash(policy->index)); policy->curlft.add_time = (unsigned long)xtime.tv_sec; policy->curlft.use_time = 0; if (!mod_timer(&policy->timer, jiffies + HZ)) @@ -479,10 +748,13 @@ int xfrm_policy_insert(int dir, struct xfrm_policy *policy, int excl) if (delpol) xfrm_policy_kill(delpol); + else if (xfrm_bydst_should_resize(dir, NULL)) + schedule_work(&xfrm_hash_work); read_lock_bh(&xfrm_policy_lock); gc_list = NULL; - for (policy = policy->next; policy; policy = policy->next) { + entry = &policy->bydst; + hlist_for_each_entry_continue(policy, entry, bydst) { struct dst_entry *dst; write_lock(&policy->lock); @@ -515,67 +787,112 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, struct xfrm_selector *sel, struct xfrm_sec_ctx *ctx, int delete) { - struct xfrm_policy *pol, **p; + struct xfrm_policy *pol, *ret; + struct hlist_head *chain; + struct hlist_node *entry; write_lock_bh(&xfrm_policy_lock); - for (p = XFRM_POLICY_LISTHEADP(type, dir); (pol=*p)!=NULL; p = &pol->next) { - if ((memcmp(sel, &pol->selector, sizeof(*sel)) == 0) && - (xfrm_sec_ctx_match(ctx, pol->security))) { + chain = policy_hash_bysel(sel, sel->family, dir); + ret = NULL; + hlist_for_each_entry(pol, entry, chain, bydst) { + if (pol->type == type && + !selector_cmp(sel, &pol->selector) && + xfrm_sec_ctx_match(ctx, pol->security)) { xfrm_pol_hold(pol); - if (delete) - *p = pol->next; + if (delete) { + hlist_del(&pol->bydst); + hlist_del(&pol->byidx); + xfrm_policy_count[dir]--; + } + ret = pol; break; } } write_unlock_bh(&xfrm_policy_lock); - if (pol && delete) { + if (ret && delete) { atomic_inc(&flow_cache_genid); - xfrm_policy_kill(pol); + xfrm_policy_kill(ret); } - return pol; + return ret; } EXPORT_SYMBOL(xfrm_policy_bysel_ctx); struct xfrm_policy *xfrm_policy_byid(u8 type, int dir, u32 id, int delete) { - struct xfrm_policy *pol, **p; + struct xfrm_policy *pol, *ret; + struct hlist_head *chain; + struct hlist_node *entry; write_lock_bh(&xfrm_policy_lock); - for (p = XFRM_POLICY_LISTHEADP(type, dir); (pol=*p)!=NULL; p = &pol->next) { - if (pol->index == id) { + chain = xfrm_policy_byidx + idx_hash(id); + ret = NULL; + hlist_for_each_entry(pol, entry, chain, byidx) { + if (pol->type == type && pol->index == id) { xfrm_pol_hold(pol); - if (delete) - *p = pol->next; + if (delete) { + hlist_del(&pol->bydst); + hlist_del(&pol->byidx); + xfrm_policy_count[dir]--; + } + ret = pol; break; } } write_unlock_bh(&xfrm_policy_lock); - if (pol && delete) { + if (ret && delete) { atomic_inc(&flow_cache_genid); - xfrm_policy_kill(pol); + xfrm_policy_kill(ret); } - return pol; + return ret; } EXPORT_SYMBOL(xfrm_policy_byid); void xfrm_policy_flush(u8 type) { - struct xfrm_policy *xp; - struct xfrm_policy **p_list = XFRM_POLICY_LISTS(type); int dir; write_lock_bh(&xfrm_policy_lock); for (dir = 0; dir < XFRM_POLICY_MAX; dir++) { - while ((xp = p_list[dir]) != NULL) { - p_list[dir] = xp->next; + struct xfrm_policy *pol; + struct hlist_node *entry; + int i; + + again1: + hlist_for_each_entry(pol, entry, + &xfrm_policy_inexact[dir], bydst) { + if (pol->type != type) + continue; + hlist_del(&pol->bydst); + hlist_del(&pol->byidx); write_unlock_bh(&xfrm_policy_lock); - xfrm_policy_kill(xp); + xfrm_policy_kill(pol); write_lock_bh(&xfrm_policy_lock); + goto again1; + } + + for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + again2: + hlist_for_each_entry(pol, entry, + xfrm_policy_bydst[dir].table + i, + bydst) { + if (pol->type != type) + continue; + hlist_del(&pol->bydst); + hlist_del(&pol->byidx); + write_unlock_bh(&xfrm_policy_lock); + + xfrm_policy_kill(pol); + + write_lock_bh(&xfrm_policy_lock); + goto again2; + } } + + xfrm_policy_count[dir] = 0; } atomic_inc(&flow_cache_genid); write_unlock_bh(&xfrm_policy_lock); @@ -585,15 +902,27 @@ EXPORT_SYMBOL(xfrm_policy_flush); int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*), void *data) { - struct xfrm_policy *xp; - int dir; - int count = 0; - int error = 0; + struct xfrm_policy *pol; + struct hlist_node *entry; + int dir, count, error; read_lock_bh(&xfrm_policy_lock); + count = 0; for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - for (xp = XFRM_POLICY_LISTHEAD(type, dir); xp; xp = xp->next) - count++; + struct hlist_head *table = xfrm_policy_bydst[dir].table; + int i; + + hlist_for_each_entry(pol, entry, + &xfrm_policy_inexact[dir], bydst) { + if (pol->type == type) + count++; + } + for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + hlist_for_each_entry(pol, entry, table + i, bydst) { + if (pol->type == type) + count++; + } + } } if (count == 0) { @@ -602,13 +931,28 @@ int xfrm_policy_walk(u8 type, int (*func)(struct xfrm_policy *, int, int, void*) } for (dir = 0; dir < 2*XFRM_POLICY_MAX; dir++) { - for (xp = XFRM_POLICY_LISTHEAD(type, dir); xp; xp = xp->next) { - error = func(xp, dir%XFRM_POLICY_MAX, --count, data); + struct hlist_head *table = xfrm_policy_bydst[dir].table; + int i; + + hlist_for_each_entry(pol, entry, + &xfrm_policy_inexact[dir], bydst) { + if (pol->type != type) + continue; + error = func(pol, dir % XFRM_POLICY_MAX, --count, data); if (error) goto out; } + for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + hlist_for_each_entry(pol, entry, table + i, bydst) { + if (pol->type != type) + continue; + error = func(pol, dir % XFRM_POLICY_MAX, --count, data); + if (error) + goto out; + } + } } - + error = 0; out: read_unlock_bh(&xfrm_policy_lock); return error; @@ -617,31 +961,61 @@ EXPORT_SYMBOL(xfrm_policy_walk); /* Find policy to apply to this flow. */ -static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, - u16 family, u8 dir) +static int xfrm_policy_match(struct xfrm_policy *pol, struct flowi *fl, + u8 type, u16 family, int dir) { - struct xfrm_policy *pol; + struct xfrm_selector *sel = &pol->selector; + int match; - read_lock_bh(&xfrm_policy_lock); - for (pol = XFRM_POLICY_LISTHEAD(type, dir); pol; pol = pol->next) { - struct xfrm_selector *sel = &pol->selector; - int match; + if (pol->family != family || + pol->type != type) + return 0; - if (pol->family != family) - continue; + match = xfrm_selector_match(sel, fl, family); + if (match) { + if (!security_xfrm_policy_lookup(pol, fl->secid, dir)) + return 1; + } + + return 0; +} - match = xfrm_selector_match(sel, fl, family); +static struct xfrm_policy *xfrm_policy_lookup_bytype(u8 type, struct flowi *fl, + u16 family, u8 dir) +{ + struct xfrm_policy *pol, *ret; + xfrm_address_t *daddr, *saddr; + struct hlist_node *entry; + struct hlist_head *chain; - if (match) { - if (!security_xfrm_policy_lookup(pol, fl->secid, dir)) { + daddr = xfrm_flowi_daddr(fl, family); + saddr = xfrm_flowi_saddr(fl, family); + if (unlikely(!daddr || !saddr)) + return NULL; + + read_lock_bh(&xfrm_policy_lock); + chain = policy_hash_direct(daddr, saddr, family, dir); + ret = NULL; + hlist_for_each_entry(pol, entry, chain, bydst) { + if (xfrm_policy_match(pol, fl, type, family, dir)) { + xfrm_pol_hold(pol); + ret = pol; + break; + } + } + if (!ret) { + chain = &xfrm_policy_inexact[dir]; + hlist_for_each_entry(pol, entry, chain, bydst) { + if (xfrm_policy_match(pol, fl, type, family, dir)) { xfrm_pol_hold(pol); + ret = pol; break; } } } read_unlock_bh(&xfrm_policy_lock); - return pol; + return ret; } static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, @@ -657,7 +1031,7 @@ static void xfrm_policy_lookup(struct flowi *fl, u16 family, u8 dir, pol = xfrm_policy_lookup_bytype(XFRM_POLICY_TYPE_MAIN, fl, family, dir); #ifdef CONFIG_XFRM_SUB_POLICY - end: +end: #endif if ((*objp = (void *) pol) != NULL) *obj_refp = &pol->refcnt; @@ -704,26 +1078,29 @@ static struct xfrm_policy *xfrm_sk_policy_lookup(struct sock *sk, int dir, struc static void __xfrm_policy_link(struct xfrm_policy *pol, int dir) { - struct xfrm_policy **p_list = XFRM_POLICY_LISTS(pol->type); + struct hlist_head *chain = policy_hash_bysel(&pol->selector, + pol->family, dir); - pol->next = p_list[dir]; - p_list[dir] = pol; + hlist_add_head(&pol->bydst, chain); + hlist_add_head(&pol->byidx, xfrm_policy_byidx+idx_hash(pol->index)); + xfrm_policy_count[dir]++; xfrm_pol_hold(pol); + + if (xfrm_bydst_should_resize(dir, NULL)) + schedule_work(&xfrm_hash_work); } static struct xfrm_policy *__xfrm_policy_unlink(struct xfrm_policy *pol, int dir) { - struct xfrm_policy **polp; + if (hlist_unhashed(&pol->bydst)) + return NULL; - for (polp = XFRM_POLICY_LISTHEADP(pol->type, dir); - *polp != NULL; polp = &(*polp)->next) { - if (*polp == pol) { - *polp = pol->next; - return pol; - } - } - return NULL; + hlist_del(&pol->bydst); + hlist_del(&pol->byidx); + xfrm_policy_count[dir]--; + + return pol; } int xfrm_policy_delete(struct xfrm_policy *pol, int dir) @@ -968,7 +1345,8 @@ restart: if (!policy) { /* To accelerate a bit... */ - if ((dst_orig->flags & DST_NOXFRM) || xfrm_policy_lists_empty(XFRM_POLICY_OUT)) + if ((dst_orig->flags & DST_NOXFRM) || + !xfrm_policy_count[XFRM_POLICY_OUT]) return 0; policy = flow_cache_lookup(fl, dst_orig->ops->family, @@ -1413,50 +1791,50 @@ static struct dst_entry *xfrm_negative_advice(struct dst_entry *dst) return dst; } +static void prune_one_bundle(struct xfrm_policy *pol, int (*func)(struct dst_entry *), struct dst_entry **gc_list_p) +{ + struct dst_entry *dst, **dstp; + + write_lock(&pol->lock); + dstp = &pol->bundles; + while ((dst=*dstp) != NULL) { + if (func(dst)) { + *dstp = dst->next; + dst->next = *gc_list_p; + *gc_list_p = dst; + } else { + dstp = &dst->next; + } + } + write_unlock(&pol->lock); +} + static void xfrm_prune_bundles(int (*func)(struct dst_entry *)) { - int i; - struct xfrm_policy *pol; - struct dst_entry *dst, **dstp, *gc_list = NULL; + struct dst_entry *gc_list = NULL; + int dir; read_lock_bh(&xfrm_policy_lock); - for (i=0; i<2*XFRM_POLICY_MAX; i++) { -#ifdef CONFIG_XFRM_SUB_POLICY - for (pol = xfrm_policy_list_sub[i]; pol; pol = pol->next) { - write_lock(&pol->lock); - dstp = &pol->bundles; - while ((dst=*dstp) != NULL) { - if (func(dst)) { - *dstp = dst->next; - dst->next = gc_list; - gc_list = dst; - } else { - dstp = &dst->next; - } - } - write_unlock(&pol->lock); - } + for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { + struct xfrm_policy *pol; + struct hlist_node *entry; + struct hlist_head *table; + int i; -#endif - for (pol = xfrm_policy_list[i]; pol; pol = pol->next) { - write_lock(&pol->lock); - dstp = &pol->bundles; - while ((dst=*dstp) != NULL) { - if (func(dst)) { - *dstp = dst->next; - dst->next = gc_list; - gc_list = dst; - } else { - dstp = &dst->next; - } - } - write_unlock(&pol->lock); + hlist_for_each_entry(pol, entry, + &xfrm_policy_inexact[dir], bydst) + prune_one_bundle(pol, func, &gc_list); + + table = xfrm_policy_bydst[dir].table; + for (i = xfrm_policy_bydst[dir].hmask; i >= 0; i--) { + hlist_for_each_entry(pol, entry, table + i, bydst) + prune_one_bundle(pol, func, &gc_list); } } read_unlock_bh(&xfrm_policy_lock); while (gc_list) { - dst = gc_list; + struct dst_entry *dst = gc_list; gc_list = dst->next; dst_free(dst); } @@ -1680,6 +2058,9 @@ static struct notifier_block xfrm_dev_notifier = { static void __init xfrm_policy_init(void) { + unsigned int hmask, sz; + int dir; + xfrm_dst_cache = kmem_cache_create("xfrm_dst_cache", sizeof(struct xfrm_dst), 0, SLAB_HWCACHE_ALIGN, @@ -1687,6 +2068,26 @@ static void __init xfrm_policy_init(void) if (!xfrm_dst_cache) panic("XFRM: failed to allocate xfrm_dst_cache\n"); + hmask = 8 - 1; + sz = (hmask+1) * sizeof(struct hlist_head); + + xfrm_policy_byidx = xfrm_policy_hash_alloc(sz); + xfrm_idx_hmask = hmask; + if (!xfrm_policy_byidx) + panic("XFRM: failed to allocate byidx hash\n"); + + for (dir = 0; dir < XFRM_POLICY_MAX * 2; dir++) { + struct xfrm_policy_hash *htab; + + INIT_HLIST_HEAD(&xfrm_policy_inexact[dir]); + + htab = &xfrm_policy_bydst[dir]; + htab->table = xfrm_policy_hash_alloc(sz); + htab->hmask = hmask; + if (!htab->table) + panic("XFRM: failed to allocate bydst hash\n"); + } + INIT_WORK(&xfrm_policy_gc_work, xfrm_policy_gc_task, NULL); register_netdevice_notifier(&xfrm_dev_notifier); } -- cgit v1.3-8-gc7d7 From e4bec827feda76d5e7417a2696a75424834d564f Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 22 Sep 2006 15:17:35 -0700 Subject: [IPSEC] esp: Defer output IV initialization to first use. First of all, if the xfrm_state only gets used for input packets this entropy is a complete waste. Secondly, it is often the case that a configuration loads many rules (perhaps even dynamically) and they don't all necessarily ever get used. This get_random_bytes() call was showing up in the profiles for xfrm_state inserts which is how I noticed this. Signed-off-by: David S. Miller --- include/net/esp.h | 5 +++-- net/ipv4/esp4.c | 9 +++++++-- net/ipv6/esp6.c | 9 +++++++-- 3 files changed, 17 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/esp.h b/include/net/esp.h index 064366d66eea..713d039f4af7 100644 --- a/include/net/esp.h +++ b/include/net/esp.h @@ -15,13 +15,14 @@ struct esp_data struct { u8 *key; /* Key */ int key_len; /* Key length */ - u8 *ivec; /* ivec buffer */ + int padlen; /* 0..255 */ /* ivlen is offset from enc_data, where encrypted data start. * It is logically different of crypto_tfm_alg_ivsize(tfm). * We assume that it is either zero (no ivec), or * >= crypto_tfm_alg_ivsize(tfm). */ int ivlen; - int padlen; /* 0..255 */ + int ivinitted; + u8 *ivec; /* ivec buffer */ struct crypto_blkcipher *tfm; /* crypto handle */ } conf; diff --git a/net/ipv4/esp4.c b/net/ipv4/esp4.c index e87377e1d6b6..13b29360d102 100644 --- a/net/ipv4/esp4.c +++ b/net/ipv4/esp4.c @@ -95,8 +95,13 @@ static int esp_output(struct xfrm_state *x, struct sk_buff *skb) esph->seq_no = htonl(++x->replay.oseq); xfrm_aevent_doreplay(x); - if (esp->conf.ivlen) + if (esp->conf.ivlen) { + if (unlikely(!esp->conf.ivinitted)) { + get_random_bytes(esp->conf.ivec, esp->conf.ivlen); + esp->conf.ivinitted = 1; + } crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen); + } do { struct scatterlist *sg = &esp->sgbuf[0]; @@ -378,7 +383,7 @@ static int esp_init_state(struct xfrm_state *x) esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL); if (unlikely(esp->conf.ivec == NULL)) goto error; - get_random_bytes(esp->conf.ivec, esp->conf.ivlen); + esp->conf.ivinitted = 0; } if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) goto error; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index ae50b9511151..e78680a9985b 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -99,8 +99,13 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) esph->seq_no = htonl(++x->replay.oseq); xfrm_aevent_doreplay(x); - if (esp->conf.ivlen) + if (esp->conf.ivlen) { + if (unlikely(!esp->conf.ivinitted)) { + get_random_bytes(esp->conf.ivec, esp->conf.ivlen); + esp->conf.ivinitted = 1; + } crypto_blkcipher_set_iv(tfm, esp->conf.ivec, esp->conf.ivlen); + } do { struct scatterlist *sg = &esp->sgbuf[0]; @@ -353,7 +358,7 @@ static int esp6_init_state(struct xfrm_state *x) esp->conf.ivec = kmalloc(esp->conf.ivlen, GFP_KERNEL); if (unlikely(esp->conf.ivec == NULL)) goto error; - get_random_bytes(esp->conf.ivec, esp->conf.ivlen); + esp->conf.ivinitted = 0; } if (crypto_blkcipher_setkey(tfm, esp->conf.key, esp->conf.key_len)) goto error; -- cgit v1.3-8-gc7d7 From 75bff8f023e02b045a8f68f36fa7da98dca124b8 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Mon, 21 Aug 2006 19:22:01 +0900 Subject: [IPV6] ROUTE: Routing by FWMARK. Based on patch by Jean Lorchat . Signed-off-by: YOSHIFUJI Hideaki --- include/linux/fib_rules.h | 2 +- include/net/flow.h | 2 ++ net/ipv6/Kconfig | 7 +++++++ net/ipv6/fib6_rules.c | 23 +++++++++++++++++++++++ net/ipv6/route.c | 1 + 5 files changed, 34 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/linux/fib_rules.h b/include/linux/fib_rules.h index 19a82b6c1c1f..2987549d6044 100644 --- a/include/linux/fib_rules.h +++ b/include/linux/fib_rules.h @@ -34,7 +34,7 @@ enum FRA_UNUSED3, FRA_UNUSED4, FRA_UNUSED5, - FRA_FWMARK, /* netfilter mark (IPv4) */ + FRA_FWMARK, /* netfilter mark (IPv4/IPv6) */ FRA_FLOW, /* flow/class id */ FRA_UNUSED6, FRA_UNUSED7, diff --git a/include/net/flow.h b/include/net/flow.h index e0522914316e..3ca210ec1379 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -26,6 +26,7 @@ struct flowi { struct { struct in6_addr daddr; struct in6_addr saddr; + __u32 fwmark; __u32 flowlabel; } ip6_u; @@ -42,6 +43,7 @@ struct flowi { #define fld_scope nl_u.dn_u.scope #define fl6_dst nl_u.ip6_u.daddr #define fl6_src nl_u.ip6_u.saddr +#define fl6_fwmark nl_u.ip6_u.fwmark #define fl6_flowlabel nl_u.ip6_u.flowlabel #define fl4_dst nl_u.ip4_u.daddr #define fl4_src nl_u.ip4_u.saddr diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 21e0cc808f44..a2d211da2aba 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -173,3 +173,10 @@ config IPV6_MULTIPLE_TABLES ---help--- Support multiple routing tables. +config IPV6_ROUTE_FWMARK + bool "IPv6: use netfilter MARK value as routing key" + depends on IPV6_MULTIPLE_TABLES && NETFILTER + ---help--- + If you say Y here, you will be able to specify different routes for + packets with different mark values (see iptables(8), MARK target). + diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 91f6233d8efd..aebd9e2b85a8 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -26,6 +26,9 @@ struct fib6_rule struct fib_rule common; struct rt6key src; struct rt6key dst; +#ifdef CONFIG_IPV6_ROUTE_FWMARK + u8 fwmark; +#endif u8 tclass; }; @@ -124,6 +127,11 @@ static int fib6_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) if (r->tclass && r->tclass != ((ntohl(fl->fl6_flowlabel) >> 20) & 0xff)) return 0; +#ifdef CONFIG_IPV6_ROUTE_FWMARK + if (r->fwmark && (r->fwmark != fl->fl6_fwmark)) + return 0; +#endif + return 1; } @@ -164,6 +172,11 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, nla_memcpy(&rule6->dst.addr, tb[FRA_DST], sizeof(struct in6_addr)); +#ifdef CONFIG_IPV6_ROUTE_FWMARK + if (tb[FRA_FWMARK]) + rule6->fwmark = nla_get_u32(tb[FRA_FWMARK]); +#endif + rule6->src.plen = frh->src_len; rule6->dst.plen = frh->dst_len; rule6->tclass = frh->tos; @@ -195,6 +208,11 @@ static int fib6_rule_compare(struct fib_rule *rule, struct fib_rule_hdr *frh, nla_memcmp(tb[FRA_DST], &rule6->dst.addr, sizeof(struct in6_addr))) return 0; +#ifdef CONFIG_IPV6_ROUTE_FWMARK + if (tb[FRA_FWMARK] && (rule6->fwmark != nla_get_u32(tb[FRA_FWMARK]))) + return 0; +#endif + return 1; } @@ -216,6 +234,11 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, NLA_PUT(skb, FRA_SRC, sizeof(struct in6_addr), &rule6->src.addr); +#ifdef CONFIG_IPV6_ROUTE_FWMARK + if (rule6->fwmark) + NLA_PUT_U32(skb, FRA_FWMARK, rule6->fwmark); +#endif + return 0; nla_put_failure: diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 20691285aee5..649350bd9299 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -703,6 +703,7 @@ void ip6_route_input(struct sk_buff *skb) .ip6_u = { .daddr = iph->daddr, .saddr = iph->saddr, + .fwmark = skb->nfmark, .flowlabel = (* (u32 *) iph)&IPV6_FLOWINFO_MASK, }, }, -- cgit v1.3-8-gc7d7 From ff5dfe736dd9f6c74b206aa77c0465dfd503bdb9 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Sat, 26 Aug 2006 19:17:53 -0700 Subject: [NETLINK]: remove third bogus argument from NLA_PUT_FLAG This patch removes the 'value' argument from NLA_PUT_FLAG which is unused anyway. The documentation comment was already correct so it doesn't need an update :) Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/net/netlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netlink.h b/include/net/netlink.h index 47044da167c5..bcb27e3a312e 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -828,7 +828,7 @@ static inline int nla_put_msecs(struct sk_buff *skb, int attrtype, #define NLA_PUT_STRING(skb, attrtype, value) \ NLA_PUT(skb, attrtype, strlen(value) + 1, value) -#define NLA_PUT_FLAG(skb, attrtype, value) \ +#define NLA_PUT_FLAG(skb, attrtype) \ NLA_PUT(skb, attrtype, 0, NULL) #define NLA_PUT_MSECS(skb, attrtype, jiffies) \ -- cgit v1.3-8-gc7d7 From 25030a7f9eeab2dcefff036469e0e2b4f956198f Mon Sep 17 00:00:00 2001 From: Gerrit Renker Date: Sat, 26 Aug 2006 20:06:05 -0700 Subject: [UDP]: Unify UDPv4 and UDPv6 ->get_port() This patch creates one common function which is called by udp_v4_get_port() and udp_v6_get_port(). As a result, * duplicated code is removed * udp_port_rover and local port lookup can now be removed from udp.h * further savings follow since the same function will be used by UDP-Litev4 and UDP-Litev6 In contrast to the patch sent in response to Yoshifujis comments (fixed by this variant), the code below also removes the EXPORT_SYMBOL(udp_port_rover), since udp_port_rover can now remain local to net/ipv4/udp.c. Signed-off-by: Gerrit Renker Signed-off-by: David S. Miller --- include/net/udp.h | 18 ++--------- net/ipv4/udp.c | 96 ++++++++++++++++++++++++++++++++++--------------------- net/ipv6/udp.c | 76 ++----------------------------------------- 3 files changed, 64 insertions(+), 126 deletions(-) (limited to 'include/net') diff --git a/include/net/udp.h b/include/net/udp.h index 766fba1369ce..c490a0f662ac 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -30,25 +30,9 @@ #define UDP_HTABLE_SIZE 128 -/* udp.c: This needs to be shared by v4 and v6 because the lookup - * and hashing code needs to work with different AF's yet - * the port space is shared. - */ extern struct hlist_head udp_hash[UDP_HTABLE_SIZE]; extern rwlock_t udp_hash_lock; -extern int udp_port_rover; - -static inline int udp_lport_inuse(u16 num) -{ - struct sock *sk; - struct hlist_node *node; - - sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)]) - if (inet_sk(sk)->num == num) - return 1; - return 0; -} /* Note: this must match 'valbool' in sock_setsockopt */ #define UDP_CSUM_NOXMIT 1 @@ -63,6 +47,8 @@ extern struct proto udp_prot; struct sk_buff; +extern int udp_get_port(struct sock *sk, unsigned short snum, + int (*saddr_cmp)(struct sock *, struct sock *)); extern void udp_err(struct sk_buff *, u32); extern int udp_sendmsg(struct kiocb *iocb, struct sock *sk, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 514c1e9ae810..7552b50bcd84 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -118,14 +118,34 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_statistics) __read_mostly; struct hlist_head udp_hash[UDP_HTABLE_SIZE]; DEFINE_RWLOCK(udp_hash_lock); -/* Shared by v4/v6 udp. */ +/* Shared by v4/v6 udp_get_port */ int udp_port_rover; -static int udp_v4_get_port(struct sock *sk, unsigned short snum) +static inline int udp_lport_inuse(u16 num) { + struct sock *sk; struct hlist_node *node; + + sk_for_each(sk, node, &udp_hash[num & (UDP_HTABLE_SIZE - 1)]) + if (inet_sk(sk)->num == num) + return 1; + return 0; +} + +/** + * udp_get_port - common port lookup for IPv4 and IPv6 + * + * @sk: socket struct in question + * @snum: port number to look up + * @saddr_comp: AF-dependent comparison of bound local IP addresses + */ +int udp_get_port(struct sock *sk, unsigned short snum, + int (*saddr_cmp)(struct sock *sk1, struct sock *sk2)) +{ + struct hlist_node *node; + struct hlist_head *head; struct sock *sk2; - struct inet_sock *inet = inet_sk(sk); + int error = 1; write_lock_bh(&udp_hash_lock); if (snum == 0) { @@ -137,11 +157,10 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum) best_size_so_far = 32767; best = result = udp_port_rover; for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { - struct hlist_head *list; int size; - list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; - if (hlist_empty(list)) { + head = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; + if (hlist_empty(head)) { if (result > sysctl_local_port_range[1]) result = sysctl_local_port_range[0] + ((result - sysctl_local_port_range[0]) & @@ -149,12 +168,11 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum) goto gotit; } size = 0; - sk_for_each(sk2, node, list) - if (++size >= best_size_so_far) - goto next; - best_size_so_far = size; - best = result; - next:; + sk_for_each(sk2, node, head) + if (++size < best_size_so_far) { + best_size_so_far = size; + best = result; + } } result = best; for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) { @@ -170,38 +188,44 @@ static int udp_v4_get_port(struct sock *sk, unsigned short snum) gotit: udp_port_rover = snum = result; } else { - sk_for_each(sk2, node, - &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) { - struct inet_sock *inet2 = inet_sk(sk2); - - if (inet2->num == snum && - sk2 != sk && - !ipv6_only_sock(sk2) && - (!sk2->sk_bound_dev_if || - !sk->sk_bound_dev_if || - sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - (!inet2->rcv_saddr || - !inet->rcv_saddr || - inet2->rcv_saddr == inet->rcv_saddr) && - (!sk2->sk_reuse || !sk->sk_reuse)) + head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; + + sk_for_each(sk2, node, head) + if (inet_sk(sk2)->num == snum && + sk2 != sk && + (!sk2->sk_reuse || !sk->sk_reuse) && + (!sk2->sk_bound_dev_if || !sk->sk_bound_dev_if + || sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && + (*saddr_cmp)(sk, sk2) ) goto fail; - } } - inet->num = snum; + inet_sk(sk)->num = snum; if (sk_unhashed(sk)) { - struct hlist_head *h = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; - - sk_add_node(sk, h); + head = &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]; + sk_add_node(sk, head); sock_prot_inc_use(sk->sk_prot); } - write_unlock_bh(&udp_hash_lock); - return 0; - + error = 0; fail: write_unlock_bh(&udp_hash_lock); - return 1; + return error; +} + +static inline int ipv4_rcv_saddr_equal(struct sock *sk1, struct sock *sk2) +{ + struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); + + return ( !ipv6_only_sock(sk2) && + (!inet1->rcv_saddr || !inet2->rcv_saddr || + inet1->rcv_saddr == inet2->rcv_saddr )); +} + +static inline int udp_v4_get_port(struct sock *sk, unsigned short snum) +{ + return udp_get_port(sk, snum, ipv4_rcv_saddr_equal); } + static void udp_v4_hash(struct sock *sk) { BUG(); @@ -1596,7 +1620,7 @@ EXPORT_SYMBOL(udp_disconnect); EXPORT_SYMBOL(udp_hash); EXPORT_SYMBOL(udp_hash_lock); EXPORT_SYMBOL(udp_ioctl); -EXPORT_SYMBOL(udp_port_rover); +EXPORT_SYMBOL(udp_get_port); EXPORT_SYMBOL(udp_prot); EXPORT_SYMBOL(udp_sendmsg); EXPORT_SYMBOL(udp_poll); diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index b9cc55ccb000..9662561701d1 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -61,81 +61,9 @@ DEFINE_SNMP_STAT(struct udp_mib, udp_stats_in6) __read_mostly; -/* Grrr, addr_type already calculated by caller, but I don't want - * to add some silly "cookie" argument to this method just for that. - */ -static int udp_v6_get_port(struct sock *sk, unsigned short snum) +static inline int udp_v6_get_port(struct sock *sk, unsigned short snum) { - struct sock *sk2; - struct hlist_node *node; - - write_lock_bh(&udp_hash_lock); - if (snum == 0) { - int best_size_so_far, best, result, i; - - if (udp_port_rover > sysctl_local_port_range[1] || - udp_port_rover < sysctl_local_port_range[0]) - udp_port_rover = sysctl_local_port_range[0]; - best_size_so_far = 32767; - best = result = udp_port_rover; - for (i = 0; i < UDP_HTABLE_SIZE; i++, result++) { - int size; - struct hlist_head *list; - - list = &udp_hash[result & (UDP_HTABLE_SIZE - 1)]; - if (hlist_empty(list)) { - if (result > sysctl_local_port_range[1]) - result = sysctl_local_port_range[0] + - ((result - sysctl_local_port_range[0]) & - (UDP_HTABLE_SIZE - 1)); - goto gotit; - } - size = 0; - sk_for_each(sk2, node, list) - if (++size >= best_size_so_far) - goto next; - best_size_so_far = size; - best = result; - next:; - } - result = best; - for(i = 0; i < (1 << 16) / UDP_HTABLE_SIZE; i++, result += UDP_HTABLE_SIZE) { - if (result > sysctl_local_port_range[1]) - result = sysctl_local_port_range[0] - + ((result - sysctl_local_port_range[0]) & - (UDP_HTABLE_SIZE - 1)); - if (!udp_lport_inuse(result)) - break; - } - if (i >= (1 << 16) / UDP_HTABLE_SIZE) - goto fail; -gotit: - udp_port_rover = snum = result; - } else { - sk_for_each(sk2, node, - &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]) { - if (inet_sk(sk2)->num == snum && - sk2 != sk && - (!sk2->sk_bound_dev_if || - !sk->sk_bound_dev_if || - sk2->sk_bound_dev_if == sk->sk_bound_dev_if) && - (!sk2->sk_reuse || !sk->sk_reuse) && - ipv6_rcv_saddr_equal(sk, sk2)) - goto fail; - } - } - - inet_sk(sk)->num = snum; - if (sk_unhashed(sk)) { - sk_add_node(sk, &udp_hash[snum & (UDP_HTABLE_SIZE - 1)]); - sock_prot_inc_use(sk->sk_prot); - } - write_unlock_bh(&udp_hash_lock); - return 0; - -fail: - write_unlock_bh(&udp_hash_lock); - return 1; + return udp_get_port(sk, snum, ipv6_rcv_saddr_equal); } static void udp_v6_hash(struct sock *sk) -- cgit v1.3-8-gc7d7 From e3b4eadbea77ecb3c3a74d1bc81b392f454c7f2e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 26 Aug 2006 20:10:15 -0700 Subject: [UDP]: saddr_cmp function should take const socket pointers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This also kills a warning while building ipv6: net/ipv6/udp.c: In function ‘udp_v6_get_port’: net/ipv6/udp.c:66: warning: passing argument 3 of ‘udp_get_port’ from incompatible pointer type Signed-off-by: David S. Miller --- include/net/udp.h | 2 +- net/ipv4/udp.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/udp.h b/include/net/udp.h index c490a0f662ac..db0c05f67546 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -48,7 +48,7 @@ extern struct proto udp_prot; struct sk_buff; extern int udp_get_port(struct sock *sk, unsigned short snum, - int (*saddr_cmp)(struct sock *, struct sock *)); + int (*saddr_cmp)(const struct sock *, const struct sock *)); extern void udp_err(struct sk_buff *, u32); extern int udp_sendmsg(struct kiocb *iocb, struct sock *sk, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index aa1823050b00..77e265d7bb8f 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -139,7 +139,7 @@ static inline int udp_lport_inuse(u16 num) * @saddr_comp: AF-dependent comparison of bound local IP addresses */ int udp_get_port(struct sock *sk, unsigned short snum, - int (*saddr_cmp)(struct sock *sk1, struct sock *sk2)) + int (*saddr_cmp)(const struct sock *sk1, const struct sock *sk2)) { struct hlist_node *node; struct hlist_head *head; @@ -210,7 +210,7 @@ fail: return error; } -static inline int ipv4_rcv_saddr_equal(struct sock *sk1, struct sock *sk2) +static inline int ipv4_rcv_saddr_equal(const struct sock *sk1, const struct sock *sk2) { struct inet_sock *inet1 = inet_sk(sk1), *inet2 = inet_sk(sk2); -- cgit v1.3-8-gc7d7 From a5531a5d852008be40811496029012f4ad3093d1 Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Sat, 26 Aug 2006 20:11:47 -0700 Subject: [NETLINK]: Improve string attribute validation Introduces a new attribute type NLA_NUL_STRING to support NUL terminated strings. Attributes of this kind require to carry a terminating NUL within the maximum specified in the policy. The `old' NLA_STRING which is not required to be NUL terminated is extended to provide means to specify a maximum length of the string. Aims at easing the pain with using nla_strlcpy() on temporary buffers. Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/net/netlink.h | 15 +++++++++++---- net/netlink/attr.c | 49 +++++++++++++++++++++++++++++++++++++++---------- 2 files changed, 50 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/netlink.h b/include/net/netlink.h index bcb27e3a312e..11dc2e7f679a 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -167,6 +167,7 @@ enum { NLA_FLAG, NLA_MSECS, NLA_NESTED, + NLA_NUL_STRING, __NLA_TYPE_MAX, }; @@ -175,21 +176,27 @@ enum { /** * struct nla_policy - attribute validation policy * @type: Type of attribute or NLA_UNSPEC - * @minlen: Minimal length of payload required to be available + * @len: Type specific length of payload * * Policies are defined as arrays of this struct, the array must be * accessible by attribute type up to the highest identifier to be expected. * + * Meaning of `len' field: + * NLA_STRING Maximum length of string + * NLA_NUL_STRING Maximum length of string (excluding NUL) + * NLA_FLAG Unused + * All other Exact length of attribute payload + * * Example: * static struct nla_policy my_policy[ATTR_MAX+1] __read_mostly = { * [ATTR_FOO] = { .type = NLA_U16 }, - * [ATTR_BAR] = { .type = NLA_STRING }, - * [ATTR_BAZ] = { .minlen = sizeof(struct mystruct) }, + * [ATTR_BAR] = { .type = NLA_STRING, len = BARSIZ }, + * [ATTR_BAZ] = { .len = sizeof(struct mystruct) }, * }; */ struct nla_policy { u16 type; - u16 minlen; + u16 len; }; /** diff --git a/net/netlink/attr.c b/net/netlink/attr.c index 136e529e5780..004139557e09 100644 --- a/net/netlink/attr.c +++ b/net/netlink/attr.c @@ -20,7 +20,6 @@ static u16 nla_attr_minlen[NLA_TYPE_MAX+1] __read_mostly = { [NLA_U16] = sizeof(u16), [NLA_U32] = sizeof(u32), [NLA_U64] = sizeof(u64), - [NLA_STRING] = 1, [NLA_NESTED] = NLA_HDRLEN, }; @@ -28,7 +27,7 @@ static int validate_nla(struct nlattr *nla, int maxtype, struct nla_policy *policy) { struct nla_policy *pt; - int minlen = 0; + int minlen = 0, attrlen = nla_len(nla); if (nla->nla_type <= 0 || nla->nla_type > maxtype) return 0; @@ -37,16 +36,46 @@ static int validate_nla(struct nlattr *nla, int maxtype, BUG_ON(pt->type > NLA_TYPE_MAX); - if (pt->minlen) - minlen = pt->minlen; - else if (pt->type != NLA_UNSPEC) - minlen = nla_attr_minlen[pt->type]; + switch (pt->type) { + case NLA_FLAG: + if (attrlen > 0) + return -ERANGE; + break; - if (pt->type == NLA_FLAG && nla_len(nla) > 0) - return -ERANGE; + case NLA_NUL_STRING: + if (pt->len) + minlen = min_t(int, attrlen, pt->len + 1); + else + minlen = attrlen; - if (nla_len(nla) < minlen) - return -ERANGE; + if (!minlen || memchr(nla_data(nla), '\0', minlen) == NULL) + return -EINVAL; + /* fall through */ + + case NLA_STRING: + if (attrlen < 1) + return -ERANGE; + + if (pt->len) { + char *buf = nla_data(nla); + + if (buf[attrlen - 1] == '\0') + attrlen--; + + if (attrlen > pt->len) + return -ERANGE; + } + break; + + default: + if (pt->len) + minlen = pt->len; + else if (pt->type != NLA_UNSPEC) + minlen = nla_attr_minlen[pt->type]; + + if (attrlen < minlen) + return -ERANGE; + } return 0; } -- cgit v1.3-8-gc7d7 From 1b7f775209bbee6b993587bae69acb9fc12ceb17 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 29 Aug 2006 17:54:17 -0700 Subject: [NetLabel]: remove unused function prototypes Removed some older function prototypes for functions that no longer exist. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- include/net/cipso_ipv4.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/net') diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index c7175e725804..5aed72ab652b 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -200,15 +200,9 @@ static inline int cipso_v4_cache_add(const struct sk_buff *skb, #ifdef CONFIG_NETLABEL void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway); -int cipso_v4_socket_setopt(struct socket *sock, - unsigned char *opt, - u32 opt_len); int cipso_v4_socket_setattr(const struct socket *sock, const struct cipso_v4_doi *doi_def, const struct netlbl_lsm_secattr *secattr); -int cipso_v4_socket_getopt(const struct socket *sock, - unsigned char **opt, - u32 *opt_len); int cipso_v4_socket_getattr(const struct socket *sock, struct netlbl_lsm_secattr *secattr); int cipso_v4_skbuff_getattr(const struct sk_buff *skb, -- cgit v1.3-8-gc7d7 From 7a0e1d602288370801c353221c6a938eab925053 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 29 Aug 2006 17:56:04 -0700 Subject: [NetLabel]: add some missing #includes to various header files Add some missing include files to the NetLabel related header files. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- include/net/cipso_ipv4.h | 2 ++ include/net/netlabel.h | 1 + net/netlabel/netlabel_domainhash.h | 4 ++++ net/netlabel/netlabel_user.h | 5 +++-- security/selinux/include/selinux_netlabel.h | 9 +++++++++ 5 files changed, 19 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index 5aed72ab652b..59406e0dc5b2 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -37,6 +37,8 @@ #include #include #include +#include +#include #include /* known doi values */ diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 7cae730832c7..fc2b72fc7e07 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -31,6 +31,7 @@ #define _NETLABEL_H #include +#include #include #include diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index 9217863ce0d3..99a2287de246 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h @@ -32,6 +32,10 @@ #ifndef _NETLABEL_DOMAINHASH_H #define _NETLABEL_DOMAINHASH_H +#include +#include +#include + /* Domain hash table size */ /* XXX - currently this number is an uneducated guess */ #define NETLBL_DOMHSH_BITSIZE 7 diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h index ccf237b3a128..385a6c7488c6 100644 --- a/net/netlabel/netlabel_user.h +++ b/net/netlabel/netlabel_user.h @@ -31,11 +31,12 @@ #ifndef _NETLABEL_USER_H #define _NETLABEL_USER_H +#include #include #include -#include -#include +#include #include +#include /* NetLabel NETLINK helper functions */ diff --git a/security/selinux/include/selinux_netlabel.h b/security/selinux/include/selinux_netlabel.h index d69ec650cdbe..ecab4bddaaf4 100644 --- a/security/selinux/include/selinux_netlabel.h +++ b/security/selinux/include/selinux_netlabel.h @@ -27,6 +27,15 @@ #ifndef _SELINUX_NETLABEL_H_ #define _SELINUX_NETLABEL_H_ +#include +#include +#include +#include +#include + +#include "avc.h" +#include "objsec.h" + #ifdef CONFIG_NETLABEL void selinux_netlbl_cache_invalidate(void); int selinux_netlbl_socket_post_create(struct socket *sock, -- cgit v1.3-8-gc7d7 From fda9ef5d679b07c9d9097aaf6ef7f069d794a8f9 Mon Sep 17 00:00:00 2001 From: Dmitry Mishin Date: Thu, 31 Aug 2006 15:28:39 -0700 Subject: [NET]: Fix sk->sk_filter field access Function sk_filter() is called from tcp_v{4,6}_rcv() functions with arg needlock = 0, while socket is not locked at that moment. In order to avoid this and similar issues in the future, use rcu for sk->sk_filter field read protection. Signed-off-by: Dmitry Mishin Signed-off-by: Alexey Kuznetsov Signed-off-by: Kirill Korotaev --- include/linux/filter.h | 13 +++++++------ include/net/sock.h | 34 +++++++++++++++++----------------- net/core/filter.c | 8 ++++---- net/core/sock.c | 22 +++++++++------------- net/dccp/ipv6.c | 2 +- net/decnet/dn_nsp_in.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 4 ++-- net/packet/af_packet.c | 43 ++++++++++++++++++------------------------- net/sctp/input.c | 2 +- 10 files changed, 61 insertions(+), 71 deletions(-) (limited to 'include/net') diff --git a/include/linux/filter.h b/include/linux/filter.h index c6cb8f095088..91b2e3b9251e 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -25,10 +25,10 @@ struct sock_filter /* Filter block */ { - __u16 code; /* Actual filter code */ - __u8 jt; /* Jump true */ - __u8 jf; /* Jump false */ - __u32 k; /* Generic multiuse field */ + __u16 code; /* Actual filter code */ + __u8 jt; /* Jump true */ + __u8 jf; /* Jump false */ + __u32 k; /* Generic multiuse field */ }; struct sock_fprog /* Required for SO_ATTACH_FILTER. */ @@ -41,8 +41,9 @@ struct sock_fprog /* Required for SO_ATTACH_FILTER. */ struct sk_filter { atomic_t refcnt; - unsigned int len; /* Number of filter blocks */ - struct sock_filter insns[0]; + unsigned int len; /* Number of filter blocks */ + struct rcu_head rcu; + struct sock_filter insns[0]; }; static inline unsigned int sk_filter_len(struct sk_filter *fp) diff --git a/include/net/sock.h b/include/net/sock.h index 337ebec84c70..edd4d73ce7f5 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -862,30 +862,24 @@ extern void sock_init_data(struct socket *sock, struct sock *sk); * */ -static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock) +static inline int sk_filter(struct sock *sk, struct sk_buff *skb) { int err; + struct sk_filter *filter; err = security_sock_rcv_skb(sk, skb); if (err) return err; - if (sk->sk_filter) { - struct sk_filter *filter; - - if (needlock) - bh_lock_sock(sk); - - filter = sk->sk_filter; - if (filter) { - unsigned int pkt_len = sk_run_filter(skb, filter->insns, - filter->len); - err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; - } - - if (needlock) - bh_unlock_sock(sk); + rcu_read_lock_bh(); + filter = sk->sk_filter; + if (filter) { + unsigned int pkt_len = sk_run_filter(skb, filter->insns, + filter->len); + err = pkt_len ? pskb_trim(skb, pkt_len) : -EPERM; } + rcu_read_unlock_bh(); + return err; } @@ -897,6 +891,12 @@ static inline int sk_filter(struct sock *sk, struct sk_buff *skb, int needlock) * Remove a filter from a socket and release its resources. */ +static inline void sk_filter_rcu_free(struct rcu_head *rcu) +{ + struct sk_filter *fp = container_of(rcu, struct sk_filter, rcu); + kfree(fp); +} + static inline void sk_filter_release(struct sock *sk, struct sk_filter *fp) { unsigned int size = sk_filter_len(fp); @@ -904,7 +904,7 @@ static inline void sk_filter_release(struct sock *sk, struct sk_filter *fp) atomic_sub(size, &sk->sk_omem_alloc); if (atomic_dec_and_test(&fp->refcnt)) - kfree(fp); + call_rcu_bh(&fp->rcu, sk_filter_rcu_free); } static inline void sk_filter_charge(struct sock *sk, struct sk_filter *fp) diff --git a/net/core/filter.c b/net/core/filter.c index 5b4486a60cf6..6732782a5a40 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -422,10 +422,10 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (!err) { struct sk_filter *old_fp; - spin_lock_bh(&sk->sk_lock.slock); - old_fp = sk->sk_filter; - sk->sk_filter = fp; - spin_unlock_bh(&sk->sk_lock.slock); + rcu_read_lock_bh(); + old_fp = rcu_dereference(sk->sk_filter); + rcu_assign_pointer(sk->sk_filter, fp); + rcu_read_unlock_bh(); fp = old_fp; } diff --git a/net/core/sock.c b/net/core/sock.c index cfaf09039b02..b77e155cbe6c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -247,11 +247,7 @@ int sock_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) goto out; } - /* It would be deadlock, if sock_queue_rcv_skb is used - with socket lock! We assume that users of this - function are lock free. - */ - err = sk_filter(sk, skb, 1); + err = sk_filter(sk, skb); if (err) goto out; @@ -278,7 +274,7 @@ int sk_receive_skb(struct sock *sk, struct sk_buff *skb) { int rc = NET_RX_SUCCESS; - if (sk_filter(sk, skb, 0)) + if (sk_filter(sk, skb)) goto discard_and_relse; skb->dev = NULL; @@ -606,15 +602,15 @@ set_rcvbuf: break; case SO_DETACH_FILTER: - spin_lock_bh(&sk->sk_lock.slock); - filter = sk->sk_filter; + rcu_read_lock_bh(); + filter = rcu_dereference(sk->sk_filter); if (filter) { - sk->sk_filter = NULL; - spin_unlock_bh(&sk->sk_lock.slock); + rcu_assign_pointer(sk->sk_filter, NULL); sk_filter_release(sk, filter); + rcu_read_unlock_bh(); break; } - spin_unlock_bh(&sk->sk_lock.slock); + rcu_read_unlock_bh(); ret = -ENONET; break; @@ -884,10 +880,10 @@ void sk_free(struct sock *sk) if (sk->sk_destruct) sk->sk_destruct(sk); - filter = sk->sk_filter; + filter = rcu_dereference(sk->sk_filter); if (filter) { sk_filter_release(sk, filter); - sk->sk_filter = NULL; + rcu_assign_pointer(sk->sk_filter, NULL); } sock_disable_timestamp(sk); diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c index f9c5e12d7038..7a47399cf31f 100644 --- a/net/dccp/ipv6.c +++ b/net/dccp/ipv6.c @@ -970,7 +970,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_IP)) return dccp_v4_do_rcv(sk, skb); - if (sk_filter(sk, skb, 0)) + if (sk_filter(sk, skb)) goto discard; /* diff --git a/net/decnet/dn_nsp_in.c b/net/decnet/dn_nsp_in.c index 86f7f3b28e70..72ecc6e62ec4 100644 --- a/net/decnet/dn_nsp_in.c +++ b/net/decnet/dn_nsp_in.c @@ -586,7 +586,7 @@ static __inline__ int dn_queue_skb(struct sock *sk, struct sk_buff *skb, int sig goto out; } - err = sk_filter(sk, skb, 0); + err = sk_filter(sk, skb); if (err) goto out; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 23b46e36b147..39b179856082 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1104,7 +1104,7 @@ process: goto discard_and_relse; nf_reset(skb); - if (sk_filter(sk, skb, 0)) + if (sk_filter(sk, skb)) goto discard_and_relse; skb->dev = NULL; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 2b18918f3011..2546fc9f0a78 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1075,7 +1075,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) if (skb->protocol == htons(ETH_P_IP)) return tcp_v4_do_rcv(sk, skb); - if (sk_filter(sk, skb, 0)) + if (sk_filter(sk, skb)) goto discard; /* @@ -1232,7 +1232,7 @@ process: if (!xfrm6_policy_check(sk, XFRM_POLICY_IN, skb)) goto discard_and_relse; - if (sk_filter(sk, skb, 0)) + if (sk_filter(sk, skb)) goto discard_and_relse; skb->dev = NULL; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 300215bdbf46..f4ccb90e6739 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -427,21 +427,24 @@ out_unlock: } #endif -static inline unsigned run_filter(struct sk_buff *skb, struct sock *sk, unsigned res) +static inline int run_filter(struct sk_buff *skb, struct sock *sk, + unsigned *snaplen) { struct sk_filter *filter; + int err = 0; - bh_lock_sock(sk); - filter = sk->sk_filter; - /* - * Our caller already checked that filter != NULL but we need to - * verify that under bh_lock_sock() to be safe - */ - if (likely(filter != NULL)) - res = sk_run_filter(skb, filter->insns, filter->len); - bh_unlock_sock(sk); + rcu_read_lock_bh(); + filter = rcu_dereference(sk->sk_filter); + if (filter != NULL) { + err = sk_run_filter(skb, filter->insns, filter->len); + if (!err) + err = -EPERM; + else if (*snaplen > err) + *snaplen = err; + } + rcu_read_unlock_bh(); - return res; + return err; } /* @@ -491,13 +494,8 @@ static int packet_rcv(struct sk_buff *skb, struct net_device *dev, struct packet snaplen = skb->len; - if (sk->sk_filter) { - unsigned res = run_filter(skb, sk, snaplen); - if (res == 0) - goto drop_n_restore; - if (snaplen > res) - snaplen = res; - } + if (run_filter(skb, sk, &snaplen) < 0) + goto drop_n_restore; if (atomic_read(&sk->sk_rmem_alloc) + skb->truesize >= (unsigned)sk->sk_rcvbuf) @@ -593,13 +591,8 @@ static int tpacket_rcv(struct sk_buff *skb, struct net_device *dev, struct packe snaplen = skb->len; - if (sk->sk_filter) { - unsigned res = run_filter(skb, sk, snaplen); - if (res == 0) - goto drop_n_restore; - if (snaplen > res) - snaplen = res; - } + if (run_filter(skb, sk, &snaplen) < 0) + goto drop_n_restore; if (sk->sk_type == SOCK_DGRAM) { macoff = netoff = TPACKET_ALIGN(TPACKET_HDRLEN) + 16; diff --git a/net/sctp/input.c b/net/sctp/input.c index 8a34d95602ce..03f65de75d88 100644 --- a/net/sctp/input.c +++ b/net/sctp/input.c @@ -228,7 +228,7 @@ int sctp_rcv(struct sk_buff *skb) goto discard_release; nf_reset(skb); - if (sk_filter(sk, skb, 1)) + if (sk_filter(sk, skb)) goto discard_release; /* Create an SCTP packet structure. */ -- cgit v1.3-8-gc7d7 From eb878e84575fbce21d2edb079eada78bfa27023d Mon Sep 17 00:00:00 2001 From: Jamal Hadi Salim Date: Thu, 31 Aug 2006 17:42:59 -0700 Subject: [IPSEC]: output mode to take an xfrm state as input param Expose IPSEC modes output path to take an xfrm state as input param. This makes it consistent with the input mode processing (which already takes the xfrm state as a param). Signed-off-by: Jamal Hadi Salim Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- net/ipv4/xfrm4_mode_transport.c | 4 +--- net/ipv4/xfrm4_mode_tunnel.c | 3 +-- net/ipv4/xfrm4_output.c | 2 +- net/ipv6/xfrm6_mode_ro.c | 3 +-- net/ipv6/xfrm6_mode_transport.c | 3 +-- net/ipv6/xfrm6_mode_tunnel.c | 3 +-- net/ipv6/xfrm6_output.c | 2 +- 8 files changed, 8 insertions(+), 14 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 0acabf2a0a8f..4d6dc627df9b 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -285,7 +285,7 @@ extern void xfrm_put_type(struct xfrm_type *type); struct xfrm_mode { int (*input)(struct xfrm_state *x, struct sk_buff *skb); - int (*output)(struct sk_buff *skb); + int (*output)(struct xfrm_state *x,struct sk_buff *skb); struct module *owner; unsigned int encap; diff --git a/net/ipv4/xfrm4_mode_transport.c b/net/ipv4/xfrm4_mode_transport.c index a9e6b3dd19c9..92676b7e4034 100644 --- a/net/ipv4/xfrm4_mode_transport.c +++ b/net/ipv4/xfrm4_mode_transport.c @@ -21,9 +21,8 @@ * On exit, skb->h will be set to the start of the payload to be processed * by x->type->output and skb->nh will be set to the top IP header. */ -static int xfrm4_transport_output(struct sk_buff *skb) +static int xfrm4_transport_output(struct xfrm_state *x, struct sk_buff *skb) { - struct xfrm_state *x; struct iphdr *iph; int ihl; @@ -33,7 +32,6 @@ static int xfrm4_transport_output(struct sk_buff *skb) ihl = iph->ihl * 4; skb->h.raw += ihl; - x = skb->dst->xfrm; skb->nh.raw = memmove(skb_push(skb, x->props.header_len), iph, ihl); return 0; } diff --git a/net/ipv4/xfrm4_mode_tunnel.c b/net/ipv4/xfrm4_mode_tunnel.c index 13cafbe56ce3..e23c21d31a53 100644 --- a/net/ipv4/xfrm4_mode_tunnel.c +++ b/net/ipv4/xfrm4_mode_tunnel.c @@ -33,10 +33,9 @@ static inline void ipip_ecn_decapsulate(struct sk_buff *skb) * On exit, skb->h will be set to the start of the payload to be processed * by x->type->output and skb->nh will be set to the top IP header. */ -static int xfrm4_tunnel_output(struct sk_buff *skb) +static int xfrm4_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) { struct dst_entry *dst = skb->dst; - struct xfrm_state *x = dst->xfrm; struct iphdr *iph, *top_iph; int flags; diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c index 5fd115f0c547..04403fb01a58 100644 --- a/net/ipv4/xfrm4_output.c +++ b/net/ipv4/xfrm4_output.c @@ -66,7 +66,7 @@ static int xfrm4_output_one(struct sk_buff *skb) if (err) goto error; - err = x->mode->output(skb); + err = x->mode->output(x, skb); if (err) goto error; diff --git a/net/ipv6/xfrm6_mode_ro.c b/net/ipv6/xfrm6_mode_ro.c index c11c335312f9..6031c16d46ca 100644 --- a/net/ipv6/xfrm6_mode_ro.c +++ b/net/ipv6/xfrm6_mode_ro.c @@ -43,9 +43,8 @@ * its absence, that of the top IP header. The value of skb->data will always * point to the top IP header. */ -static int xfrm6_ro_output(struct sk_buff *skb) +static int xfrm6_ro_output(struct xfrm_state *x, struct sk_buff *skb) { - struct xfrm_state *x = skb->dst->xfrm; struct ipv6hdr *iph; u8 *prevhdr; int hdr_len; diff --git a/net/ipv6/xfrm6_mode_transport.c b/net/ipv6/xfrm6_mode_transport.c index a5dce216024d..3a4b39b12bad 100644 --- a/net/ipv6/xfrm6_mode_transport.c +++ b/net/ipv6/xfrm6_mode_transport.c @@ -25,9 +25,8 @@ * its absence, that of the top IP header. The value of skb->data will always * point to the top IP header. */ -static int xfrm6_transport_output(struct sk_buff *skb) +static int xfrm6_transport_output(struct xfrm_state *x, struct sk_buff *skb) { - struct xfrm_state *x = skb->dst->xfrm; struct ipv6hdr *iph; u8 *prevhdr; int hdr_len; diff --git a/net/ipv6/xfrm6_mode_tunnel.c b/net/ipv6/xfrm6_mode_tunnel.c index 8af79be2edca..5e7d8a7d6414 100644 --- a/net/ipv6/xfrm6_mode_tunnel.c +++ b/net/ipv6/xfrm6_mode_tunnel.c @@ -37,10 +37,9 @@ static inline void ipip6_ecn_decapsulate(struct sk_buff *skb) * its absence, that of the top IP header. The value of skb->data will always * point to the top IP header. */ -static int xfrm6_tunnel_output(struct sk_buff *skb) +static int xfrm6_tunnel_output(struct xfrm_state *x, struct sk_buff *skb) { struct dst_entry *dst = skb->dst; - struct xfrm_state *x = dst->xfrm; struct ipv6hdr *iph, *top_iph; int dsfield; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index db58104e710b..c260ea104c52 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -65,7 +65,7 @@ static int xfrm6_output_one(struct sk_buff *skb) if (err) goto error; - err = x->mode->output(skb); + err = x->mode->output(x, skb); if (err) goto error; -- cgit v1.3-8-gc7d7 From eb328111efde7bca782f340fe805756039ec6a0c Mon Sep 17 00:00:00 2001 From: Thomas Graf Date: Mon, 18 Sep 2006 00:01:59 -0700 Subject: [GENL]: Provide more information to userspace about registered genl families Additionaly exports the following information when providing the list of registered generic netlink families: - protocol version - header size - maximum number of attributes - list of available operations including - id - flags - avaiability of policy and doit/dumpit function libnl HEAD provides a utility to read this new information: 0x0010 nlctrl version 1 hdrsize 0 maxattr 6 op GETFAMILY (0x03) [POLICY,DOIT,DUMPIT] 0x0011 NLBL_MGMT version 1 hdrsize 0 maxattr 0 op unknown (0x02) [DOIT] op unknown (0x03) [DOIT] .... Signed-off-by: Thomas Graf Signed-off-by: David S. Miller --- include/linux/genetlink.h | 18 ++++++++++++++++++ include/net/genetlink.h | 2 -- net/netlink/genetlink.c | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 58 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h index 84f12a41dc01..9049dc65ae51 100644 --- a/include/linux/genetlink.h +++ b/include/linux/genetlink.h @@ -16,6 +16,8 @@ struct genlmsghdr { #define GENL_HDRLEN NLMSG_ALIGN(sizeof(struct genlmsghdr)) +#define GENL_ADMIN_PERM 0x01 + /* * List of reserved static generic netlink identifiers: */ @@ -43,9 +45,25 @@ enum { CTRL_ATTR_UNSPEC, CTRL_ATTR_FAMILY_ID, CTRL_ATTR_FAMILY_NAME, + CTRL_ATTR_VERSION, + CTRL_ATTR_HDRSIZE, + CTRL_ATTR_MAXATTR, + CTRL_ATTR_OPS, __CTRL_ATTR_MAX, }; #define CTRL_ATTR_MAX (__CTRL_ATTR_MAX - 1) +enum { + CTRL_ATTR_OP_UNSPEC, + CTRL_ATTR_OP_ID, + CTRL_ATTR_OP_FLAGS, + CTRL_ATTR_OP_POLICY, + CTRL_ATTR_OP_DOIT, + CTRL_ATTR_OP_DUMPIT, + __CTRL_ATTR_OP_MAX, +}; + +#define CTRL_ATTR_OP_MAX (__CTRL_ATTR_OP_MAX - 1) + #endif /* __LINUX_GENERIC_NETLINK_H */ diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 97d6d3aba9d2..4a38d85e4e25 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -27,8 +27,6 @@ struct genl_family struct list_head family_list; /* private */ }; -#define GENL_ADMIN_PERM 0x01 - /** * struct genl_info - receiving information * @snd_seq: sending sequence number diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 3ac942cdb677..49bc2db7982b 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -387,7 +387,10 @@ static void genl_rcv(struct sock *sk, int len) static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) { + struct nlattr *nla_ops; + struct genl_ops *ops; void *hdr; + int idx = 1; hdr = genlmsg_put(skb, pid, seq, GENL_ID_CTRL, 0, flags, cmd, family->version); @@ -396,6 +399,37 @@ static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, NLA_PUT_STRING(skb, CTRL_ATTR_FAMILY_NAME, family->name); NLA_PUT_U16(skb, CTRL_ATTR_FAMILY_ID, family->id); + NLA_PUT_U32(skb, CTRL_ATTR_VERSION, family->version); + NLA_PUT_U32(skb, CTRL_ATTR_HDRSIZE, family->hdrsize); + NLA_PUT_U32(skb, CTRL_ATTR_MAXATTR, family->maxattr); + + nla_ops = nla_nest_start(skb, CTRL_ATTR_OPS); + if (nla_ops == NULL) + goto nla_put_failure; + + list_for_each_entry(ops, &family->ops_list, ops_list) { + struct nlattr *nest; + + nest = nla_nest_start(skb, idx++); + if (nest == NULL) + goto nla_put_failure; + + NLA_PUT_U32(skb, CTRL_ATTR_OP_ID, ops->cmd); + NLA_PUT_U32(skb, CTRL_ATTR_OP_FLAGS, ops->flags); + + if (ops->policy) + NLA_PUT_FLAG(skb, CTRL_ATTR_OP_POLICY); + + if (ops->doit) + NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DOIT); + + if (ops->dumpit) + NLA_PUT_FLAG(skb, CTRL_ATTR_OP_DUMPIT); + + nla_nest_end(skb, nest); + } + + nla_nest_end(skb, nla_ops); return genlmsg_end(skb, hdr); @@ -411,6 +445,9 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) int chains_to_skip = cb->args[0]; int fams_to_skip = cb->args[1]; + if (chains_to_skip != 0) + genl_lock(); + for (i = 0; i < GENL_FAM_TAB_SIZE; i++) { if (i < chains_to_skip) continue; @@ -428,6 +465,9 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) } errout: + if (chains_to_skip != 0) + genl_unlock(); + cb->args[0] = i; cb->args[1] = n; -- cgit v1.3-8-gc7d7 From 161643660129dd7d98f0b12418c0a2710ffa7db6 Mon Sep 17 00:00:00 2001 From: Adrian Bunk Date: Mon, 18 Sep 2006 00:40:38 -0700 Subject: [SCTP]: Cleanups This patch contains the following cleanups: - make the following needlessly global function static: - socket.c: sctp_apply_peer_addr_params() - add proper prototypes for the several global functions in include/net/sctp/sctp.h Note that this fixes wrong prototypes for the following functions: - sctp_snmp_proc_exit() - sctp_eps_proc_exit() - sctp_assocs_proc_exit() The latter was spotted by the GNU C compiler and reported by David Woodhouse. Signed-off-by: Adrian Bunk Acked-by: Sridhar Samudrala Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 13 +++++++++++++ net/sctp/ipv6.c | 1 - net/sctp/protocol.c | 7 ------- net/sctp/socket.c | 14 +++++++------- 4 files changed, 20 insertions(+), 15 deletions(-) (limited to 'include/net') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index e274fd479990..ee68a3124076 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -128,6 +128,8 @@ extern int sctp_copy_local_addr_list(struct sctp_bind_addr *, int flags); extern struct sctp_pf *sctp_get_pf_specific(sa_family_t family); extern int sctp_register_pf(struct sctp_pf *, sa_family_t); +int sctp_inetaddr_event(struct notifier_block *this, unsigned long ev, + void *ptr); /* * sctp/socket.c @@ -177,6 +179,17 @@ void sctp_icmp_proto_unreachable(struct sock *sk, void sctp_backlog_migrate(struct sctp_association *assoc, struct sock *oldsk, struct sock *newsk); +/* + * sctp/proc.c + */ +int sctp_snmp_proc_init(void); +void sctp_snmp_proc_exit(void); +int sctp_eps_proc_init(void); +void sctp_eps_proc_exit(void); +int sctp_assocs_proc_init(void); +void sctp_assocs_proc_exit(void); + + /* * Section: Macros, externs, and inlines */ diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index 99c0cefc04e0..fd87e3ceb56e 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -78,7 +78,6 @@ #include -extern int sctp_inetaddr_event(struct notifier_block *, unsigned long, void *); static struct notifier_block sctp_inet6addr_notifier = { .notifier_call = sctp_inetaddr_event, }; diff --git a/net/sctp/protocol.c b/net/sctp/protocol.c index d9dd4c47bc29..fac7674438a4 100644 --- a/net/sctp/protocol.c +++ b/net/sctp/protocol.c @@ -82,13 +82,6 @@ static struct sctp_af *sctp_af_v6_specific; kmem_cache_t *sctp_chunk_cachep __read_mostly; kmem_cache_t *sctp_bucket_cachep __read_mostly; -extern int sctp_snmp_proc_init(void); -extern int sctp_snmp_proc_exit(void); -extern int sctp_eps_proc_init(void); -extern int sctp_eps_proc_exit(void); -extern int sctp_assocs_proc_init(void); -extern int sctp_assocs_proc_exit(void); - /* Return the address of the control sock. */ struct sock *sctp_get_ctl_sock(void) { diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 7c1dbb1d10df..79c3e072cf28 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -2081,13 +2081,13 @@ static int sctp_setsockopt_autoclose(struct sock *sk, char __user *optval, * SPP_SACKDELAY_ENABLE, setting both will have undefined * results. */ -int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, - struct sctp_transport *trans, - struct sctp_association *asoc, - struct sctp_sock *sp, - int hb_change, - int pmtud_change, - int sackdelay_change) +static int sctp_apply_peer_addr_params(struct sctp_paddrparams *params, + struct sctp_transport *trans, + struct sctp_association *asoc, + struct sctp_sock *sp, + int hb_change, + int pmtud_change, + int sackdelay_change) { int error; -- cgit v1.3-8-gc7d7 From 1ef9696c909060ccdae3ade245ca88692b49285b Mon Sep 17 00:00:00 2001 From: Alexey Kuznetsov Date: Tue, 19 Sep 2006 12:52:50 -0700 Subject: [TCP]: Send ACKs each 2nd received segment. It does not affect either mss-sized connections (obviously) or connections controlled by Nagle (because there is only one small segment in flight). The idea is to record the fact that a small segment arrives on a connection, where one small segment has already been received and still not-ACKed. In this case ACK is forced after tcp_recvmsg() drains receive buffer. In other words, it is a "soft" each-2nd-segment ACK, which is enough to preserve ACK clock even when ABC is enabled. Signed-off-by: Alexey Kuznetsov Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 3 ++- net/ipv4/tcp.c | 7 +++++-- net/ipv4/tcp_input.c | 2 ++ 3 files changed, 9 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 9bf73fe50948..de4e83b6da4b 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -147,7 +147,8 @@ extern struct sock *inet_csk_clone(struct sock *sk, enum inet_csk_ack_state_t { ICSK_ACK_SCHED = 1, ICSK_ACK_TIMER = 2, - ICSK_ACK_PUSHED = 4 + ICSK_ACK_PUSHED = 4, + ICSK_ACK_PUSHED2 = 8 }; extern void inet_csk_init_xmit_timers(struct sock *sk, diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 29e3d606db78..66e9a729f6df 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -955,8 +955,11 @@ void tcp_cleanup_rbuf(struct sock *sk, int copied) * receive buffer and there was a small segment * in queue. */ - (copied > 0 && (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && - !icsk->icsk_ack.pingpong && !atomic_read(&sk->sk_rmem_alloc))) + (copied > 0 && + ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED2) || + ((icsk->icsk_ack.pending & ICSK_ACK_PUSHED) && + !icsk->icsk_ack.pingpong)) && + !atomic_read(&sk->sk_rmem_alloc))) time_to_ack = 1; } diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 511b738f118a..b3def0df14fb 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -156,6 +156,8 @@ static void tcp_measure_rcv_mss(struct sock *sk, return; } } + if (icsk->icsk_ack.pending & ICSK_ACK_PUSHED) + icsk->icsk_ack.pending |= ICSK_ACK_PUSHED2; icsk->icsk_ack.pending |= ICSK_ACK_PUSHED; } } -- cgit v1.3-8-gc7d7 From a1e59abf824969554b90facd44a4ab16e265afa4 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Tue, 19 Sep 2006 12:57:34 -0700 Subject: [XFRM]: Fix wildcard as tunnel source Hashing SAs by source address breaks templates with wildcards as tunnel source since the source address used for hashing/lookup is still 0/0. Move source address lookup to xfrm_tmpl_resolve_one() so we can use the real address in the lookup. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/xfrm.h | 13 +++++++++++++ net/ipv4/xfrm4_policy.c | 20 ++++++++++++++++++++ net/ipv4/xfrm4_state.c | 15 --------------- net/ipv6/xfrm6_policy.c | 21 +++++++++++++++++++++ net/ipv6/xfrm6_state.c | 16 ---------------- net/xfrm/xfrm_policy.c | 21 +++++++++++++++++++++ 6 files changed, 75 insertions(+), 31 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 4d6dc627df9b..11e0b1d6bd47 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -222,6 +222,7 @@ struct xfrm_policy_afinfo { struct dst_ops *dst_ops; void (*garbage_collect)(void); int (*dst_lookup)(struct xfrm_dst **dst, struct flowi *fl); + int (*get_saddr)(xfrm_address_t *saddr, xfrm_address_t *daddr); struct dst_entry *(*find_bundle)(struct flowi *fl, struct xfrm_policy *policy); int (*bundle_create)(struct xfrm_policy *policy, struct xfrm_state **xfrm, @@ -630,6 +631,18 @@ secpath_reset(struct sk_buff *skb) #endif } +static inline int +xfrm_addr_any(xfrm_address_t *addr, unsigned short family) +{ + switch (family) { + case AF_INET: + return addr->a4 == 0; + case AF_INET6: + return ipv6_addr_any((struct in6_addr *)&addr->a6); + } + return 0; +} + static inline int __xfrm4_state_addr_cmp(struct xfrm_tmpl *tmpl, struct xfrm_state *x) { diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 479598566f1d..eabcd27b1767 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -21,6 +21,25 @@ static int xfrm4_dst_lookup(struct xfrm_dst **dst, struct flowi *fl) return __ip_route_output_key((struct rtable**)dst, fl); } +static int xfrm4_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) +{ + struct rtable *rt; + struct flowi fl_tunnel = { + .nl_u = { + .ip4_u = { + .daddr = daddr->a4, + }, + }, + }; + + if (!xfrm4_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) { + saddr->a4 = rt->rt_src; + dst_release(&rt->u.dst); + return 0; + } + return -EHOSTUNREACH; +} + static struct dst_entry * __xfrm4_find_bundle(struct flowi *fl, struct xfrm_policy *policy) { @@ -298,6 +317,7 @@ static struct xfrm_policy_afinfo xfrm4_policy_afinfo = { .family = AF_INET, .dst_ops = &xfrm4_dst_ops, .dst_lookup = xfrm4_dst_lookup, + .get_saddr = xfrm4_get_saddr, .find_bundle = __xfrm4_find_bundle, .bundle_create = __xfrm4_bundle_create, .decode_session = _decode_session4, diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c index 6a2a4ab42772..fe2034494d08 100644 --- a/net/ipv4/xfrm4_state.c +++ b/net/ipv4/xfrm4_state.c @@ -42,21 +42,6 @@ __xfrm4_init_tempsel(struct xfrm_state *x, struct flowi *fl, x->props.saddr = tmpl->saddr; if (x->props.saddr.a4 == 0) x->props.saddr.a4 = saddr->a4; - if (tmpl->mode == XFRM_MODE_TUNNEL && x->props.saddr.a4 == 0) { - struct rtable *rt; - struct flowi fl_tunnel = { - .nl_u = { - .ip4_u = { - .daddr = x->id.daddr.a4, - } - } - }; - if (!xfrm_dst_lookup((struct xfrm_dst **)&rt, - &fl_tunnel, AF_INET)) { - x->props.saddr.a4 = rt->rt_src; - dst_release(&rt->u.dst); - } - } x->props.mode = tmpl->mode; x->props.reqid = tmpl->reqid; x->props.family = AF_INET; diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 9391c4c94feb..6a252e2134d1 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -34,6 +34,26 @@ static int xfrm6_dst_lookup(struct xfrm_dst **dst, struct flowi *fl) return err; } +static int xfrm6_get_saddr(xfrm_address_t *saddr, xfrm_address_t *daddr) +{ + struct rt6_info *rt; + struct flowi fl_tunnel = { + .nl_u = { + .ip6_u = { + .daddr = *(struct in6_addr *)&daddr->a6, + }, + }, + }; + + if (!xfrm6_dst_lookup((struct xfrm_dst **)&rt, &fl_tunnel)) { + ipv6_get_saddr(&rt->u.dst, (struct in6_addr *)&daddr->a6, + (struct in6_addr *)&saddr->a6); + dst_release(&rt->u.dst); + return 0; + } + return -EHOSTUNREACH; +} + static struct dst_entry * __xfrm6_find_bundle(struct flowi *fl, struct xfrm_policy *policy) { @@ -362,6 +382,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .family = AF_INET6, .dst_ops = &xfrm6_dst_ops, .dst_lookup = xfrm6_dst_lookup, + .get_saddr = xfrm6_get_saddr, .find_bundle = __xfrm6_find_bundle, .bundle_create = __xfrm6_bundle_create, .decode_session = _decode_session6, diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c index d88cd92c864e..711bfafb2472 100644 --- a/net/ipv6/xfrm6_state.c +++ b/net/ipv6/xfrm6_state.c @@ -42,22 +42,6 @@ __xfrm6_init_tempsel(struct xfrm_state *x, struct flowi *fl, memcpy(&x->props.saddr, &tmpl->saddr, sizeof(x->props.saddr)); if (ipv6_addr_any((struct in6_addr*)&x->props.saddr)) memcpy(&x->props.saddr, saddr, sizeof(x->props.saddr)); - if (tmpl->mode == XFRM_MODE_TUNNEL && ipv6_addr_any((struct in6_addr*)&x->props.saddr)) { - struct rt6_info *rt; - struct flowi fl_tunnel = { - .nl_u = { - .ip6_u = { - .daddr = *(struct in6_addr *)daddr, - } - } - }; - if (!xfrm_dst_lookup((struct xfrm_dst **)&rt, - &fl_tunnel, AF_INET6)) { - ipv6_get_saddr(&rt->u.dst, (struct in6_addr *)daddr, - (struct in6_addr *)&x->props.saddr); - dst_release(&rt->u.dst); - } - } x->props.mode = tmpl->mode; x->props.reqid = tmpl->reqid; x->props.family = AF_INET6; diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 537854fe47ca..b6e2e79d7261 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1107,6 +1107,20 @@ int __xfrm_sk_clone_policy(struct sock *sk) return 0; } +static int +xfrm_get_saddr(xfrm_address_t *local, xfrm_address_t *remote, + unsigned short family) +{ + int err; + struct xfrm_policy_afinfo *afinfo = xfrm_policy_get_afinfo(family); + + if (unlikely(afinfo == NULL)) + return -EINVAL; + err = afinfo->get_saddr(local, remote); + xfrm_policy_put_afinfo(afinfo); + return err; +} + /* Resolve list of templates for the flow, given policy. */ static int @@ -1118,6 +1132,7 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, int i, error; xfrm_address_t *daddr = xfrm_flowi_daddr(fl, family); xfrm_address_t *saddr = xfrm_flowi_saddr(fl, family); + xfrm_address_t tmp; for (nx=0, i = 0; i < policy->xfrm_nr; i++) { struct xfrm_state *x; @@ -1128,6 +1143,12 @@ xfrm_tmpl_resolve_one(struct xfrm_policy *policy, struct flowi *fl, if (tmpl->mode == XFRM_MODE_TUNNEL) { remote = &tmpl->id.daddr; local = &tmpl->saddr; + if (xfrm_addr_any(local, family)) { + error = xfrm_get_saddr(&tmp, remote, family); + if (error) + goto fail; + local = &tmp; + } } x = xfrm_state_find(remote, local, fl, tmpl, policy, &error, family); -- cgit v1.3-8-gc7d7 From 9123de2c043996050bacf77031cad845f5976f5d Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Wed, 20 Sep 2006 11:59:42 -0700 Subject: [NETFILTER]: ip6table_mangle: reroute when nfmark changes in NF_IP6_LOCAL_OUT Now that IPv6 supports policy routing we need to reroute in NF_IP6_LOCAL_OUT when the mark value changes. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/linux/netfilter_ipv6.h | 1 + include/net/ip6_route.h | 2 -- net/ipv6/netfilter/ip6table_mangle.c | 8 ++------ 3 files changed, 3 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 52a7b9e76428..d97e268cdfe5 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -73,6 +73,7 @@ enum nf_ip6_hook_priorities { }; #ifdef CONFIG_NETFILTER +extern int ip6_route_me_harder(struct sk_buff *skb); extern unsigned int nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 297909570041..6ca6b71dfe0f 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -57,8 +57,6 @@ extern void ip6_route_input(struct sk_buff *skb); extern struct dst_entry * ip6_route_output(struct sock *sk, struct flowi *fl); -extern int ip6_route_me_harder(struct sk_buff *skb); - extern void ip6_route_init(void); extern void ip6_route_cleanup(void); diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 32db04fd8310..386ea260e767 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -180,12 +180,8 @@ ip6t_local_hook(unsigned int hook, && (memcmp(&(*pskb)->nh.ipv6h->saddr, &saddr, sizeof(saddr)) || memcmp(&(*pskb)->nh.ipv6h->daddr, &daddr, sizeof(daddr)) || (*pskb)->nfmark != nfmark - || (*pskb)->nh.ipv6h->hop_limit != hop_limit)) { - - /* something which could affect routing has changed */ - - DEBUGP("ip6table_mangle: we'd need to re-route a packet\n"); - } + || (*pskb)->nh.ipv6h->hop_limit != hop_limit)) + return ip6_route_me_harder(*pskb) == 0 ? ret : NF_DROP; return ret; } -- cgit v1.3-8-gc7d7 From 62dd93181aaa1d5a501a9cebcb254f44b8a48af7 Mon Sep 17 00:00:00 2001 From: Ville Nuorvala Date: Fri, 22 Sep 2006 14:43:19 -0700 Subject: [IPV6] NDISC: Set per-entry is_router flag in Proxy NA. We have sent NA with router flag from the node-wide forwarding configuration. This is not appropriate for proxy NA, and it should be set according to each proxy entry's configuration. This is used by Mobile IPv6 home agent to support physical home link in acting as a proxy router for mobile node which is not a router, for example. Based on MIPL2 kernel patch. Signed-off-by: Ville Nuorvala Signed-off-by: Masahide NAKAMURA Signed-off-by: YOSHIFUJI Hideaki --- include/net/neighbour.h | 1 + net/core/neighbour.c | 11 ++++++++--- net/ipv6/ndisc.c | 14 +++++++++++--- 3 files changed, 20 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/neighbour.h b/include/net/neighbour.h index bd187daffdb9..c8aacbd2e333 100644 --- a/include/net/neighbour.h +++ b/include/net/neighbour.h @@ -126,6 +126,7 @@ struct pneigh_entry { struct pneigh_entry *next; struct net_device *dev; + u8 flags; u8 key[0]; }; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index a45bd2124d6b..b6c69e1463e8 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1544,9 +1544,14 @@ int neigh_add(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) lladdr = tb[NDA_LLADDR] ? nla_data(tb[NDA_LLADDR]) : NULL; if (ndm->ndm_flags & NTF_PROXY) { - err = 0; - if (pneigh_lookup(tbl, dst, dev, 1) == NULL) - err = -ENOBUFS; + struct pneigh_entry *pn; + + err = -ENOBUFS; + pn = pneigh_lookup(tbl, dst, dev, 1); + if (pn) { + pn->flags = ndm->ndm_flags; + err = 0; + } goto out_dev_put; } diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 0e0d6ce69021..ddf038636f01 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -736,8 +736,10 @@ static void ndisc_recv_ns(struct sk_buff *skb) struct inet6_ifaddr *ifp; struct inet6_dev *idev = NULL; struct neighbour *neigh; + struct pneigh_entry *pneigh = NULL; int dad = ipv6_addr_any(saddr); int inc; + int is_router; if (ipv6_addr_is_multicast(&msg->target)) { ND_PRINTK2(KERN_WARNING @@ -822,7 +824,8 @@ static void ndisc_recv_ns(struct sk_buff *skb) if (ipv6_chk_acast_addr(dev, &msg->target) || (idev->cnf.forwarding && - pneigh_lookup(&nd_tbl, &msg->target, dev, 0))) { + (pneigh = pneigh_lookup(&nd_tbl, + &msg->target, dev, 0)) != NULL)) { if (!(NEIGH_CB(skb)->flags & LOCALLY_ENQUEUED) && skb->pkt_type != PACKET_HOST && inc != 0 && @@ -843,12 +846,17 @@ static void ndisc_recv_ns(struct sk_buff *skb) goto out; } + if (pneigh) + is_router = pneigh->flags & NTF_ROUTER; + else + is_router = idev->cnf.forwarding; + if (dad) { struct in6_addr maddr; ipv6_addr_all_nodes(&maddr); ndisc_send_na(dev, NULL, &maddr, &msg->target, - idev->cnf.forwarding, 0, (ifp != NULL), 1); + is_router, 0, (ifp != NULL), 1); goto out; } @@ -869,7 +877,7 @@ static void ndisc_recv_ns(struct sk_buff *skb) NEIGH_UPDATE_F_OVERRIDE); if (neigh || !dev->hard_header) { ndisc_send_na(dev, neigh, saddr, &msg->target, - idev->cnf.forwarding, + is_router, 1, (ifp != NULL && inc), inc); if (neigh) neigh_release(neigh); -- cgit v1.3-8-gc7d7 From 8814c4b533817df825485ff32ce6ac406c3a54d1 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki Date: Fri, 22 Sep 2006 14:44:24 -0700 Subject: [IPV6] ADDRCONF: Convert addrconf_lock to RCU. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/net/addrconf.h | 10 ++++------ include/net/if_inet6.h | 1 + net/core/pktgen.c | 4 ++-- net/ipv6/addrconf.c | 46 ++++++++++++++++++++++++---------------------- net/ipv6/anycast.c | 4 ++-- net/ipv6/ipv6_syms.c | 1 - net/sctp/ipv6.c | 6 +++--- 7 files changed, 36 insertions(+), 36 deletions(-) (limited to 'include/net') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 5fc8627435eb..aa2ed8f0a9dd 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -133,20 +133,18 @@ extern int unregister_inet6addr_notifier(struct notifier_block *nb); static inline struct inet6_dev * __in6_dev_get(struct net_device *dev) { - return (struct inet6_dev *)dev->ip6_ptr; + return rcu_dereference(dev->ip6_ptr); } -extern rwlock_t addrconf_lock; - static inline struct inet6_dev * in6_dev_get(struct net_device *dev) { struct inet6_dev *idev = NULL; - read_lock(&addrconf_lock); - idev = dev->ip6_ptr; + rcu_read_lock(); + idev = __in6_dev_get(dev); if (idev) atomic_inc(&idev->refcnt); - read_unlock(&addrconf_lock); + rcu_read_unlock(); return idev; } diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h index e459e1a0ae4a..34489c13c119 100644 --- a/include/net/if_inet6.h +++ b/include/net/if_inet6.h @@ -189,6 +189,7 @@ struct inet6_dev struct ipv6_devconf cnf; struct ipv6_devstat stats; unsigned long tstamp; /* ipv6InterfaceTable update timestamp */ + struct rcu_head rcu; }; extern struct ipv6_devconf ipv6_devconf; diff --git a/net/core/pktgen.c b/net/core/pktgen.c index 6a7320b39ed0..72145d4a2600 100644 --- a/net/core/pktgen.c +++ b/net/core/pktgen.c @@ -1786,7 +1786,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) * use ipv6_get_lladdr if/when it's get exported */ - read_lock(&addrconf_lock); + rcu_read_lock(); if ((idev = __in6_dev_get(pkt_dev->odev)) != NULL) { struct inet6_ifaddr *ifp; @@ -1805,7 +1805,7 @@ static void pktgen_setup_inject(struct pktgen_dev *pkt_dev) } read_unlock_bh(&idev->lock); } - read_unlock(&addrconf_lock); + rcu_read_unlock(); if (err) printk("pktgen: ERROR: IPv6 link address not availble.\n"); } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 825a291d5aa5..c09ebb7bb98a 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -119,9 +119,6 @@ static int ipv6_count_addresses(struct inet6_dev *idev); static struct inet6_ifaddr *inet6_addr_lst[IN6_ADDR_HSIZE]; static DEFINE_RWLOCK(addrconf_hash_lock); -/* Protects inet6 devices */ -DEFINE_RWLOCK(addrconf_lock); - static void addrconf_verify(unsigned long); static DEFINE_TIMER(addr_chk_timer, addrconf_verify, 0, 0); @@ -318,6 +315,12 @@ static void addrconf_mod_timer(struct inet6_ifaddr *ifp, /* Nobody refers to this device, we may destroy it. */ +static void in6_dev_finish_destroy_rcu(struct rcu_head *head) +{ + struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu); + kfree(idev); +} + void in6_dev_finish_destroy(struct inet6_dev *idev) { struct net_device *dev = idev->dev; @@ -332,7 +335,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev) return; } snmp6_free_dev(idev); - kfree(idev); + call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu); } static struct inet6_dev * ipv6_add_dev(struct net_device *dev) @@ -408,9 +411,8 @@ static struct inet6_dev * ipv6_add_dev(struct net_device *dev) if (netif_carrier_ok(dev)) ndev->if_flags |= IF_READY; - write_lock_bh(&addrconf_lock); - dev->ip6_ptr = ndev; - write_unlock_bh(&addrconf_lock); + /* protected by rtnl_lock */ + rcu_assign_pointer(dev->ip6_ptr, ndev); ipv6_mc_init_dev(ndev); ndev->tstamp = jiffies; @@ -474,7 +476,7 @@ static void addrconf_forward_change(void) read_lock(&dev_base_lock); for (dev=dev_base; dev; dev=dev->next) { - read_lock(&addrconf_lock); + rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.forwarding) ^ (!ipv6_devconf.forwarding); @@ -482,7 +484,7 @@ static void addrconf_forward_change(void) if (changed) dev_forward_change(idev); } - read_unlock(&addrconf_lock); + rcu_read_unlock(); } read_unlock(&dev_base_lock); } @@ -543,7 +545,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, int hash; int err = 0; - read_lock_bh(&addrconf_lock); + rcu_read_lock_bh(); if (idev->dead) { err = -ENODEV; /*XXX*/ goto out2; @@ -612,7 +614,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, int pfxlen, in6_ifa_hold(ifa); write_unlock(&idev->lock); out2: - read_unlock_bh(&addrconf_lock); + rcu_read_unlock_bh(); if (likely(err == 0)) atomic_notifier_call_chain(&inet6addr_chain, NETDEV_UP, ifa); @@ -915,7 +917,7 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, memset(&hiscore, 0, sizeof(hiscore)); read_lock(&dev_base_lock); - read_lock(&addrconf_lock); + rcu_read_lock(); for (dev = dev_base; dev; dev=dev->next) { struct inet6_dev *idev; @@ -1127,7 +1129,7 @@ record_it: } read_unlock_bh(&idev->lock); } - read_unlock(&addrconf_lock); + rcu_read_unlock(); read_unlock(&dev_base_lock); if (!ifa_result) @@ -1151,7 +1153,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr) struct inet6_dev *idev; int err = -EADDRNOTAVAIL; - read_lock(&addrconf_lock); + rcu_read_lock(); if ((idev = __in6_dev_get(dev)) != NULL) { struct inet6_ifaddr *ifp; @@ -1165,7 +1167,7 @@ int ipv6_get_lladdr(struct net_device *dev, struct in6_addr *addr) } read_unlock_bh(&idev->lock); } - read_unlock(&addrconf_lock); + rcu_read_unlock(); return err; } @@ -1466,7 +1468,7 @@ static void ipv6_regen_rndid(unsigned long data) struct inet6_dev *idev = (struct inet6_dev *) data; unsigned long expires; - read_lock_bh(&addrconf_lock); + rcu_read_lock_bh(); write_lock_bh(&idev->lock); if (idev->dead) @@ -1490,7 +1492,7 @@ static void ipv6_regen_rndid(unsigned long data) out: write_unlock_bh(&idev->lock); - read_unlock_bh(&addrconf_lock); + rcu_read_unlock_bh(); in6_dev_put(idev); } @@ -2342,10 +2344,10 @@ static int addrconf_ifdown(struct net_device *dev, int how) Do not dev_put! */ if (how == 1) { - write_lock_bh(&addrconf_lock); - dev->ip6_ptr = NULL; idev->dead = 1; - write_unlock_bh(&addrconf_lock); + + /* protected by rtnl_lock */ + rcu_assign_pointer(dev->ip6_ptr, NULL); /* Step 1.5: remove snmp6 entry */ snmp6_unregister_dev(idev); @@ -3573,10 +3575,10 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) { - read_lock_bh(&addrconf_lock); + rcu_read_lock_bh(); if (likely(ifp->idev->dead == 0)) __ipv6_ifa_notify(event, ifp); - read_unlock_bh(&addrconf_lock); + rcu_read_unlock_bh(); } #ifdef CONFIG_SYSCTL diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index b80fc502ca03..a9604764e015 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -56,7 +56,7 @@ ip6_onlink(struct in6_addr *addr, struct net_device *dev) int onlink; onlink = 0; - read_lock(&addrconf_lock); + rcu_read_lock(); idev = __in6_dev_get(dev); if (idev) { read_lock_bh(&idev->lock); @@ -68,7 +68,7 @@ ip6_onlink(struct in6_addr *addr, struct net_device *dev) } read_unlock_bh(&idev->lock); } - read_unlock(&addrconf_lock); + rcu_read_unlock(); return onlink; } diff --git a/net/ipv6/ipv6_syms.c b/net/ipv6/ipv6_syms.c index 7b7b90d9c3d0..0e8e0676a033 100644 --- a/net/ipv6/ipv6_syms.c +++ b/net/ipv6/ipv6_syms.c @@ -14,7 +14,6 @@ EXPORT_SYMBOL(ndisc_mc_map); EXPORT_SYMBOL(register_inet6addr_notifier); EXPORT_SYMBOL(unregister_inet6addr_notifier); EXPORT_SYMBOL(ip6_route_output); -EXPORT_SYMBOL(addrconf_lock); EXPORT_SYMBOL(ipv6_setsockopt); EXPORT_SYMBOL(ipv6_getsockopt); EXPORT_SYMBOL(inet6_register_protosw); diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c index fd87e3ceb56e..249e5033c1a8 100644 --- a/net/sctp/ipv6.c +++ b/net/sctp/ipv6.c @@ -321,9 +321,9 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, struct inet6_ifaddr *ifp; struct sctp_sockaddr_entry *addr; - read_lock(&addrconf_lock); + rcu_read_lock(); if ((in6_dev = __in6_dev_get(dev)) == NULL) { - read_unlock(&addrconf_lock); + rcu_read_unlock(); return; } @@ -342,7 +342,7 @@ static void sctp_v6_copy_addrlist(struct list_head *addrlist, } read_unlock(&in6_dev->lock); - read_unlock(&addrconf_lock); + rcu_read_unlock(); } /* Initialize a sockaddr_storage from in incoming skb. */ -- cgit v1.3-8-gc7d7 From 3b9f9a1c3903b64c38505f9fed3bb11e48dbc931 Mon Sep 17 00:00:00 2001 From: Noriaki TAKAMIYA Date: Fri, 22 Sep 2006 14:45:56 -0700 Subject: [IPV6] ADDRCONF: Mobile IPv6 Home Address support. IFA_F_HOMEADDRESS is introduced for Mobile IPv6 Home Addresses on Mobile Node. The IFA_F_HOMEADDRESS flag should be set for Mobile IPv6 Home Addresses for 2 purposes. 1) We need to check this on receipt of Type 2 Routing Header (RFC3775 Secion 6.4), 2) We prefer Home Address(es) in source address selection (RFC3484 Section 5 Rule 4). Signed-off-by: Noriaki TAKAMIYA Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- include/linux/if_addr.h | 1 + include/net/addrconf.h | 6 +----- net/ipv6/addrconf.c | 44 +++++++++++++++++++++++++++++++++++++++++--- 3 files changed, 43 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/linux/if_addr.h b/include/linux/if_addr.h index ca24b9de13fb..dbe8f6120a40 100644 --- a/include/linux/if_addr.h +++ b/include/linux/if_addr.h @@ -39,6 +39,7 @@ enum #define IFA_F_TEMPORARY IFA_F_SECONDARY #define IFA_F_NODAD 0x02 +#define IFA_F_HOMEADDRESS 0x10 #define IFA_F_DEPRECATED 0x20 #define IFA_F_TENTATIVE 0x40 #define IFA_F_PERMANENT 0x80 diff --git a/include/net/addrconf.h b/include/net/addrconf.h index aa2ed8f0a9dd..44f1b673f916 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -61,12 +61,8 @@ extern int addrconf_set_dstaddr(void __user *arg); extern int ipv6_chk_addr(struct in6_addr *addr, struct net_device *dev, int strict); -/* XXX: this is a placeholder till addrconf supports */ #ifdef CONFIG_IPV6_MIP6 -static inline int ipv6_chk_home_addr(struct in6_addr *addr) -{ - return 0; -} +extern int ipv6_chk_home_addr(struct in6_addr *addr); #endif extern struct inet6_ifaddr * ipv6_get_ifaddr(struct in6_addr *addr, struct net_device *dev, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index adb583a26151..c18676352397 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1038,9 +1038,27 @@ int ipv6_dev_get_saddr(struct net_device *daddr_dev, continue; } - /* Rule 4: Prefer home address -- not implemented yet */ + /* Rule 4: Prefer home address */ +#ifdef CONFIG_IPV6_MIP6 + if (hiscore.rule < 4) { + if (ifa_result->flags & IFA_F_HOMEADDRESS) + hiscore.attrs |= IPV6_SADDR_SCORE_HOA; + hiscore.rule++; + } + if (ifa->flags & IFA_F_HOMEADDRESS) { + score.attrs |= IPV6_SADDR_SCORE_HOA; + if (!(ifa_result->flags & IFA_F_HOMEADDRESS)) { + score.rule = 4; + goto record_it; + } + } else { + if (hiscore.attrs & IPV6_SADDR_SCORE_HOA) + continue; + } +#else if (hiscore.rule < 4) hiscore.rule++; +#endif /* Rule 5: Prefer outgoing interface */ if (hiscore.rule < 5) { @@ -2759,6 +2777,26 @@ void if6_proc_exit(void) } #endif /* CONFIG_PROC_FS */ +#ifdef CONFIG_IPV6_MIP6 +/* Check if address is a home address configured on any interface. */ +int ipv6_chk_home_addr(struct in6_addr *addr) +{ + int ret = 0; + struct inet6_ifaddr * ifp; + u8 hash = ipv6_addr_hash(addr); + read_lock_bh(&addrconf_hash_lock); + for (ifp = inet6_addr_lst[hash]; ifp; ifp = ifp->lst_next) { + if (ipv6_addr_cmp(&ifp->addr, addr) == 0 && + (ifp->flags & IFA_F_HOMEADDRESS)) { + ret = 1; + break; + } + } + read_unlock_bh(&addrconf_hash_lock); + return ret; +} +#endif + /* * Periodic address status verification */ @@ -2930,7 +2968,7 @@ static int inet6_addr_modify(struct inet6_ifaddr *ifp, u8 ifa_flags, prefered_lft = 0x7FFFFFFF/HZ; spin_lock_bh(&ifp->lock); - ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD)) | ifa_flags; + ifp->flags = (ifp->flags & ~(IFA_F_DEPRECATED | IFA_F_PERMANENT | IFA_F_NODAD | IFA_F_HOMEADDRESS)) | ifa_flags; ifp->tstamp = jiffies; ifp->valid_lft = valid_lft; ifp->prefered_lft = prefered_lft; @@ -2981,7 +3019,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) return -ENODEV; /* We ignore other flags so far. */ - ifa_flags = ifm->ifa_flags & IFA_F_NODAD; + ifa_flags = ifm->ifa_flags & (IFA_F_NODAD | IFA_F_HOMEADDRESS); ifa = ipv6_get_ifaddr(pfx, dev, 1); if (ifa == NULL) { -- cgit v1.3-8-gc7d7 From 1db2ea398ffbfd9ea46d509ff0e4a85bb4b8c0ea Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Sep 2006 23:41:42 +0100 Subject: [PATCH] netlabel gfp annotations Signed-off-by: Al Viro Signed-off-by: Linus Torvalds --- include/net/netlabel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/netlabel.h b/include/net/netlabel.h index fc2b72fc7e07..dd5780b36919 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -108,7 +108,7 @@ */ static inline struct sk_buff *netlbl_netlink_alloc_skb(size_t head, size_t body, - int gfp_flags) + gfp_t gfp_flags) { struct sk_buff *skb; -- cgit v1.3-8-gc7d7 From 14a72f53fb1bb5d5c2bdd8cf172219519664729a Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 25 Sep 2006 15:52:01 -0700 Subject: [NetLabel]: correct improper handling of non-NetLabel peer contexts Fix a problem where NetLabel would always set the value of sk_security_struct->peer_sid in selinux_netlbl_sock_graft() to the context of the socket, causing problems when users would query the context of the connection. This patch fixes this so that the value in sk_security_struct->peer_sid is only set when the connection is NetLabel based, otherwise the value is untouched. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- include/net/cipso_ipv4.h | 7 ++++++ include/net/netlabel.h | 8 +++++++ net/ipv4/cipso_ipv4.c | 48 +++++++++++++++++++++++++++++------------- net/netlabel/netlabel_kapi.c | 23 ++++++++++++++++++++ security/selinux/ss/services.c | 12 ++++++++++- 5 files changed, 82 insertions(+), 16 deletions(-) (limited to 'include/net') diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index 59406e0dc5b2..6718452a5cd0 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -205,6 +205,7 @@ void cipso_v4_error(struct sk_buff *skb, int error, u32 gateway); int cipso_v4_socket_setattr(const struct socket *sock, const struct cipso_v4_doi *doi_def, const struct netlbl_lsm_secattr *secattr); +int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr); int cipso_v4_socket_getattr(const struct socket *sock, struct netlbl_lsm_secattr *secattr); int cipso_v4_skbuff_getattr(const struct sk_buff *skb, @@ -225,6 +226,12 @@ static inline int cipso_v4_socket_setattr(const struct socket *sock, return -ENOSYS; } +static inline int cipso_v4_sock_getattr(struct sock *sk, + struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + static inline int cipso_v4_socket_getattr(const struct socket *sock, struct netlbl_lsm_secattr *secattr) { diff --git a/include/net/netlabel.h b/include/net/netlabel.h index dd5780b36919..bf7b564e3540 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -238,6 +238,8 @@ static inline void netlbl_secattr_free(struct netlbl_lsm_secattr *secattr, #ifdef CONFIG_NETLABEL int netlbl_socket_setattr(const struct socket *sock, const struct netlbl_lsm_secattr *secattr); +int netlbl_sock_getattr(struct sock *sk, + struct netlbl_lsm_secattr *secattr); int netlbl_socket_getattr(const struct socket *sock, struct netlbl_lsm_secattr *secattr); int netlbl_skbuff_getattr(const struct sk_buff *skb, @@ -250,6 +252,12 @@ static inline int netlbl_socket_setattr(const struct socket *sock, return -ENOSYS; } +static inline int netlbl_sock_getattr(struct sock *sk, + struct netlbl_lsm_secattr *secattr) +{ + return -ENOSYS; +} + static inline int netlbl_socket_getattr(const struct socket *sock, struct netlbl_lsm_secattr *secattr) { diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 80a2a0911b49..a3bae2ca8acc 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -1486,43 +1486,40 @@ socket_setattr_failure: } /** - * cipso_v4_socket_getattr - Get the security attributes from a socket - * @sock: the socket + * cipso_v4_sock_getattr - Get the security attributes from a sock + * @sk: the sock * @secattr: the security attributes * * Description: - * Query @sock to see if there is a CIPSO option attached to the socket and if - * there is return the CIPSO security attributes in @secattr. Returns zero on - * success and negative values on failure. + * Query @sk to see if there is a CIPSO option attached to the sock and if + * there is return the CIPSO security attributes in @secattr. This function + * requires that @sk be locked, or privately held, but it does not do any + * locking itself. Returns zero on success and negative values on failure. * */ -int cipso_v4_socket_getattr(const struct socket *sock, - struct netlbl_lsm_secattr *secattr) +int cipso_v4_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) { int ret_val = -ENOMSG; - struct sock *sk; struct inet_sock *sk_inet; unsigned char *cipso_ptr; u32 doi; struct cipso_v4_doi *doi_def; - sk = sock->sk; - lock_sock(sk); sk_inet = inet_sk(sk); if (sk_inet->opt == NULL || sk_inet->opt->cipso == 0) - goto socket_getattr_return; + return -ENOMSG; cipso_ptr = sk_inet->opt->__data + sk_inet->opt->cipso - sizeof(struct iphdr); ret_val = cipso_v4_cache_check(cipso_ptr, cipso_ptr[1], secattr); if (ret_val == 0) - goto socket_getattr_return; + return ret_val; doi = ntohl(*(u32 *)&cipso_ptr[2]); rcu_read_lock(); doi_def = cipso_v4_doi_getdef(doi); if (doi_def == NULL) { rcu_read_unlock(); - goto socket_getattr_return; + return -ENOMSG; } switch (cipso_ptr[6]) { case CIPSO_V4_TAG_RBITMAP: @@ -1533,8 +1530,29 @@ int cipso_v4_socket_getattr(const struct socket *sock, } rcu_read_unlock(); -socket_getattr_return: - release_sock(sk); + return ret_val; +} + +/** + * cipso_v4_socket_getattr - Get the security attributes from a socket + * @sock: the socket + * @secattr: the security attributes + * + * Description: + * Query @sock to see if there is a CIPSO option attached to the socket and if + * there is return the CIPSO security attributes in @secattr. Returns zero on + * success and negative values on failure. + * + */ +int cipso_v4_socket_getattr(const struct socket *sock, + struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + + lock_sock(sock->sk); + ret_val = cipso_v4_sock_getattr(sock->sk, secattr); + release_sock(sock->sk); + return ret_val; } diff --git a/net/netlabel/netlabel_kapi.c b/net/netlabel/netlabel_kapi.c index 0fd8aaafe23f..54fb7de3c2b1 100644 --- a/net/netlabel/netlabel_kapi.c +++ b/net/netlabel/netlabel_kapi.c @@ -84,6 +84,29 @@ socket_setattr_return: return ret_val; } +/** + * netlbl_sock_getattr - Determine the security attributes of a sock + * @sk: the sock + * @secattr: the security attributes + * + * Description: + * Examines the given sock to see any NetLabel style labeling has been + * applied to the sock, if so it parses the socket label and returns the + * security attributes in @secattr. Returns zero on success, negative values + * on failure. + * + */ +int netlbl_sock_getattr(struct sock *sk, struct netlbl_lsm_secattr *secattr) +{ + int ret_val; + + ret_val = cipso_v4_sock_getattr(sk, secattr); + if (ret_val == 0) + return 0; + + return netlbl_unlabel_getattr(secattr); +} + /** * netlbl_socket_getattr - Determine the security attributes of a socket * @sock: the socket diff --git a/security/selinux/ss/services.c b/security/selinux/ss/services.c index 7eb69a602d8f..d67f7e658529 100644 --- a/security/selinux/ss/services.c +++ b/security/selinux/ss/services.c @@ -2502,14 +2502,24 @@ void selinux_netlbl_sock_graft(struct sock *sk, struct socket *sock) { struct inode_security_struct *isec = SOCK_INODE(sock)->i_security; struct sk_security_struct *sksec = sk->sk_security; + struct netlbl_lsm_secattr secattr; + u32 nlbl_peer_sid; sksec->sclass = isec->sclass; if (sk->sk_family != PF_INET) return; + netlbl_secattr_init(&secattr); + if (netlbl_sock_getattr(sk, &secattr) == 0 && + selinux_netlbl_secattr_to_sid(NULL, + &secattr, + sksec->sid, + &nlbl_peer_sid) == 0) + sksec->peer_sid = nlbl_peer_sid; + netlbl_secattr_destroy(&secattr, 0); + sksec->nlbl_state = NLBL_REQUIRE; - sksec->peer_sid = sksec->sid; /* Try to set the NetLabel on the socket to save time later, if we fail * here we will pick up the pieces in later calls to -- cgit v1.3-8-gc7d7 From 22acb19a91d2b551ea37647747972e5286284b22 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 25 Sep 2006 15:53:37 -0700 Subject: [NETLINK]: add nla_for_each_nested() to the interface list At the top of include/net/netlink.h is a list of Netlink interfaces, however, the nla_for_each_nested() macro was not listed. This patch adds this interface to the list at the top of the header file. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- include/net/netlink.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/net') diff --git a/include/net/netlink.h b/include/net/netlink.h index 11dc2e7f679a..2897a73f81eb 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -151,6 +151,7 @@ * nla_parse() parse and validate stream of attrs * nla_parse_nested() parse nested attribuets * nla_for_each_attr() loop over all attributes + * nla_for_each_nested() loop over the nested attributes *========================================================================= */ -- cgit v1.3-8-gc7d7 From 4fe5d5c07ab615a52fd1b0ceba5aeed7c612821a Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 25 Sep 2006 15:54:03 -0700 Subject: [Netlink]: add nla_validate_nested() Add a new function, nla_validate_nested(), to validate nested Netlink attributes. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- include/net/netlink.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/net') diff --git a/include/net/netlink.h b/include/net/netlink.h index 2897a73f81eb..4ab68a7a636a 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -146,6 +146,7 @@ * nla_ok(nla, remaining) does nla fit into remaining bytes? * nla_next(nla, remaining) get next netlink attribute * nla_validate() validate a stream of attributes + * nla_validate_nested() validate a stream of nested attributes * nla_find() find attribute in stream of attributes * nla_find_nested() find attribute in nested attributes * nla_parse() parse and validate stream of attrs @@ -950,6 +951,24 @@ static inline int nla_nest_cancel(struct sk_buff *skb, struct nlattr *start) return nlmsg_trim(skb, start); } +/** + * nla_validate_nested - Validate a stream of nested attributes + * @start: container attribute + * @maxtype: maximum attribute type to be expected + * @policy: validation policy + * + * Validates all attributes in the nested attribute stream against the + * specified policy. Attributes with a type exceeding maxtype will be + * ignored. See documenation of struct nla_policy for more details. + * + * Returns 0 on success or a negative error code. + */ +static inline int nla_validate_nested(struct nlattr *start, int maxtype, + struct nla_policy *policy) +{ + return nla_validate(nla_data(start), nla_len(start), maxtype, policy); +} + /** * nla_for_each_attr - iterate over a stream of attributes * @pos: loop counter, set to current attribute -- cgit v1.3-8-gc7d7 From fcd48280643e92ec6cb29a04e9079dd7b6b5bfef Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Mon, 25 Sep 2006 15:56:09 -0700 Subject: [NetLabel]: rework the Netlink attribute handling (part 1) At the suggestion of Thomas Graf, rewrite NetLabel's use of Netlink attributes to better follow the common Netlink attribute usage. Signed-off-by: Paul Moore Signed-off-by: David S. Miller --- include/net/cipso_ipv4.h | 16 ++- include/net/netlabel.h | 49 +-------- net/ipv4/cipso_ipv4.c | 203 +++++-------------------------------- net/netlabel/netlabel_domainhash.c | 183 +++++++-------------------------- net/netlabel/netlabel_domainhash.h | 6 +- net/netlabel/netlabel_user.c | 82 --------------- net/netlabel/netlabel_user.h | 141 +------------------------- 7 files changed, 76 insertions(+), 604 deletions(-) (limited to 'include/net') diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index 6718452a5cd0..2d72496c2029 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -130,8 +130,9 @@ extern int cipso_v4_rbm_strictvalid; int cipso_v4_doi_add(struct cipso_v4_doi *doi_def); int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head)); struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi); -struct sk_buff *cipso_v4_doi_dump_all(size_t headroom); -struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom); +int cipso_v4_doi_walk(u32 *skip_cnt, + int (*callback) (struct cipso_v4_doi *doi_def, void *arg), + void *cb_arg); int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, const char *domain); int cipso_v4_doi_domhsh_remove(struct cipso_v4_doi *doi_def, const char *domain); @@ -152,14 +153,11 @@ static inline struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi) return NULL; } -static inline struct sk_buff *cipso_v4_doi_dump_all(size_t headroom) +static inline int cipso_v4_doi_walk(u32 *skip_cnt, + int (*callback) (struct cipso_v4_doi *doi_def, void *arg), + void *cb_arg) { - return NULL; -} - -static inline struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom) -{ - return NULL; + return 0; } static inline int cipso_v4_doi_domhsh_add(struct cipso_v4_doi *doi_def, diff --git a/include/net/netlabel.h b/include/net/netlabel.h index bf7b564e3540..6692430063fd 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -57,9 +57,8 @@ * The payload is dependent on the subsystem specified in the * 'nlmsghdr->nlmsg_type' and should be defined below, supporting functions * should be defined in the corresponding net/netlabel/netlabel_.h|c - * file. All of the fields in the NetLabel payload are NETLINK attributes, the - * length of each field is the length of the NETLINK attribute payload, see - * include/net/netlink.h for more information on NETLINK attributes. + * file. All of the fields in the NetLabel payload are NETLINK attributes, see + * the include/net/netlink.h file for more information on NETLINK attributes. * */ @@ -82,50 +81,6 @@ #define NETLBL_NLTYPE_UNLABELED 5 #define NETLBL_NLTYPE_UNLABELED_NAME "NLBL_UNLBL" -/* NetLabel return codes */ -#define NETLBL_E_OK 0 - -/* - * Helper functions - */ - -#define NETLBL_LEN_U8 nla_total_size(sizeof(u8)) -#define NETLBL_LEN_U16 nla_total_size(sizeof(u16)) -#define NETLBL_LEN_U32 nla_total_size(sizeof(u32)) - -/** - * netlbl_netlink_alloc_skb - Allocate a NETLINK message buffer - * @head: the amount of headroom in bytes - * @body: the desired size (minus headroom) in bytes - * @gfp_flags: the alloc flags to pass to alloc_skb() - * - * Description: - * Allocate a NETLINK message buffer based on the sizes given in @head and - * @body. If @head is greater than zero skb_reserve() is called to reserve - * @head bytes at the start of the buffer. Returns a valid sk_buff pointer on - * success, NULL on failure. - * - */ -static inline struct sk_buff *netlbl_netlink_alloc_skb(size_t head, - size_t body, - gfp_t gfp_flags) -{ - struct sk_buff *skb; - - skb = alloc_skb(NLMSG_ALIGN(head + body), gfp_flags); - if (skb == NULL) - return NULL; - if (head > 0) { - skb_reserve(skb, head); - if (skb_tailroom(skb) < body) { - kfree_skb(skb); - return NULL; - } - } - - return skb; -} - /* * NetLabel - Kernel API for accessing the network packet label mappings. * diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index 87e71563335d..e6ce0b3ba62a 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -530,197 +530,42 @@ struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi) } /** - * cipso_v4_doi_dump_all - Dump all the CIPSO DOI definitions into a sk_buff - * @headroom: the amount of headroom to allocate for the sk_buff + * cipso_v4_doi_walk - Iterate through the DOI definitions + * @skip_cnt: skip past this number of DOI definitions, updated + * @callback: callback for each DOI definition + * @cb_arg: argument for the callback function * * Description: - * Dump a list of all the configured DOI values into a sk_buff. The returned - * sk_buff has room at the front of the sk_buff for @headroom bytes. See - * net/netlabel/netlabel_cipso_v4.h for the LISTALL message format. This - * function may fail if another process is changing the DOI list at the same - * time. Returns a pointer to a sk_buff on success, NULL on error. + * Iterate over the DOI definition list, skipping the first @skip_cnt entries. + * For each entry call @callback, if @callback returns a negative value stop + * 'walking' through the list and return. Updates the value in @skip_cnt upon + * return. Returns zero on success, negative values on failure. * */ -struct sk_buff *cipso_v4_doi_dump_all(size_t headroom) +int cipso_v4_doi_walk(u32 *skip_cnt, + int (*callback) (struct cipso_v4_doi *doi_def, void *arg), + void *cb_arg) { - struct sk_buff *skb = NULL; - struct cipso_v4_doi *iter; + int ret_val = -ENOENT; u32 doi_cnt = 0; - ssize_t buf_len; - - buf_len = NETLBL_LEN_U32; - rcu_read_lock(); - list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) - if (iter->valid) { - doi_cnt += 1; - buf_len += 2 * NETLBL_LEN_U32; - } - - skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC); - if (skb == NULL) - goto doi_dump_all_failure; - - if (nla_put_u32(skb, NLA_U32, doi_cnt) != 0) - goto doi_dump_all_failure; - buf_len -= NETLBL_LEN_U32; - list_for_each_entry_rcu(iter, &cipso_v4_doi_list, list) - if (iter->valid) { - if (buf_len < 2 * NETLBL_LEN_U32) - goto doi_dump_all_failure; - if (nla_put_u32(skb, NLA_U32, iter->doi) != 0) - goto doi_dump_all_failure; - if (nla_put_u32(skb, NLA_U32, iter->type) != 0) - goto doi_dump_all_failure; - buf_len -= 2 * NETLBL_LEN_U32; - } - rcu_read_unlock(); - - return skb; - -doi_dump_all_failure: - rcu_read_unlock(); - kfree(skb); - return NULL; -} - -/** - * cipso_v4_doi_dump - Dump a CIPSO DOI definition into a sk_buff - * @doi: the DOI value - * @headroom: the amount of headroom to allocate for the sk_buff - * - * Description: - * Lookup the DOI definition matching @doi and dump it's contents into a - * sk_buff. The returned sk_buff has room at the front of the sk_buff for - * @headroom bytes. See net/netlabel/netlabel_cipso_v4.h for the LIST message - * format. This function may fail if another process is changing the DOI list - * at the same time. Returns a pointer to a sk_buff on success, NULL on error. - * - */ -struct sk_buff *cipso_v4_doi_dump(u32 doi, size_t headroom) -{ - struct sk_buff *skb = NULL; - struct cipso_v4_doi *iter; - u32 tag_cnt = 0; - u32 lvl_cnt = 0; - u32 cat_cnt = 0; - ssize_t buf_len; - ssize_t tmp; + struct cipso_v4_doi *iter_doi; rcu_read_lock(); - iter = cipso_v4_doi_getdef(doi); - if (iter == NULL) - goto doi_dump_failure; - buf_len = NETLBL_LEN_U32; - switch (iter->type) { - case CIPSO_V4_MAP_PASS: - buf_len += NETLBL_LEN_U32; - while(tag_cnt < CIPSO_V4_TAG_MAXCNT && - iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) { - tag_cnt += 1; - buf_len += NETLBL_LEN_U8; - } - break; - case CIPSO_V4_MAP_STD: - buf_len += 3 * NETLBL_LEN_U32; - while (tag_cnt < CIPSO_V4_TAG_MAXCNT && - iter->tags[tag_cnt] != CIPSO_V4_TAG_INVALID) { - tag_cnt += 1; - buf_len += NETLBL_LEN_U8; - } - for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++) - if (iter->map.std->lvl.local[tmp] != - CIPSO_V4_INV_LVL) { - lvl_cnt += 1; - buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U8; + list_for_each_entry_rcu(iter_doi, &cipso_v4_doi_list, list) + if (iter_doi->valid) { + if (doi_cnt++ < *skip_cnt) + continue; + ret_val = callback(iter_doi, cb_arg); + if (ret_val < 0) { + doi_cnt--; + goto doi_walk_return; } - for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++) - if (iter->map.std->cat.local[tmp] != - CIPSO_V4_INV_CAT) { - cat_cnt += 1; - buf_len += NETLBL_LEN_U32 + NETLBL_LEN_U16; - } - break; - } - - skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC); - if (skb == NULL) - goto doi_dump_failure; - - if (nla_put_u32(skb, NLA_U32, iter->type) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U32; - if (iter != cipso_v4_doi_getdef(doi)) - goto doi_dump_failure; - switch (iter->type) { - case CIPSO_V4_MAP_PASS: - if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U32; - for (tmp = 0; - tmp < CIPSO_V4_TAG_MAXCNT && - iter->tags[tmp] != CIPSO_V4_TAG_INVALID; - tmp++) { - if (buf_len < NETLBL_LEN_U8) - goto doi_dump_failure; - if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U8; } - break; - case CIPSO_V4_MAP_STD: - if (nla_put_u32(skb, NLA_U32, tag_cnt) != 0) - goto doi_dump_failure; - if (nla_put_u32(skb, NLA_U32, lvl_cnt) != 0) - goto doi_dump_failure; - if (nla_put_u32(skb, NLA_U32, cat_cnt) != 0) - goto doi_dump_failure; - buf_len -= 3 * NETLBL_LEN_U32; - for (tmp = 0; - tmp < CIPSO_V4_TAG_MAXCNT && - iter->tags[tmp] != CIPSO_V4_TAG_INVALID; - tmp++) { - if (buf_len < NETLBL_LEN_U8) - goto doi_dump_failure; - if (nla_put_u8(skb, NLA_U8, iter->tags[tmp]) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U8; - } - for (tmp = 0; tmp < iter->map.std->lvl.local_size; tmp++) - if (iter->map.std->lvl.local[tmp] != - CIPSO_V4_INV_LVL) { - if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U8) - goto doi_dump_failure; - if (nla_put_u32(skb, NLA_U32, tmp) != 0) - goto doi_dump_failure; - if (nla_put_u8(skb, - NLA_U8, - iter->map.std->lvl.local[tmp]) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U8; - } - for (tmp = 0; tmp < iter->map.std->cat.local_size; tmp++) - if (iter->map.std->cat.local[tmp] != - CIPSO_V4_INV_CAT) { - if (buf_len < NETLBL_LEN_U32 + NETLBL_LEN_U16) - goto doi_dump_failure; - if (nla_put_u32(skb, NLA_U32, tmp) != 0) - goto doi_dump_failure; - if (nla_put_u16(skb, - NLA_U16, - iter->map.std->cat.local[tmp]) != 0) - goto doi_dump_failure; - buf_len -= NETLBL_LEN_U32 + NETLBL_LEN_U16; - } - break; - } - rcu_read_unlock(); - - return skb; -doi_dump_failure: +doi_walk_return: rcu_read_unlock(); - kfree(skb); - return NULL; + *skip_cnt = doi_cnt; + return ret_val; } /** diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index 0489a1378101..f56d7a8ac7b7 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -354,160 +354,51 @@ struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain) } /** - * netlbl_domhsh_dump - Dump the domain hash table into a sk_buff + * netlbl_domhsh_walk - Iterate through the domain mapping hash table + * @skip_bkt: the number of buckets to skip at the start + * @skip_chain: the number of entries to skip in the first iterated bucket + * @callback: callback for each entry + * @cb_arg: argument for the callback function * * Description: - * Dump the domain hash table into a buffer suitable for returning to an - * application in response to a NetLabel management DOMAIN message. This - * function may fail if another process is growing the hash table at the same - * time. The returned sk_buff has room at the front of the sk_buff for - * @headroom bytes. See netlabel.h for the DOMAIN message format. Returns a - * pointer to a sk_buff on success, NULL on error. + * Interate over the domain mapping hash table, skipping the first @skip_bkt + * buckets and @skip_chain entries. For each entry in the table call + * @callback, if @callback returns a negative value stop 'walking' through the + * table and return. Updates the values in @skip_bkt and @skip_chain on + * return. Returns zero on succcess, negative values on failure. * */ -struct sk_buff *netlbl_domhsh_dump(size_t headroom) +int netlbl_domhsh_walk(u32 *skip_bkt, + u32 *skip_chain, + int (*callback) (struct netlbl_dom_map *entry, void *arg), + void *cb_arg) { - struct sk_buff *skb = NULL; - ssize_t buf_len; - u32 bkt_iter; - u32 dom_cnt = 0; - struct netlbl_domhsh_tbl *hsh_tbl; - struct netlbl_dom_map *list_iter; - ssize_t tmp_len; + int ret_val = -ENOENT; + u32 iter_bkt; + struct netlbl_dom_map *iter_entry; + u32 chain_cnt = 0; - buf_len = NETLBL_LEN_U32; rcu_read_lock(); - hsh_tbl = rcu_dereference(netlbl_domhsh); - for (bkt_iter = 0; bkt_iter < hsh_tbl->size; bkt_iter++) - list_for_each_entry_rcu(list_iter, - &hsh_tbl->tbl[bkt_iter], list) { - buf_len += NETLBL_LEN_U32 + - nla_total_size(strlen(list_iter->domain) + 1); - switch (list_iter->type) { - case NETLBL_NLTYPE_UNLABELED: - break; - case NETLBL_NLTYPE_CIPSOV4: - buf_len += 2 * NETLBL_LEN_U32; - break; - } - dom_cnt++; - } - - skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC); - if (skb == NULL) - goto dump_failure; - - if (nla_put_u32(skb, NLA_U32, dom_cnt) != 0) - goto dump_failure; - buf_len -= NETLBL_LEN_U32; - hsh_tbl = rcu_dereference(netlbl_domhsh); - for (bkt_iter = 0; bkt_iter < hsh_tbl->size; bkt_iter++) - list_for_each_entry_rcu(list_iter, - &hsh_tbl->tbl[bkt_iter], list) { - tmp_len = nla_total_size(strlen(list_iter->domain) + - 1); - if (buf_len < NETLBL_LEN_U32 + tmp_len) - goto dump_failure; - if (nla_put_string(skb, - NLA_STRING, - list_iter->domain) != 0) - goto dump_failure; - if (nla_put_u32(skb, NLA_U32, list_iter->type) != 0) - goto dump_failure; - buf_len -= NETLBL_LEN_U32 + tmp_len; - switch (list_iter->type) { - case NETLBL_NLTYPE_UNLABELED: - break; - case NETLBL_NLTYPE_CIPSOV4: - if (buf_len < 2 * NETLBL_LEN_U32) - goto dump_failure; - if (nla_put_u32(skb, - NLA_U32, - list_iter->type_def.cipsov4->type) != 0) - goto dump_failure; - if (nla_put_u32(skb, - NLA_U32, - list_iter->type_def.cipsov4->doi) != 0) - goto dump_failure; - buf_len -= 2 * NETLBL_LEN_U32; - break; + for (iter_bkt = *skip_bkt; + iter_bkt < rcu_dereference(netlbl_domhsh)->size; + iter_bkt++, chain_cnt = 0) { + list_for_each_entry_rcu(iter_entry, + &netlbl_domhsh->tbl[iter_bkt], + list) + if (iter_entry->valid) { + if (chain_cnt++ < *skip_chain) + continue; + ret_val = callback(iter_entry, cb_arg); + if (ret_val < 0) { + chain_cnt--; + goto walk_return; + } } - } - rcu_read_unlock(); - - return skb; - -dump_failure: - rcu_read_unlock(); - kfree_skb(skb); - return NULL; -} - -/** - * netlbl_domhsh_dump_default - Dump the default domain mapping into a sk_buff - * - * Description: - * Dump the default domain mapping into a buffer suitable for returning to an - * application in response to a NetLabel management DEFDOMAIN message. This - * function may fail if another process is changing the default domain mapping - * at the same time. The returned sk_buff has room at the front of the - * skb_buff for @headroom bytes. See netlabel.h for the DEFDOMAIN message - * format. Returns a pointer to a sk_buff on success, NULL on error. - * - */ -struct sk_buff *netlbl_domhsh_dump_default(size_t headroom) -{ - struct sk_buff *skb; - ssize_t buf_len; - struct netlbl_dom_map *entry; - - buf_len = NETLBL_LEN_U32; - rcu_read_lock(); - entry = rcu_dereference(netlbl_domhsh_def); - if (entry != NULL) - switch (entry->type) { - case NETLBL_NLTYPE_UNLABELED: - break; - case NETLBL_NLTYPE_CIPSOV4: - buf_len += 2 * NETLBL_LEN_U32; - break; - } - - skb = netlbl_netlink_alloc_skb(headroom, buf_len, GFP_ATOMIC); - if (skb == NULL) - goto dump_default_failure; - - if (entry != rcu_dereference(netlbl_domhsh_def)) - goto dump_default_failure; - if (entry != NULL) { - if (nla_put_u32(skb, NLA_U32, entry->type) != 0) - goto dump_default_failure; - buf_len -= NETLBL_LEN_U32; - switch (entry->type) { - case NETLBL_NLTYPE_UNLABELED: - break; - case NETLBL_NLTYPE_CIPSOV4: - if (buf_len < 2 * NETLBL_LEN_U32) - goto dump_default_failure; - if (nla_put_u32(skb, - NLA_U32, - entry->type_def.cipsov4->type) != 0) - goto dump_default_failure; - if (nla_put_u32(skb, - NLA_U32, - entry->type_def.cipsov4->doi) != 0) - goto dump_default_failure; - buf_len -= 2 * NETLBL_LEN_U32; - break; - } - } else - nla_put_u32(skb, NLA_U32, NETLBL_NLTYPE_NONE); - rcu_read_unlock(); - - return skb; + } -dump_default_failure: +walk_return: rcu_read_unlock(); - kfree_skb(skb); - return NULL; + *skip_bkt = iter_bkt; + *skip_chain = chain_cnt; + return ret_val; } diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index 99a2287de246..02af72a7877c 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h @@ -61,7 +61,9 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry); int netlbl_domhsh_add_default(struct netlbl_dom_map *entry); int netlbl_domhsh_remove_default(void); struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); -struct sk_buff *netlbl_domhsh_dump(size_t headroom); -struct sk_buff *netlbl_domhsh_dump_default(size_t headroom); +int netlbl_domhsh_walk(u32 *skip_bkt, + u32 *skip_chain, + int (*callback) (struct netlbl_dom_map *entry, void *arg), + void *cb_arg); #endif diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index 73cbe66e42ff..eeb7d768d2bb 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -74,85 +74,3 @@ int netlbl_netlink_init(void) return 0; } - -/* - * NetLabel Common Protocol Functions - */ - -/** - * netlbl_netlink_send_ack - Send an ACK message - * @info: the generic NETLINK information - * @genl_family: the generic NETLINK family ID value - * @ack_cmd: the generic NETLINK family ACK command value - * @ret_code: return code to use - * - * Description: - * This function sends an ACK message to the sender of the NETLINK message - * specified by @info. - * - */ -void netlbl_netlink_send_ack(const struct genl_info *info, - u32 genl_family, - u8 ack_cmd, - u32 ret_code) -{ - size_t data_size; - struct sk_buff *skb; - - data_size = GENL_HDRLEN + 2 * NETLBL_LEN_U32; - skb = netlbl_netlink_alloc_skb(0, data_size, GFP_KERNEL); - if (skb == NULL) - return; - - if (netlbl_netlink_hdr_put(skb, - info->snd_pid, - 0, - genl_family, - ack_cmd) == NULL) - goto send_ack_failure; - - if (nla_put_u32(skb, NLA_U32, info->snd_seq) != 0) - goto send_ack_failure; - if (nla_put_u32(skb, NLA_U32, ret_code) != 0) - goto send_ack_failure; - - netlbl_netlink_snd(skb, info->snd_pid); - return; - -send_ack_failure: - kfree_skb(skb); -} - -/* - * NETLINK I/O Functions - */ - -/** - * netlbl_netlink_snd - Send a NetLabel message - * @skb: NetLabel message - * @pid: destination PID - * - * Description: - * Sends a unicast NetLabel message over the NETLINK socket. - * - */ -int netlbl_netlink_snd(struct sk_buff *skb, u32 pid) -{ - return genlmsg_unicast(skb, pid); -} - -/** - * netlbl_netlink_snd - Send a NetLabel message - * @skb: NetLabel message - * @pid: sending PID - * @group: multicast group id - * - * Description: - * Sends a multicast NetLabel message over the NETLINK socket to all members - * of @group except @pid. - * - */ -int netlbl_netlink_snd_multicast(struct sk_buff *skb, u32 pid, u32 group) -{ - return genlmsg_multicast(skb, pid, group, GFP_KERNEL); -} diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h index 385a6c7488c6..3f9386b917df 100644 --- a/net/netlabel/netlabel_user.h +++ b/net/netlabel/netlabel_user.h @@ -40,72 +40,6 @@ /* NetLabel NETLINK helper functions */ -/** - * netlbl_netlink_cap_check - Check the NETLINK msg capabilities - * @skb: the NETLINK buffer - * @req_cap: the required capability - * - * Description: - * Check the NETLINK buffer's capabilities against the required capabilities. - * Returns zero on success, negative values on failure. - * - */ -static inline int netlbl_netlink_cap_check(const struct sk_buff *skb, - kernel_cap_t req_cap) -{ - if (cap_raised(NETLINK_CB(skb).eff_cap, req_cap)) - return 0; - return -EPERM; -} - -/** - * netlbl_getinc_u8 - Read a u8 value from a nlattr stream and move on - * @nla: the attribute - * @rem_len: remaining length - * - * Description: - * Return a u8 value pointed to by @nla and advance it to the next attribute. - * - */ -static inline u8 netlbl_getinc_u8(struct nlattr **nla, int *rem_len) -{ - u8 val = nla_get_u8(*nla); - *nla = nla_next(*nla, rem_len); - return val; -} - -/** - * netlbl_getinc_u16 - Read a u16 value from a nlattr stream and move on - * @nla: the attribute - * @rem_len: remaining length - * - * Description: - * Return a u16 value pointed to by @nla and advance it to the next attribute. - * - */ -static inline u16 netlbl_getinc_u16(struct nlattr **nla, int *rem_len) -{ - u16 val = nla_get_u16(*nla); - *nla = nla_next(*nla, rem_len); - return val; -} - -/** - * netlbl_getinc_u32 - Read a u32 value from a nlattr stream and move on - * @nla: the attribute - * @rem_len: remaining length - * - * Description: - * Return a u32 value pointed to by @nla and advance it to the next attribute. - * - */ -static inline u32 netlbl_getinc_u32(struct nlattr **nla, int *rem_len) -{ - u32 val = nla_get_u32(*nla); - *nla = nla_next(*nla, rem_len); - return val; -} - /** * netlbl_netlink_hdr_put - Write the NETLINK buffers into a sk_buff * @skb: the packet @@ -124,6 +58,7 @@ static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb, u32 pid, u32 seq, int type, + int flags, u8 cmd) { return genlmsg_put(skb, @@ -131,85 +66,13 @@ static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb, seq, type, 0, - 0, + flags, cmd, NETLBL_PROTO_VERSION); } -/** - * netlbl_netlink_hdr_push - Write the NETLINK buffers into a sk_buff - * @skb: the packet - * @pid: the PID of the receipient - * @seq: the sequence number - * @type: the generic NETLINK message family type - * @cmd: command - * - * Description: - * Write both a NETLINK nlmsghdr structure and a Generic NETLINK genlmsghdr - * struct to the packet. - * - */ -static inline void netlbl_netlink_hdr_push(struct sk_buff *skb, - u32 pid, - u32 seq, - int type, - u8 cmd) - -{ - struct nlmsghdr *nlh; - struct genlmsghdr *hdr; - - nlh = (struct nlmsghdr *)skb_push(skb, NLMSG_SPACE(GENL_HDRLEN)); - nlh->nlmsg_type = type; - nlh->nlmsg_len = skb->len; - nlh->nlmsg_flags = 0; - nlh->nlmsg_pid = pid; - nlh->nlmsg_seq = seq; - - hdr = nlmsg_data(nlh); - hdr->cmd = cmd; - hdr->version = NETLBL_PROTO_VERSION; - hdr->reserved = 0; -} - -/** - * netlbl_netlink_payload_len - Return the length of the payload - * @skb: the NETLINK buffer - * - * Description: - * This function returns the length of the NetLabel payload. - * - */ -static inline u32 netlbl_netlink_payload_len(const struct sk_buff *skb) -{ - return nlmsg_len((struct nlmsghdr *)skb->data) - GENL_HDRLEN; -} - -/** - * netlbl_netlink_payload_data - Returns a pointer to the start of the payload - * @skb: the NETLINK buffer - * - * Description: - * This function returns a pointer to the start of the NetLabel payload. - * - */ -static inline void *netlbl_netlink_payload_data(const struct sk_buff *skb) -{ - return (unsigned char *)nlmsg_data((struct nlmsghdr *)skb->data) + - GENL_HDRLEN; -} - -/* NetLabel common protocol functions */ - -void netlbl_netlink_send_ack(const struct genl_info *info, - u32 genl_family, - u8 ack_cmd, - u32 ret_code); - /* NetLabel NETLINK I/O functions */ int netlbl_netlink_init(void); -int netlbl_netlink_snd(struct sk_buff *skb, u32 pid); -int netlbl_netlink_snd_multicast(struct sk_buff *skb, u32 pid, u32 group); #endif -- cgit v1.3-8-gc7d7 From 9e12bb22e32389b41222c9d9fb55724fed83a038 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Sep 2006 21:25:20 -0700 Subject: [IPV4]: ip_route_input() annotations ip_route_input() takes net-endian source and destination address. * Annotated as such. * arguments of its invocations annotated where needed. * local helpers getting the same values passed to by it (ip_route_input_mc(), ip_route_input_slow(), ip_handle_martian_source(), ip_mkroute_input(), ip_mkroute_input_def(), __mkroute_input()) annotated Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/route.h | 2 +- net/ipv4/arp.c | 2 +- net/ipv4/ip_options.c | 2 +- net/ipv4/route.c | 22 ++++++++++++---------- 4 files changed, 15 insertions(+), 13 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 7f93ac0e0899..9cfa67089f7f 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -116,7 +116,7 @@ extern void rt_cache_flush(int how); extern int __ip_route_output_key(struct rtable **, const struct flowi *flp); extern int ip_route_output_key(struct rtable **, struct flowi *flp); extern int ip_route_output_flow(struct rtable **rp, struct flowi *flp, struct sock *sk, int flags); -extern int ip_route_input(struct sk_buff*, u32 dst, u32 src, u8 tos, struct net_device *devin); +extern int ip_route_input(struct sk_buff*, __be32 dst, __be32 src, u8 tos, struct net_device *devin); extern unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu); extern void ip_rt_send_redirect(struct sk_buff *skb); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index c8a3723bc001..48e1ccb2ff55 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -710,7 +710,7 @@ static int arp_process(struct sk_buff *skb) unsigned char *arp_ptr; struct rtable *rt; unsigned char *sha, *tha; - u32 sip, tip; + __be32 sip, tip; u16 dev_type = dev->type; int addr_type; struct neighbour *n; diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index e7437c091326..87f25486fd64 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -607,7 +607,7 @@ int ip_options_rcv_srr(struct sk_buff *skb) { struct ip_options *opt = &(IPCB(skb)->opt); int srrspace, srrptr; - u32 nexthop; + __be32 nexthop; struct iphdr *iph = skb->nh.iph; unsigned char * optptr = skb->nh.raw + opt->srr; struct rtable *rt = (struct rtable*)skb->dst; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 20ffe8e88c0f..bbe529774fd9 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1596,7 +1596,7 @@ static void rt_set_nexthop(struct rtable *rt, struct fib_result *res, u32 itag) rt->rt_type = res->type; } -static int ip_route_input_mc(struct sk_buff *skb, u32 daddr, u32 saddr, +static int ip_route_input_mc(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev, int our) { unsigned hash; @@ -1681,8 +1681,8 @@ e_inval: static void ip_handle_martian_source(struct net_device *dev, struct in_device *in_dev, struct sk_buff *skb, - u32 daddr, - u32 saddr) + __be32 daddr, + __be32 saddr) { RT_CACHE_STAT_INC(in_martian_src); #ifdef CONFIG_IP_ROUTE_VERBOSE @@ -1712,7 +1712,7 @@ static void ip_handle_martian_source(struct net_device *dev, static inline int __mkroute_input(struct sk_buff *skb, struct fib_result* res, struct in_device *in_dev, - u32 daddr, u32 saddr, u32 tos, + __be32 daddr, __be32 saddr, u32 tos, struct rtable **result) { @@ -1813,7 +1813,7 @@ static inline int ip_mkroute_input_def(struct sk_buff *skb, struct fib_result* res, const struct flowi *fl, struct in_device *in_dev, - u32 daddr, u32 saddr, u32 tos) + __be32 daddr, __be32 saddr, u32 tos) { struct rtable* rth = NULL; int err; @@ -1838,7 +1838,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb, struct fib_result* res, const struct flowi *fl, struct in_device *in_dev, - u32 daddr, u32 saddr, u32 tos) + __be32 daddr, __be32 saddr, u32 tos) { #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED struct rtable* rth = NULL, *rtres; @@ -1901,7 +1901,7 @@ static inline int ip_mkroute_input(struct sk_buff *skb, * 2. IP spoofing attempts are filtered with 100% of guarantee. */ -static int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr, +static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev) { struct fib_result res; @@ -1920,7 +1920,7 @@ static int ip_route_input_slow(struct sk_buff *skb, u32 daddr, u32 saddr, u32 itag = 0; struct rtable * rth; unsigned hash; - u32 spec_dst; + __be32 spec_dst; int err = -EINVAL; int free_res = 0; @@ -2087,7 +2087,7 @@ martian_source: goto e_inval; } -int ip_route_input(struct sk_buff *skb, u32 daddr, u32 saddr, +int ip_route_input(struct sk_buff *skb, __be32 daddr, __be32 saddr, u8 tos, struct net_device *dev) { struct rtable * rth; @@ -2740,7 +2740,9 @@ int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void *arg) struct rtmsg *rtm; struct nlattr *tb[RTA_MAX+1]; struct rtable *rt = NULL; - u32 dst, src, iif; + __be32 dst = 0; + __be32 src = 0; + u32 iif; int err; struct sk_buff *skb; -- cgit v1.3-8-gc7d7 From f7655229c06d041323b40bd6eb9f95ca0ce95506 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Sep 2006 21:25:43 -0700 Subject: [IPV4]: ip_rt_redirect() annotations The first 4 arguments of ip_rt_redirect() are net-endian. Annotated. Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/route.h | 4 ++-- net/ipv4/route.c | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 9cfa67089f7f..64a18811526d 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -109,8 +109,8 @@ extern struct ip_rt_acct *ip_rt_acct; struct in_device; extern int ip_rt_init(void); -extern void ip_rt_redirect(u32 old_gw, u32 dst, u32 new_gw, - u32 src, struct net_device *dev); +extern void ip_rt_redirect(__be32 old_gw, __be32 dst, __be32 new_gw, + __be32 src, struct net_device *dev); extern void ip_rt_advice(struct rtable **rp, int advice); extern void rt_cache_flush(int how); extern int __ip_route_output_key(struct rtable **, const struct flowi *flp); diff --git a/net/ipv4/route.c b/net/ipv4/route.c index bbe529774fd9..e14df8ec5082 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1118,13 +1118,13 @@ static void rt_del(unsigned hash, struct rtable *rt) spin_unlock_bh(rt_hash_lock_addr(hash)); } -void ip_rt_redirect(u32 old_gw, u32 daddr, u32 new_gw, - u32 saddr, struct net_device *dev) +void ip_rt_redirect(__be32 old_gw, __be32 daddr, __be32 new_gw, + __be32 saddr, struct net_device *dev) { int i, k; struct in_device *in_dev = in_dev_get(dev); struct rtable *rth, **rthp; - u32 skeys[2] = { saddr, 0 }; + __be32 skeys[2] = { saddr, 0 }; int ikeys[2] = { dev->ifindex, 0 }; struct netevent_redirect netevent; -- cgit v1.3-8-gc7d7 From f2c3fe24119ee4f8faca08699f0488f500014a27 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Sep 2006 21:26:42 -0700 Subject: [IPV4]: annotate ipv4 addresses in struct rtable and struct flowi Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/flow.h | 4 ++-- include/net/route.h | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/flow.h b/include/net/flow.h index 3ca210ec1379..97569af9bda7 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -16,8 +16,8 @@ struct flowi { union { struct { - __u32 daddr; - __u32 saddr; + __be32 daddr; + __be32 saddr; __u32 fwmark; __u8 tos; __u8 scope; diff --git a/include/net/route.h b/include/net/route.h index 64a18811526d..5bb2b15b4391 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -62,18 +62,18 @@ struct rtable __u16 rt_type; __u16 rt_multipath_alg; - __u32 rt_dst; /* Path destination */ - __u32 rt_src; /* Path source */ + __be32 rt_dst; /* Path destination */ + __be32 rt_src; /* Path source */ int rt_iif; /* Info on neighbour */ - __u32 rt_gateway; + __be32 rt_gateway; /* Cache lookup keys */ struct flowi fl; /* Miscellaneous cached information */ - __u32 rt_spec_dst; /* RFC1122 specific destination */ + __be32 rt_spec_dst; /* RFC1122 specific destination */ struct inet_peer *peer; /* long-living peer info */ }; -- cgit v1.3-8-gc7d7 From bada8adc4e6622764205921e6ba3f717aa03c882 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Sep 2006 21:27:15 -0700 Subject: [IPV4]: ip_route_connect() ipv4 address arguments annotated annotated address arguments (port number left alone for now); ditto for inferred net-endian variables in callers. Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/route.h | 4 ++-- net/dccp/ipv4.c | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv4/datagram.c | 2 +- net/ipv4/tcp_ipv4.c | 2 +- 5 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 5bb2b15b4391..63524843f6d9 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -144,8 +144,8 @@ static inline char rt_tos2priority(u8 tos) return ip_tos2prio[IPTOS_TOS(tos)>>1]; } -static inline int ip_route_connect(struct rtable **rp, u32 dst, - u32 src, u32 tos, int oif, u8 protocol, +static inline int ip_route_connect(struct rtable **rp, __be32 dst, + __be32 src, u32 tos, int oif, u8 protocol, u16 sport, u16 dport, struct sock *sk) { struct flowi fl = { .oif = oif, diff --git a/net/dccp/ipv4.c b/net/dccp/ipv4.c index 66be29b6f508..bf692c1c116f 100644 --- a/net/dccp/ipv4.c +++ b/net/dccp/ipv4.c @@ -50,7 +50,7 @@ int dccp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct dccp_sock *dp = dccp_sk(sk); const struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; struct rtable *rt; - u32 daddr, nexthop; + __be32 daddr, nexthop; int tmp; int err; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index fdd89e37b9aa..250a02b23e4e 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -996,7 +996,7 @@ static int inet_sk_reselect_saddr(struct sock *sk) struct rtable *rt; __u32 old_saddr = inet->saddr; __u32 new_saddr; - __u32 daddr = inet->daddr; + __be32 daddr = inet->daddr; if (inet->opt && inet->opt->srr) daddr = inet->opt->faddr; diff --git a/net/ipv4/datagram.c b/net/ipv4/datagram.c index ec5da4fbd9f4..7b068a891953 100644 --- a/net/ipv4/datagram.c +++ b/net/ipv4/datagram.c @@ -25,7 +25,7 @@ int ip4_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct inet_sock *inet = inet_sk(sk); struct sockaddr_in *usin = (struct sockaddr_in *) uaddr; struct rtable *rt; - u32 saddr; + __be32 saddr; int oif; int err; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 39b179856082..9018aa41cb55 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -159,7 +159,7 @@ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) struct tcp_sock *tp = tcp_sk(sk); struct sockaddr_in *usin = (struct sockaddr_in *)uaddr; struct rtable *rt; - u32 daddr, nexthop; + __be32 daddr, nexthop; int tmp; int err; -- cgit v1.3-8-gc7d7 From 011a92610826bdeec4b80ab423958d4c512639f6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Sep 2006 21:27:35 -0700 Subject: [IPV4]: annotated ipv4 addresses in struct inet_sock Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/inet_sock.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index f6242710f2ff..fc0b9e157b61 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -110,11 +110,11 @@ struct inet_sock { struct ipv6_pinfo *pinet6; #endif /* Socket demultiplex comparisons on incoming packets. */ - __u32 daddr; - __u32 rcv_saddr; + __be32 daddr; + __be32 rcv_saddr; __u16 dport; __u16 num; - __u32 saddr; + __be32 saddr; __s16 uc_ttl; __u16 cmsg_flags; struct ip_options *opt; @@ -129,7 +129,7 @@ struct inet_sock { hdrincl:1, mc_loop:1; int mc_index; - __u32 mc_addr; + __be32 mc_addr; struct ip_mc_socklist *mc_list; struct { unsigned int flags; @@ -137,7 +137,7 @@ struct inet_sock { struct ip_options *opt; struct rtable *rt; int length; /* Total length of all frames */ - u32 addr; + __be32 addr; struct flowi fl; } cork; }; -- cgit v1.3-8-gc7d7 From d9c9df8c9368f4102324e8c3923edae83974602b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Sep 2006 21:28:14 -0700 Subject: [IPV4]: fib_validate_source() annotations annotated arguments and inferred net-endian variables in callers Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/ip_fib.h | 4 ++-- net/ipv4/fib_frontend.c | 4 ++-- net/ipv4/route.c | 3 ++- 3 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index fcc159a4ac17..c4eca2575b84 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -222,8 +222,8 @@ extern int inet_rtm_delroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *ar extern int inet_rtm_newroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); extern int inet_rtm_getroute(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg); extern int inet_dump_fib(struct sk_buff *skb, struct netlink_callback *cb); -extern int fib_validate_source(u32 src, u32 dst, u8 tos, int oif, - struct net_device *dev, u32 *spec_dst, u32 *itag); +extern int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, + struct net_device *dev, __be32 *spec_dst, u32 *itag); extern void fib_select_multipath(const struct flowi *flp, struct fib_result *res); struct rtentry; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index cfb527c060e4..684b91f7c2ae 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -180,8 +180,8 @@ unsigned inet_addr_type(u32 addr) - check, that packet arrived from expected physical interface. */ -int fib_validate_source(u32 src, u32 dst, u8 tos, int oif, - struct net_device *dev, u32 *spec_dst, u32 *itag) +int fib_validate_source(__be32 src, __be32 dst, u8 tos, int oif, + struct net_device *dev, __be32 *spec_dst, u32 *itag) { struct in_device *in_dev; struct flowi fl = { .nl_u = { .ip4_u = diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 9d8fbf1e5bc3..4bb66fee4543 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1722,7 +1722,8 @@ static inline int __mkroute_input(struct sk_buff *skb, int err; struct in_device *out_dev; unsigned flags = 0; - u32 spec_dst, itag; + __be32 spec_dst; + u32 itag; /* get a working reference to the output device */ out_dev = in_dev_get(FIB_RES_DEV(*res)); -- cgit v1.3-8-gc7d7 From ed49e3caaa6126f8e29f08e8b4fdcafcae431b57 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Sep 2006 22:13:54 -0700 Subject: [IPV4]: fib_hn ->nh_gw is net-endian Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index c4eca2575b84..9f84e9fc1b1f 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -63,7 +63,7 @@ struct fib_nh { __u32 nh_tclassid; #endif int nh_oif; - u32 nh_gw; + __be32 nh_gw; }; /* -- cgit v1.3-8-gc7d7 From b83738ae003dde613712ddb1e90a8a01f5587b51 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Sep 2006 22:14:15 -0700 Subject: [IPV4]: FIB_RES_PREFSRC() annotated Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/ip_fib.h | 4 ++-- net/ipv4/fib_semantics.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 9f84e9fc1b1f..ece746ff4ef8 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -78,7 +78,7 @@ struct fib_info { int fib_dead; unsigned fib_flags; int fib_protocol; - u32 fib_prefsrc; + __be32 fib_prefsrc; u32 fib_priority; u32 fib_metrics[RTAX_MAX]; #define fib_mtu fib_metrics[RTAX_MTU-1] @@ -232,7 +232,7 @@ struct rtentry; extern int ip_fib_check_default(u32 gw, struct net_device *dev); extern int fib_sync_down(u32 local, struct net_device *dev, int force); extern int fib_sync_up(struct net_device *dev); -extern u32 __fib_res_prefsrc(struct fib_result *res); +extern __be32 __fib_res_prefsrc(struct fib_result *res); /* Exported by fib_hash.c */ extern struct fib_table *fib_hash_init(u32 id); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index 2ead09543f68..152d9a264fab 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -923,7 +923,7 @@ out_fill_res: /* Find appropriate source address to this destination */ -u32 __fib_res_prefsrc(struct fib_result *res) +__be32 __fib_res_prefsrc(struct fib_result *res) { return inet_select_addr(FIB_RES_DEV(*res), FIB_RES_GW(*res), res->scope); } -- cgit v1.3-8-gc7d7 From 00012e5bb9527022cbc843c5d372b282dbe6c4af Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Sep 2006 22:14:41 -0700 Subject: [IPV4]: introduce nla_get_be32()/NLA_PUT_BE32() net-endian counterparts of nla_get_u32()/NLA_PUT_U32() Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/netlink.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/net') diff --git a/include/net/netlink.h b/include/net/netlink.h index 4ab68a7a636a..ce5cba19c393 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -831,6 +831,9 @@ static inline int nla_put_msecs(struct sk_buff *skb, int attrtype, #define NLA_PUT_U32(skb, attrtype, value) \ NLA_PUT_TYPE(skb, u32, attrtype, value) +#define NLA_PUT_BE32(skb, attrtype, value) \ + NLA_PUT_TYPE(skb, __be32, attrtype, value) + #define NLA_PUT_U64(skb, attrtype, value) \ NLA_PUT_TYPE(skb, u64, attrtype, value) @@ -852,6 +855,15 @@ static inline u32 nla_get_u32(struct nlattr *nla) return *(u32 *) nla_data(nla); } +/** + * nla_get_be32 - return payload of __be32 attribute + * @nla: __be32 netlink attribute + */ +static inline __be32 nla_get_be32(struct nlattr *nla) +{ + return *(__be32 *) nla_data(nla); +} + /** * nla_get_u16 - return payload of u16 attribute * @nla: u16 netlink attribute -- cgit v1.3-8-gc7d7 From 6d85c10abe840e98cbac673202fe7cc9ada2180c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Sep 2006 22:15:46 -0700 Subject: [IPV4]: struct fib_config IPv4 address fields annotated Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/ip_fib.h | 8 ++++---- net/ipv4/fib_frontend.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index ece746ff4ef8..cacf4c52a8c0 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -30,13 +30,13 @@ struct fib_config { u8 fc_type; /* 1 byte unused */ u32 fc_table; - u32 fc_dst; - u32 fc_src; - u32 fc_gw; + __be32 fc_dst; + __be32 fc_src; + __be32 fc_gw; int fc_oif; u32 fc_flags; u32 fc_priority; - u32 fc_prefsrc; + __be32 fc_prefsrc; struct nlattr *fc_mx; struct rtnexthop *fc_mp; int fc_mx_len; diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 319aaafd3545..7f5217907e5a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -273,7 +273,7 @@ static int put_rtax(struct nlattr *mx, int len, int type, u32 value) static int rtentry_to_fib_config(int cmd, struct rtentry *rt, struct fib_config *cfg) { - u32 addr; + __be32 addr; int plen; memset(cfg, 0, sizeof(*cfg)); -- cgit v1.3-8-gc7d7 From e4883014f48f8c17c17a2526cb5cb6e17c5f94e7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Sep 2006 22:17:28 -0700 Subject: [IPV4]: icmp_send() annotation The last argument is network-endian (it will go straight into the packet). Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/icmp.h | 2 +- net/ipv4/icmp.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/icmp.h b/include/net/icmp.h index 05f8ff7d9316..dc09474efcf3 100644 --- a/include/net/icmp.h +++ b/include/net/icmp.h @@ -38,7 +38,7 @@ struct dst_entry; struct net_proto_family; struct sk_buff; -extern void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info); +extern void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info); extern int icmp_rcv(struct sk_buff *skb); extern int icmp_ioctl(struct sock *sk, int cmd, unsigned long arg); extern void icmp_init(struct net_proto_family *ops); diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 428f1c91ec45..45396c53560d 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -430,7 +430,7 @@ out_unlock: * MUST reply to only the first fragment. */ -void icmp_send(struct sk_buff *skb_in, int type, int code, u32 info) +void icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info) { struct iphdr *iph; int room; -- cgit v1.3-8-gc7d7 From fd6832220974809141b3981e380b78690bba8911 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Sep 2006 22:17:51 -0700 Subject: [IPV4]: inet_addr_type() annotations argument and inferred net-endian variables in callers annotated. Signed-off-by: Al Viro Signed-off-by: David S. Miller --- drivers/s390/net/qeth_main.c | 2 +- include/net/route.h | 2 +- net/ipv4/arp.c | 4 ++-- net/ipv4/fib_frontend.c | 2 +- net/ipv4/ip_options.c | 4 ++-- net/ipv4/netfilter/ipt_addrtype.c | 2 +- net/ipv6/af_inet6.c | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/net') diff --git a/drivers/s390/net/qeth_main.c b/drivers/s390/net/qeth_main.c index 5613b4564fa2..8364d5475ac7 100644 --- a/drivers/s390/net/qeth_main.c +++ b/drivers/s390/net/qeth_main.c @@ -8067,7 +8067,7 @@ qeth_arp_constructor(struct neighbour *neigh) neigh->parms = neigh_parms_clone(parms); rcu_read_unlock(); - neigh->type = inet_addr_type(*(u32 *) neigh->primary_key); + neigh->type = inet_addr_type(*(__be32 *) neigh->primary_key); neigh->nud_state = NUD_NOARP; neigh->ops = arp_direct_ops; neigh->output = neigh->ops->queue_xmit; diff --git a/include/net/route.h b/include/net/route.h index 63524843f6d9..58752722c968 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -120,7 +120,7 @@ extern int ip_route_input(struct sk_buff*, __be32 dst, __be32 src, u8 tos, stru extern unsigned short ip_rt_frag_needed(struct iphdr *iph, unsigned short new_mtu); extern void ip_rt_send_redirect(struct sk_buff *skb); -extern unsigned inet_addr_type(u32 addr); +extern unsigned inet_addr_type(__be32 addr); extern void ip_rt_multicast_event(struct in_device *); extern int ip_rt_ioctl(unsigned int cmd, void __user *arg); extern void ip_rt_get_source(u8 *src, struct rtable *rt); diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index db72339c21e1..2a29a5ebc978 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -234,7 +234,7 @@ static u32 arp_hash(const void *pkey, const struct net_device *dev) static int arp_constructor(struct neighbour *neigh) { - u32 addr = *(u32*)neigh->primary_key; + __be32 addr = *(__be32*)neigh->primary_key; struct net_device *dev = neigh->dev; struct in_device *in_dev; struct neigh_parms *parms; @@ -470,7 +470,7 @@ static int arp_set_predefined(int addr_hint, unsigned char * haddr, u32 paddr, s int arp_find(unsigned char *haddr, struct sk_buff *skb) { struct net_device *dev = skb->dev; - u32 paddr; + __be32 paddr; struct neighbour *n; if (!skb->dst) { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c0bd2f122da2..34dc640478a0 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -146,7 +146,7 @@ out: return dev; } -unsigned inet_addr_type(u32 addr) +unsigned inet_addr_type(__be32 addr) { struct flowi fl = { .nl_u = { .ip4_u = { .daddr = addr } } }; struct fib_result res; diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 87f25486fd64..3245bec23f4d 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -148,7 +148,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) dopt->ts_needtime = 0; if (soffset + 8 <= optlen) { - __u32 addr; + __be32 addr; memcpy(&addr, sptr+soffset-1, 4); if (inet_addr_type(addr) != RTN_LOCAL) { @@ -396,7 +396,7 @@ int ip_options_compile(struct ip_options * opt, struct sk_buff * skb) } opt->ts = optptr - iph; { - u32 addr; + __be32 addr; memcpy(&addr, &optptr[optptr[2]-1], 4); if (inet_addr_type(addr) == RTN_UNICAST) break; diff --git a/net/ipv4/netfilter/ipt_addrtype.c b/net/ipv4/netfilter/ipt_addrtype.c index 893dae210b04..7b60eb74788b 100644 --- a/net/ipv4/netfilter/ipt_addrtype.c +++ b/net/ipv4/netfilter/ipt_addrtype.c @@ -22,7 +22,7 @@ MODULE_LICENSE("GPL"); MODULE_AUTHOR("Patrick McHardy "); MODULE_DESCRIPTION("iptables addrtype match"); -static inline int match_type(u_int32_t addr, u_int16_t mask) +static inline int match_type(__be32 addr, u_int16_t mask) { return !!(mask & (1 << inet_addr_type(addr))); } diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index bf6e8aff19d4..e94eccb99707 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -246,7 +246,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) struct sock *sk = sock->sk; struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); - __u32 v4addr = 0; + __be32 v4addr = 0; unsigned short snum; int addr_type = 0; int err = 0; -- cgit v1.3-8-gc7d7 From d878e72e419db9ff4c66848375ee30a19820e4de Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Sep 2006 22:18:13 -0700 Subject: [IPV4]: ip_fib_check_default() annotated Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 +- net/ipv4/fib_semantics.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index cacf4c52a8c0..9cd0b56cd758 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -229,7 +229,7 @@ extern void fib_select_multipath(const struct flowi *flp, struct fib_result *res struct rtentry; /* Exported by fib_semantics.c */ -extern int ip_fib_check_default(u32 gw, struct net_device *dev); +extern int ip_fib_check_default(__be32 gw, struct net_device *dev); extern int fib_sync_down(u32 local, struct net_device *dev, int force); extern int fib_sync_up(struct net_device *dev); extern __be32 __fib_res_prefsrc(struct fib_result *res); diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index d91244782216..d853bd2bd0d2 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -248,7 +248,7 @@ static inline unsigned int fib_devindex_hashfn(unsigned int val) Used only by redirect accept routine. */ -int ip_fib_check_default(u32 gw, struct net_device *dev) +int ip_fib_check_default(__be32 gw, struct net_device *dev) { struct hlist_head *head; struct hlist_node *node; -- cgit v1.3-8-gc7d7 From 53576d9b995605a9edc7414b900a9218c8f23b1f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Sep 2006 22:18:43 -0700 Subject: [IPV4]: inetpeer annotations This one is interesting - we use net-endian value as search key, but order the tree by *host-endian* comparisons of keys. OK since we only care about lookups. Annotated inet_getpeer() and friends. Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/inetpeer.h | 4 ++-- net/ipv4/inetpeer.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/inetpeer.h b/include/net/inetpeer.h index 0965515f40cf..925573fd2aed 100644 --- a/include/net/inetpeer.h +++ b/include/net/inetpeer.h @@ -22,7 +22,7 @@ struct inet_peer unsigned long dtime; /* the time of last use of not * referenced entries */ atomic_t refcnt; - __u32 v4daddr; /* peer's address */ + __be32 v4daddr; /* peer's address */ __u16 avl_height; __u16 ip_id_count; /* IP ID for the next packet */ atomic_t rid; /* Frag reception counter */ @@ -33,7 +33,7 @@ struct inet_peer void inet_initpeers(void) __init; /* can be called with or without local BH being disabled */ -struct inet_peer *inet_getpeer(__u32 daddr, int create); +struct inet_peer *inet_getpeer(__be32 daddr, int create); extern spinlock_t inet_peer_unused_lock; extern struct inet_peer **inet_peer_unused_tailp; diff --git a/net/ipv4/inetpeer.c b/net/ipv4/inetpeer.c index a675602ef295..2b1a54b59c48 100644 --- a/net/ipv4/inetpeer.c +++ b/net/ipv4/inetpeer.c @@ -163,7 +163,7 @@ static void unlink_from_unused(struct inet_peer *p) for (u = peer_root; u != peer_avl_empty; ) { \ if (daddr == u->v4daddr) \ break; \ - if (daddr < u->v4daddr) \ + if ((__force __u32)daddr < (__force __u32)u->v4daddr) \ v = &u->avl_left; \ else \ v = &u->avl_right; \ @@ -368,7 +368,7 @@ static int cleanup_once(unsigned long ttl) } /* Called with or without local BH being disabled. */ -struct inet_peer *inet_getpeer(__u32 daddr, int create) +struct inet_peer *inet_getpeer(__be32 daddr, int create) { struct inet_peer *p, *n; struct inet_peer **stack[PEER_MAXDEPTH], ***stackptr; -- cgit v1.3-8-gc7d7 From 80e856e16a145d7f44f613d9f3d903bf459510ca Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Sep 2006 22:19:36 -0700 Subject: [IPV4]: annotate addresses in fib_result and fib_result_nl Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/ip_fib.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 9cd0b56cd758..19f70896ea98 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -107,8 +107,8 @@ struct fib_result { unsigned char type; unsigned char scope; #ifdef CONFIG_IP_ROUTE_MULTIPATH_CACHED - __u32 network; - __u32 netmask; + __be32 network; + __be32 netmask; #endif struct fib_info *fi; #ifdef CONFIG_IP_MULTIPLE_TABLES @@ -117,7 +117,7 @@ struct fib_result { }; struct fib_result_nl { - u32 fl_addr; /* To be looked up*/ + __be32 fl_addr; /* To be looked up*/ u32 fl_fwmark; unsigned char fl_tos; unsigned char fl_scope; -- cgit v1.3-8-gc7d7 From d9cd66e0e593929077b5ecf87384e23db7271c6e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Sep 2006 22:22:50 -0700 Subject: [IPV4]: multipath_set_nhinfo() annotations multipath_set_nhinfo() (and underlying callback) take net-endian network and netmask. Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/ip_mp_alg.h | 4 ++-- net/ipv4/multipath_wrandom.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_mp_alg.h b/include/net/ip_mp_alg.h index ac747b64734c..beffdd66ad74 100644 --- a/include/net/ip_mp_alg.h +++ b/include/net/ip_mp_alg.h @@ -17,7 +17,7 @@ struct ip_mp_alg_ops { void (*mp_alg_select_route)(const struct flowi *flp, struct rtable *rth, struct rtable **rp); void (*mp_alg_flush)(void); - void (*mp_alg_set_nhinfo)(__u32 network, __u32 netmask, + void (*mp_alg_set_nhinfo)(__be32 network, __be32 netmask, unsigned char prefixlen, const struct fib_nh *nh); void (*mp_alg_remove)(struct rtable *rth); @@ -59,7 +59,7 @@ static inline void multipath_flush(void) } static inline void multipath_set_nhinfo(struct rtable *rth, - __u32 network, __u32 netmask, + __be32 network, __be32 netmask, unsigned char prefixlen, const struct fib_nh *nh) { diff --git a/net/ipv4/multipath_wrandom.c b/net/ipv4/multipath_wrandom.c index d25ec4ae09e5..773acc9ccd7c 100644 --- a/net/ipv4/multipath_wrandom.c +++ b/net/ipv4/multipath_wrandom.c @@ -217,8 +217,8 @@ static void wrandom_select_route(const struct flowi *flp, *rp = decision; } -static void wrandom_set_nhinfo(__u32 network, - __u32 netmask, +static void wrandom_set_nhinfo(__be32 network, + __be32 netmask, unsigned char prefixlen, const struct fib_nh *nh) { -- cgit v1.3-8-gc7d7 From 8712f774dc47ec6353c9b75317d6db62e58d9367 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Sep 2006 22:27:05 -0700 Subject: [IPV4]: ip_options_build() annotations daddr is net-endian Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- net/ipv4/ip_options.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index 98f908400771..b9a5bc9487e3 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -335,7 +335,7 @@ extern int ip_net_unreachable(struct sk_buff *skb); * Functions provided by ip_options.c */ -extern void ip_options_build(struct sk_buff *skb, struct ip_options *opt, u32 daddr, struct rtable *rt, int is_frag); +extern void ip_options_build(struct sk_buff *skb, struct ip_options *opt, __be32 daddr, struct rtable *rt, int is_frag); extern int ip_options_echo(struct ip_options *dopt, struct sk_buff *skb); extern void ip_options_fragment(struct sk_buff *skb); extern int ip_options_compile(struct ip_options *opt, struct sk_buff *skb); diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 3245bec23f4d..889e14180b04 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -38,7 +38,7 @@ */ void ip_options_build(struct sk_buff * skb, struct ip_options * opt, - u32 daddr, struct rtable *rt, int is_frag) + __be32 daddr, struct rtable *rt, int is_frag) { unsigned char * iph = skb->nh.raw; -- cgit v1.3-8-gc7d7 From 13d8eaa06abfeb708b60fa64203a20db033088b3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 26 Sep 2006 22:27:30 -0700 Subject: [IPV4]: ip_build_and_send_pkt() annotations saddr and daddr are net-endian Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- net/ipv4/ip_output.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index b9a5bc9487e3..6da1229c041a 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -86,7 +86,7 @@ extern int igmp_mc_proc_init(void); */ extern int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, - u32 saddr, u32 daddr, + __be32 saddr, __be32 daddr, struct ip_options *opt); extern int ip_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt, struct net_device *orig_dev); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 97aee76fb746..5bf2f094e317 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -118,7 +118,7 @@ static inline int ip_select_ttl(struct inet_sock *inet, struct dst_entry *dst) * */ int ip_build_and_send_pkt(struct sk_buff *skb, struct sock *sk, - u32 saddr, u32 daddr, struct ip_options *opt) + __be32 saddr, __be32 daddr, struct ip_options *opt) { struct inet_sock *inet = inet_sk(sk); struct rtable *rt = (struct rtable *)skb->dst; -- cgit v1.3-8-gc7d7 From b219e3ac66183fc9771b94af931fb5fd41d586ec Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 6 Jul 2006 12:38:46 +0200 Subject: [Bluetooth] Integrate low-level connections into the driver model This patch integrates the low-level connections (ACL and SCO) into the driver model. Every connection is presented as device with the parent set to its host controller device. Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 6 +++ net/bluetooth/hci_conn.c | 8 +++- net/bluetooth/hci_sysfs.c | 79 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 90 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index d84855fe7336..263e42b68e8d 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -165,6 +165,10 @@ struct hci_conn { struct timer_list disc_timer; struct timer_list idle_timer; + struct work_struct work; + + struct device dev; + struct hci_dev *hdev; void *l2cap_data; void *sco_data; @@ -412,6 +416,8 @@ static inline int hci_recv_frame(struct sk_buff *skb) int hci_register_sysfs(struct hci_dev *hdev); void hci_unregister_sysfs(struct hci_dev *hdev); +void hci_conn_add_sysfs(struct hci_conn *conn); +void hci_conn_del_sysfs(struct hci_conn *conn); #define SET_HCIDEV_DEV(hdev, pdev) ((hdev)->parent = (pdev)) diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 420ed4d7e57e..7e9515b41cc0 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -179,6 +179,8 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst) if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_CONN_ADD); + hci_conn_add_sysfs(conn); + tasklet_enable(&hdev->tx_task); return conn; @@ -211,6 +213,8 @@ int hci_conn_del(struct hci_conn *conn) tasklet_disable(&hdev->tx_task); + hci_conn_del_sysfs(conn); + hci_conn_hash_del(hdev, conn); if (hdev->notify) hdev->notify(hdev, HCI_NOTIFY_CONN_DEL); @@ -221,7 +225,9 @@ int hci_conn_del(struct hci_conn *conn) hci_dev_put(hdev); - kfree(conn); + /* will free via device release */ + put_device(&conn->dev); + return 0; } diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 3ccb4b0b8fc2..58df4360d242 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -170,6 +170,32 @@ static struct device_attribute *bt_attrs[] = { NULL }; +static ssize_t show_conn_type(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hci_conn *conn = dev_get_drvdata(dev); + return sprintf(buf, "%s\n", conn->type == ACL_LINK ? "ACL" : "SCO"); +} + +static ssize_t show_conn_address(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hci_conn *conn = dev_get_drvdata(dev); + bdaddr_t bdaddr; + baswap(&bdaddr, &conn->dst); + return sprintf(buf, "%s\n", batostr(&bdaddr)); +} + +#define CONN_ATTR(_name,_mode,_show,_store) \ +struct device_attribute conn_attr_##_name = __ATTR(_name,_mode,_show,_store) + +static CONN_ATTR(type, S_IRUGO, show_conn_type, NULL); +static CONN_ATTR(address, S_IRUGO, show_conn_address, NULL); + +static struct device_attribute *conn_attrs[] = { + &conn_attr_type, + &conn_attr_address, + NULL +}; + struct class *bt_class = NULL; EXPORT_SYMBOL_GPL(bt_class); @@ -181,8 +207,57 @@ static struct platform_device *bt_platform; static void bt_release(struct device *dev) { - struct hci_dev *hdev = dev_get_drvdata(dev); - kfree(hdev); + void *data = dev_get_drvdata(dev); + kfree(data); +} + +static void add_conn(void *data) +{ + struct hci_conn *conn = data; + int i; + + device_register(&conn->dev); + + for (i = 0; conn_attrs[i]; i++) + device_create_file(&conn->dev, conn_attrs[i]); +} + +void hci_conn_add_sysfs(struct hci_conn *conn) +{ + struct hci_dev *hdev = conn->hdev; + bdaddr_t *ba = &conn->dst; + + BT_DBG("conn %p", conn); + + conn->dev.parent = &hdev->dev; + conn->dev.release = bt_release; + + snprintf(conn->dev.bus_id, BUS_ID_SIZE, + "%s%2.2X%2.2X%2.2X%2.2X%2.2X%2.2X", + conn->type == ACL_LINK ? "acl" : "sco", + ba->b[5], ba->b[4], ba->b[3], + ba->b[2], ba->b[1], ba->b[0]); + + dev_set_drvdata(&conn->dev, conn); + + INIT_WORK(&conn->work, add_conn, (void *) conn); + + schedule_work(&conn->work); +} + +static void del_conn(void *data) +{ + struct hci_conn *conn = data; + device_del(&conn->dev); +} + +void hci_conn_del_sysfs(struct hci_conn *conn) +{ + BT_DBG("conn %p", conn); + + INIT_WORK(&conn->work, del_conn, (void *) conn); + + schedule_work(&conn->work); } int hci_register_sysfs(struct hci_dev *hdev) -- cgit v1.3-8-gc7d7 From 0ac53939a06c610b394aeb0211b985804f2d2da3 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 8 Jul 2006 13:57:15 +0200 Subject: [Bluetooth] Add HCI device identifier for SDIO cards This patch assigns the next free HCI device identifier to Bluetooth devices based on the SDIO interface. Signed-off-by: Marcel Holtmann --- drivers/bluetooth/hci_vhci.c | 2 +- include/net/bluetooth/hci.h | 3 ++- net/bluetooth/hci_sysfs.c | 4 +++- 3 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/drivers/bluetooth/hci_vhci.c b/drivers/bluetooth/hci_vhci.c index fc2130f1776a..a278d98a9151 100644 --- a/drivers/bluetooth/hci_vhci.c +++ b/drivers/bluetooth/hci_vhci.c @@ -276,7 +276,7 @@ static int vhci_open(struct inode *inode, struct file *file) data->hdev = hdev; - hdev->type = HCI_VHCI; + hdev->type = HCI_VIRTUAL; hdev->driver_data = data; hdev->open = vhci_open_dev; diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index b2bdb1aa0429..fde08f452b59 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -44,12 +44,13 @@ #define HCI_NOTIFY_VOICE_SETTING 3 /* HCI device types */ -#define HCI_VHCI 0 +#define HCI_VIRTUAL 0 #define HCI_USB 1 #define HCI_PCCARD 2 #define HCI_UART 3 #define HCI_RS232 4 #define HCI_PCI 5 +#define HCI_SDIO 6 /* HCI device quirks */ enum { diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 58df4360d242..09c61615e961 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -16,7 +16,7 @@ static inline char *typetostr(int type) { switch (type) { - case HCI_VHCI: + case HCI_VIRTUAL: return "VIRTUAL"; case HCI_USB: return "USB"; @@ -28,6 +28,8 @@ static inline char *typetostr(int type) return "RS232"; case HCI_PCI: return "PCI"; + case HCI_SDIO: + return "SDIO"; default: return "UNKNOWN"; } -- cgit v1.3-8-gc7d7 From defc761bc25643eeedee3abd6af0079ef214b55d Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Thu, 21 Sep 2006 16:04:00 +0200 Subject: [Bluetooth] Handle command complete event for exit periodic inquiry The command complete event of the exit periodic inquiry command must clear the HCI_INQUIRY flag and finish the HCI request. Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 2 ++ net/bluetooth/hci_event.c | 1 + 2 files changed, 3 insertions(+) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index fde08f452b59..5f04181b8109 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -340,6 +340,8 @@ struct hci_cp_inquiry { #define OCF_INQUIRY_CANCEL 0x0002 +#define OCF_EXIT_PERIODIC_INQ 0x0004 + #define OCF_LINK_KEY_REPLY 0x000B struct hci_cp_link_key_reply { bdaddr_t bdaddr; diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 3896dabab11d..c6cd243939e6 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -62,6 +62,7 @@ static void hci_cc_link_ctl(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb switch (ocf) { case OCF_INQUIRY_CANCEL: + case OCF_EXIT_PERIODIC_INQ: status = *((__u8 *) skb->data); if (status) { -- cgit v1.3-8-gc7d7 From 1143e5a6d4d69cd36d44e0184769aa2b17041a10 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Sat, 23 Sep 2006 09:57:20 +0200 Subject: [Bluetooth] Read local version information on device init The local version information are needed to identify certain feature sets of devices. They must be read on device init and stored for later use. It is also possible to access them through the device model. Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci_core.h | 3 +++ net/bluetooth/hci_core.c | 3 +++ net/bluetooth/hci_event.c | 21 ++++++++++++++++++++- net/bluetooth/hci_sysfs.c | 24 ++++++++++++++++++++++++ 4 files changed, 50 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 263e42b68e8d..7451a9c92d9d 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -72,6 +72,9 @@ struct hci_dev { __u8 type; bdaddr_t bdaddr; __u8 features[8]; + __u8 hci_ver; + __u16 hci_rev; + __u16 manufacturer; __u16 voice_setting; __u16 pkt_type; diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c index 5ed474277903..338ae977a31b 100644 --- a/net/bluetooth/hci_core.c +++ b/net/bluetooth/hci_core.c @@ -206,6 +206,9 @@ static void hci_init_req(struct hci_dev *hdev, unsigned long opt) /* Read Local Supported Features */ hci_send_cmd(hdev, OGF_INFO_PARAM, OCF_READ_LOCAL_FEATURES, 0, NULL); + /* Read Local Version */ + hci_send_cmd(hdev, OGF_INFO_PARAM, OCF_READ_LOCAL_VERSION, 0, NULL); + /* Read Buffer Size (ACL mtu, max pkt, etc.) */ hci_send_cmd(hdev, OGF_INFO_PARAM, OCF_READ_BUFFER_SIZE, 0, NULL); diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index c6cd243939e6..7518bdbf34cd 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -298,6 +298,7 @@ static void hci_cc_host_ctl(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb /* Command Complete OGF INFO_PARAM */ static void hci_cc_info_param(struct hci_dev *hdev, __u16 ocf, struct sk_buff *skb) { + struct hci_rp_read_loc_version *lv; struct hci_rp_read_local_features *lf; struct hci_rp_read_buffer_size *bs; struct hci_rp_read_bd_addr *ba; @@ -305,6 +306,23 @@ static void hci_cc_info_param(struct hci_dev *hdev, __u16 ocf, struct sk_buff *s BT_DBG("%s ocf 0x%x", hdev->name, ocf); switch (ocf) { + case OCF_READ_LOCAL_VERSION: + lv = (struct hci_rp_read_loc_version *) skb->data; + + if (lv->status) { + BT_DBG("%s READ_LOCAL_VERSION failed %d", hdev->name, lf->status); + break; + } + + hdev->hci_ver = lv->hci_ver; + hdev->hci_rev = btohs(lv->hci_rev); + hdev->manufacturer = btohs(lv->manufacturer); + + BT_DBG("%s: manufacturer %d hci_ver %d hci_rev %d", hdev->name, + hdev->manufacturer, hdev->hci_ver, hdev->hci_rev); + + break; + case OCF_READ_LOCAL_FEATURES: lf = (struct hci_rp_read_local_features *) skb->data; @@ -329,7 +347,8 @@ static void hci_cc_info_param(struct hci_dev *hdev, __u16 ocf, struct sk_buff *s if (hdev->features[1] & LMP_HV3) hdev->pkt_type |= (HCI_HV3); - BT_DBG("%s: features 0x%x 0x%x 0x%x", hdev->name, lf->features[0], lf->features[1], lf->features[2]); + BT_DBG("%s: features 0x%x 0x%x 0x%x", hdev->name, + lf->features[0], lf->features[1], lf->features[2]); break; diff --git a/net/bluetooth/hci_sysfs.c b/net/bluetooth/hci_sysfs.c index 09c61615e961..a5c4804b77f4 100644 --- a/net/bluetooth/hci_sysfs.c +++ b/net/bluetooth/hci_sysfs.c @@ -49,6 +49,24 @@ static ssize_t show_address(struct device *dev, struct device_attribute *attr, c return sprintf(buf, "%s\n", batostr(&bdaddr)); } +static ssize_t show_manufacturer(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hci_dev *hdev = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", hdev->manufacturer); +} + +static ssize_t show_hci_version(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hci_dev *hdev = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", hdev->hci_ver); +} + +static ssize_t show_hci_revision(struct device *dev, struct device_attribute *attr, char *buf) +{ + struct hci_dev *hdev = dev_get_drvdata(dev); + return sprintf(buf, "%d\n", hdev->hci_rev); +} + static ssize_t show_inquiry_cache(struct device *dev, struct device_attribute *attr, char *buf) { struct hci_dev *hdev = dev_get_drvdata(dev); @@ -153,6 +171,9 @@ static ssize_t store_sniff_min_interval(struct device *dev, struct device_attrib static DEVICE_ATTR(type, S_IRUGO, show_type, NULL); static DEVICE_ATTR(address, S_IRUGO, show_address, NULL); +static DEVICE_ATTR(manufacturer, S_IRUGO, show_manufacturer, NULL); +static DEVICE_ATTR(hci_version, S_IRUGO, show_hci_version, NULL); +static DEVICE_ATTR(hci_revision, S_IRUGO, show_hci_revision, NULL); static DEVICE_ATTR(inquiry_cache, S_IRUGO, show_inquiry_cache, NULL); static DEVICE_ATTR(idle_timeout, S_IRUGO | S_IWUSR, @@ -165,6 +186,9 @@ static DEVICE_ATTR(sniff_min_interval, S_IRUGO | S_IWUSR, static struct device_attribute *bt_attrs[] = { &dev_attr_type, &dev_attr_address, + &dev_attr_manufacturer, + &dev_attr_hci_version, + &dev_attr_hci_revision, &dev_attr_inquiry_cache, &dev_attr_idle_timeout, &dev_attr_sniff_max_interval, -- cgit v1.3-8-gc7d7 From 6ac59344ef25d5f0ebadb5663cf700d25d2a3886 Mon Sep 17 00:00:00 2001 From: Marcel Holtmann Date: Tue, 26 Sep 2006 09:43:48 +0200 Subject: [Bluetooth] Support create connection cancel command In case of non-blocking connects it is possible that the last user of an ACL link quits before the connection has been fully established. This will lead to a race condition where the internal state of a connection is closed, but the actual link has been established and is active. In case of Bluetooth 1.2 and later devices it is possible to call create connection cancel to abort the connect. For older devices the disconnect timer will be used to trigger the needed disconnect. Signed-off-by: Marcel Holtmann --- include/net/bluetooth/hci.h | 6 ++++++ include/net/bluetooth/hci_core.h | 9 ++++++--- net/bluetooth/hci_conn.c | 31 +++++++++++++++++++++++++++---- net/bluetooth/hci_event.c | 4 ++++ 4 files changed, 43 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h index 5f04181b8109..10a3eec191fd 100644 --- a/include/net/bluetooth/hci.h +++ b/include/net/bluetooth/hci.h @@ -297,6 +297,7 @@ struct hci_cp_host_buffer_size { /* Link Control */ #define OGF_LINK_CTL 0x01 + #define OCF_CREATE_CONN 0x0005 struct hci_cp_create_conn { bdaddr_t bdaddr; @@ -307,6 +308,11 @@ struct hci_cp_create_conn { __u8 role_switch; } __attribute__ ((packed)); +#define OCF_CREATE_CONN_CANCEL 0x0008 +struct hci_cp_create_conn_cancel { + bdaddr_t bdaddr; +} __attribute__ ((packed)); + #define OCF_ACCEPT_CONN_REQ 0x0009 struct hci_cp_accept_conn_req { bdaddr_t bdaddr; diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 7451a9c92d9d..df22efcfcc0b 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -316,10 +316,13 @@ static inline void hci_conn_put(struct hci_conn *conn) if (atomic_dec_and_test(&conn->refcnt)) { unsigned long timeo; if (conn->type == ACL_LINK) { - timeo = msecs_to_jiffies(HCI_DISCONN_TIMEOUT); - if (!conn->out) - timeo *= 2; del_timer(&conn->idle_timer); + if (conn->state == BT_CONNECTED) { + timeo = msecs_to_jiffies(HCI_DISCONN_TIMEOUT); + if (!conn->out) + timeo *= 2; + } else + timeo = msecs_to_jiffies(10); } else timeo = msecs_to_jiffies(10); mod_timer(&conn->disc_timer, jiffies + timeo); diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c index 7e9515b41cc0..90e3a285a17e 100644 --- a/net/bluetooth/hci_conn.c +++ b/net/bluetooth/hci_conn.c @@ -84,6 +84,20 @@ static void hci_acl_connect(struct hci_conn *conn) hci_send_cmd(hdev, OGF_LINK_CTL, OCF_CREATE_CONN, sizeof(cp), &cp); } +static void hci_acl_connect_cancel(struct hci_conn *conn) +{ + struct hci_cp_create_conn_cancel cp; + + BT_DBG("%p", conn); + + if (conn->hdev->hci_ver < 2) + return; + + bacpy(&cp.bdaddr, &conn->dst); + hci_send_cmd(conn->hdev, OGF_LINK_CTL, + OCF_CREATE_CONN_CANCEL, sizeof(cp), &cp); +} + void hci_acl_disconn(struct hci_conn *conn, __u8 reason) { struct hci_cp_disconnect cp; @@ -94,7 +108,8 @@ void hci_acl_disconn(struct hci_conn *conn, __u8 reason) cp.handle = __cpu_to_le16(conn->handle); cp.reason = reason; - hci_send_cmd(conn->hdev, OGF_LINK_CTL, OCF_DISCONNECT, sizeof(cp), &cp); + hci_send_cmd(conn->hdev, OGF_LINK_CTL, + OCF_DISCONNECT, sizeof(cp), &cp); } void hci_add_sco(struct hci_conn *conn, __u16 handle) @@ -124,12 +139,20 @@ static void hci_conn_timeout(unsigned long arg) return; hci_dev_lock(hdev); - if (conn->state == BT_CONNECTED) + + switch (conn->state) { + case BT_CONNECT: + hci_acl_connect_cancel(conn); + break; + case BT_CONNECTED: hci_acl_disconn(conn, 0x13); - else + break; + default: conn->state = BT_CLOSED; + break; + } + hci_dev_unlock(hdev); - return; } static void hci_conn_idle(unsigned long arg) diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c index 7518bdbf34cd..bb25484b8747 100644 --- a/net/bluetooth/hci_event.c +++ b/net/bluetooth/hci_event.c @@ -750,6 +750,8 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s if (test_bit(HCI_ENCRYPT, &hdev->flags)) conn->link_mode |= HCI_LM_ENCRYPT; + hci_conn_hold(conn); + /* Get remote features */ if (conn->type == ACL_LINK) { struct hci_cp_read_remote_features cp; @@ -778,6 +780,8 @@ static inline void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *s hci_send_cmd(hdev, OGF_LINK_CTL, OCF_CHANGE_CONN_PTYPE, sizeof(cp), &cp); } + + hci_conn_put(conn); } else conn->state = BT_CLOSED; -- cgit v1.3-8-gc7d7 From adaf345b537681c6ed3657941904d976fe72f342 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:27:13 -0700 Subject: [IPV4]: annotate address in inet_request_sock Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/inet_sock.h | 4 ++-- net/ipv4/tcp_ipv4.c | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index fc0b9e157b61..8130a375b867 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -62,8 +62,8 @@ struct inet_request_sock { u16 inet6_rsk_offset; /* 2 bytes hole, try to pack */ #endif - u32 loc_addr; - u32 rmt_addr; + __be32 loc_addr; + __be32 rmt_addr; u16 rmt_port; u16 snd_wscale : 4, rcv_wscale : 4, diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 9018aa41cb55..89f3cf94685a 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -734,8 +734,8 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) struct inet_request_sock *ireq; struct tcp_options_received tmp_opt; struct request_sock *req; - __u32 saddr = skb->nh.iph->saddr; - __u32 daddr = skb->nh.iph->daddr; + __be32 saddr = skb->nh.iph->saddr; + __be32 daddr = skb->nh.iph->daddr; __u32 isn = TCP_SKB_CB(skb)->when; struct dst_entry *dst = NULL; #ifdef CONFIG_SYN_COOKIES -- cgit v1.3-8-gc7d7 From 7f25afbbefb266520a237df0e9b59112704a7a42 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:27:47 -0700 Subject: [IPV4]: inet_csk_search_req() (partial) annotations raddr is net-endian Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 4 ++-- net/ipv4/inet_connection_sock.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index de4e83b6da4b..8122bcc83c1d 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -239,8 +239,8 @@ extern struct sock *inet_csk_accept(struct sock *sk, int flags, int *err); extern struct request_sock *inet_csk_search_req(const struct sock *sk, struct request_sock ***prevp, const __u16 rport, - const __u32 raddr, - const __u32 laddr); + const __be32 raddr, + const __be32 laddr); extern int inet_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb); extern int inet_csk_get_port(struct inet_hashinfo *hashinfo, diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 07204391d083..b9f4b7d5726b 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -342,10 +342,10 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, EXPORT_SYMBOL_GPL(inet_csk_route_req); -static inline u32 inet_synq_hash(const u32 raddr, const u16 rport, +static inline u32 inet_synq_hash(const __be32 raddr, const u16 rport, const u32 rnd, const u16 synq_hsize) { - return jhash_2words(raddr, (u32)rport, rnd) & (synq_hsize - 1); + return jhash_2words((__force u32)raddr, (u32)rport, rnd) & (synq_hsize - 1); } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -356,8 +356,8 @@ static inline u32 inet_synq_hash(const u32 raddr, const u16 rport, struct request_sock *inet_csk_search_req(const struct sock *sk, struct request_sock ***prevp, - const __u16 rport, const __u32 raddr, - const __u32 laddr) + const __u16 rport, const __be32 raddr, + const __be32 laddr) { const struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; -- cgit v1.3-8-gc7d7 From 3ca3c68e76686bee058937ade2b96f4de58ee434 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:28:07 -0700 Subject: [IPV4]: struct ip_options annotations ->faddr is net-endian; annotated as such, variables inferred to be net-endian annotated. Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/inet_sock.h | 2 +- net/ipv4/af_inet.c | 2 +- net/ipv4/icmp.c | 2 +- net/ipv4/ip_options.c | 2 +- net/ipv4/ip_output.c | 4 ++-- net/ipv4/raw.c | 2 +- net/ipv4/udp.c | 2 +- 7 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 8130a375b867..3c19dbf30211 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -36,7 +36,7 @@ * @ts_needaddr - Need to record addr of outgoing dev */ struct ip_options { - __u32 faddr; + __be32 faddr; unsigned char optlen; unsigned char srr; unsigned char rr; diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index ea5844832683..edcf0932ac6d 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -1043,7 +1043,7 @@ int inet_sk_rebuild_header(struct sock *sk) { struct inet_sock *inet = inet_sk(sk); struct rtable *rt = (struct rtable *)__sk_dst_check(sk, 0); - u32 daddr; + __be32 daddr; int err; /* Route is OK, nothing to do. */ diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 45396c53560d..2afa619d17eb 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -381,7 +381,7 @@ static void icmp_reply(struct icmp_bxm *icmp_param, struct sk_buff *skb) struct inet_sock *inet = inet_sk(sk); struct ipcm_cookie ipc; struct rtable *rt = (struct rtable *)skb->dst; - u32 daddr; + __be32 daddr; if (ip_options_echo(&icmp_param->replyopts, skb)) return; diff --git a/net/ipv4/ip_options.c b/net/ipv4/ip_options.c index 889e14180b04..3e6d3d4e50d4 100644 --- a/net/ipv4/ip_options.c +++ b/net/ipv4/ip_options.c @@ -165,7 +165,7 @@ int ip_options_echo(struct ip_options * dopt, struct sk_buff * skb) } if (sopt->srr) { unsigned char * start = sptr+sopt->srr; - u32 faddr; + __be32 faddr; optlen = start[1]; soffset = start[2]; diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index 5bf2f094e317..fc195a44fc2e 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -306,7 +306,7 @@ int ip_queue_xmit(struct sk_buff *skb, int ipfragok) /* Make sure we can route this packet. */ rt = (struct rtable *)__sk_dst_check(sk, 0); if (rt == NULL) { - u32 daddr; + __be32 daddr; /* Use correct destination address if we have options. */ daddr = inet->daddr; @@ -1340,7 +1340,7 @@ void ip_send_reply(struct sock *sk, struct sk_buff *skb, struct ip_reply_arg *ar char data[40]; } replyopts; struct ipcm_cookie ipc; - u32 daddr; + __be32 daddr; struct rtable *rt = (struct rtable*)skb->dst; if (ip_options_echo(&replyopts.opt, skb)) diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 0e935b4c8741..49e5b4b55b93 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -381,7 +381,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct ipcm_cookie ipc; struct rtable *rt = NULL; int free = 0; - u32 daddr; + __be32 daddr; u32 saddr; u8 tos; int err; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 77e265d7bb8f..b63fd3d39024 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -513,7 +513,7 @@ int udp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct rtable *rt = NULL; int free = 0; int connected = 0; - u32 daddr, faddr, saddr; + __be32 daddr, faddr, saddr; u16 dport; u8 tos; int err; -- cgit v1.3-8-gc7d7 From c1d18f9fa09489635a451ee13c1727e1683c2333 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:28:28 -0700 Subject: [IPV4]: struct ipcm_cookie annotation ->addr is net-endian Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- net/ipv4/raw.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index 6da1229c041a..b40bd2f9ed79 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -45,7 +45,7 @@ struct inet_skb_parm struct ipcm_cookie { - u32 addr; + __be32 addr; int oif; struct ip_options *opt; }; diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index 49e5b4b55b93..b430cf2a4f66 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -382,7 +382,7 @@ static int raw_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, struct rtable *rt = NULL; int free = 0; __be32 daddr; - u32 saddr; + __be32 saddr; u8 tos; int err; -- cgit v1.3-8-gc7d7 From 4b06a7cf2f3c053e7fc47ca6a4c74553e2291e24 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:29:07 -0700 Subject: [IPV4]: ip_local_error() ipv4 address argument annotated daddr is net-endian Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- net/ipv4/ip_sockglue.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index b40bd2f9ed79..96030314f70b 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -364,7 +364,7 @@ extern int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(s extern int ip_recv_error(struct sock *sk, struct msghdr *msg, int len); extern void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, u16 port, u32 info, u8 *payload); -extern void ip_local_error(struct sock *sk, int err, u32 daddr, u16 dport, +extern void ip_local_error(struct sock *sk, int err, __be32 daddr, u16 dport, u32 info); /* sysctl helpers - any sysctl which holds a value that ends up being diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 2d05c4133d3e..d5ca43c0cd73 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -283,7 +283,7 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, kfree_skb(skb); } -void ip_local_error(struct sock *sk, int err, u32 daddr, u16 port, u32 info) +void ip_local_error(struct sock *sk, int err, __be32 daddr, u16 port, u32 info) { struct inet_sock *inet = inet_sk(sk); struct sock_exterr_skb *serr; -- cgit v1.3-8-gc7d7 From 2816e1284a2db03ad5e205bab4eacbc5f7d4f991 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:33:05 -0700 Subject: [IPV4]: ports in struct inet_sock are net-endian Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/inet_sock.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 3c19dbf30211..7a1f69f01269 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -112,13 +112,13 @@ struct inet_sock { /* Socket demultiplex comparisons on incoming packets. */ __be32 daddr; __be32 rcv_saddr; - __u16 dport; + __be16 dport; __u16 num; __be32 saddr; __s16 uc_ttl; __u16 cmsg_flags; struct ip_options *opt; - __u16 sport; + __be16 sport; __u16 id; __u8 tos; __u8 mc_ttl; -- cgit v1.3-8-gc7d7 From cc939d37349bf82891bd1f4558284d7fafe7acb2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:33:22 -0700 Subject: [NET]: ip ports in struct flowi are net-endian Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/flow.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/flow.h b/include/net/flow.h index 97569af9bda7..5700b0ad63c0 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -56,8 +56,8 @@ struct flowi { #define FLOWI_FLAG_MULTIPATHOLDROUTE 0x01 union { struct { - __u16 sport; - __u16 dport; + __be16 sport; + __be16 dport; } ports; struct { -- cgit v1.3-8-gc7d7 From 0579016ec4691116f6322ec6ed7fb7ce746948e9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:33:40 -0700 Subject: [IPV4]: ip_local_error() annotations port argument is net-endian Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- net/ipv4/ip_sockglue.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index 96030314f70b..7e7e9731ada8 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -364,7 +364,7 @@ extern int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(s extern int ip_recv_error(struct sock *sk, struct msghdr *msg, int len); extern void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, u16 port, u32 info, u8 *payload); -extern void ip_local_error(struct sock *sk, int err, __be32 daddr, u16 dport, +extern void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, u32 info); /* sysctl helpers - any sysctl which holds a value that ends up being diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index d5ca43c0cd73..0660494196e6 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -283,7 +283,7 @@ void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, kfree_skb(skb); } -void ip_local_error(struct sock *sk, int err, __be32 daddr, u16 port, u32 info) +void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 port, u32 info) { struct inet_sock *inet = inet_sk(sk); struct sock_exterr_skb *serr; -- cgit v1.3-8-gc7d7 From 35986b329f5476630cef00cc7a164ff336ec1a21 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:34:21 -0700 Subject: [IPV4]: ip_icmp_error() annotations port is net-endian Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/ip.h | 2 +- net/ipv4/ip_sockglue.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/ip.h b/include/net/ip.h index 7e7e9731ada8..b6d95e553401 100644 --- a/include/net/ip.h +++ b/include/net/ip.h @@ -363,7 +363,7 @@ extern int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(s extern int ip_recv_error(struct sock *sk, struct msghdr *msg, int len); extern void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, - u16 port, u32 info, u8 *payload); + __be16 port, u32 info, u8 *payload); extern void ip_local_error(struct sock *sk, int err, __be32 daddr, __be16 dport, u32 info); diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 0660494196e6..4b132953bcc2 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -254,7 +254,7 @@ int ip_ra_control(struct sock *sk, unsigned char on, void (*destructor)(struct s } void ip_icmp_error(struct sock *sk, struct sk_buff *skb, int err, - u16 port, u32 info, u8 *payload) + __be16 port, u32 info, u8 *payload) { struct inet_sock *inet = inet_sk(sk); struct sock_exterr_skb *serr; -- cgit v1.3-8-gc7d7 From 39dccd9d922b595301e5d43ca7a30823d81393b6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:34:41 -0700 Subject: [IPV4]: route.h annotations ip_route_connect(), ip_route_newports() get port numbers net-endian Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/route.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/route.h b/include/net/route.h index 58752722c968..486e37aff06c 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -146,7 +146,7 @@ static inline char rt_tos2priority(u8 tos) static inline int ip_route_connect(struct rtable **rp, __be32 dst, __be32 src, u32 tos, int oif, u8 protocol, - u16 sport, u16 dport, struct sock *sk) + __be16 sport, __be16 dport, struct sock *sk) { struct flowi fl = { .oif = oif, .nl_u = { .ip4_u = { .daddr = dst, @@ -172,7 +172,7 @@ static inline int ip_route_connect(struct rtable **rp, __be32 dst, } static inline int ip_route_newports(struct rtable **rp, u8 protocol, - u16 sport, u16 dport, struct sock *sk) + __be16 sport, __be16 dport, struct sock *sk) { if (sport != (*rp)->fl.fl_ip_sport || dport != (*rp)->fl.fl_ip_dport) { -- cgit v1.3-8-gc7d7 From e11be94bf6a3bfc90816d37847e6b5b179ca2ff6 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:35:29 -0700 Subject: [IPV4]: struct inet_request_sock annotations ->port is net-endian Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/inet_sock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 7a1f69f01269..a448bb226bb3 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -64,7 +64,7 @@ struct inet_request_sock { #endif __be32 loc_addr; __be32 rmt_addr; - u16 rmt_port; + __be16 rmt_port; u16 snd_wscale : 4, rcv_wscale : 4, tstamp_ok : 1, -- cgit v1.3-8-gc7d7 From ed9bad06eec5ee7842851f9abeb406e9a73084e8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:36:36 -0700 Subject: [IPV4] net/ipv4/arp.c: trivial annotations Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/arp.h | 8 ++++---- net/bridge/netfilter/ebt_arpreply.c | 2 +- net/ipv4/arp.c | 30 +++++++++++++++--------------- 3 files changed, 20 insertions(+), 20 deletions(-) (limited to 'include/net') diff --git a/include/net/arp.h b/include/net/arp.h index 643bded9f557..6a3d9a7d302b 100644 --- a/include/net/arp.h +++ b/include/net/arp.h @@ -12,15 +12,15 @@ extern struct neigh_table arp_tbl; extern void arp_init(void); extern int arp_find(unsigned char *haddr, struct sk_buff *skb); extern int arp_ioctl(unsigned int cmd, void __user *arg); -extern void arp_send(int type, int ptype, u32 dest_ip, - struct net_device *dev, u32 src_ip, +extern void arp_send(int type, int ptype, __be32 dest_ip, + struct net_device *dev, __be32 src_ip, unsigned char *dest_hw, unsigned char *src_hw, unsigned char *th); extern int arp_bind_neighbour(struct dst_entry *dst); extern int arp_mc_map(u32 addr, u8 *haddr, struct net_device *dev, int dir); extern void arp_ifdown(struct net_device *dev); -extern struct sk_buff *arp_create(int type, int ptype, u32 dest_ip, - struct net_device *dev, u32 src_ip, +extern struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, + struct net_device *dev, __be32 src_ip, unsigned char *dest_hw, unsigned char *src_hw, unsigned char *target_hw); extern void arp_xmit(struct sk_buff *skb); diff --git a/net/bridge/netfilter/ebt_arpreply.c b/net/bridge/netfilter/ebt_arpreply.c index d19fc4b328dc..0aa7b9910a86 100644 --- a/net/bridge/netfilter/ebt_arpreply.c +++ b/net/bridge/netfilter/ebt_arpreply.c @@ -20,7 +20,7 @@ static int ebt_target_reply(struct sk_buff **pskb, unsigned int hooknr, const void *data, unsigned int datalen) { struct ebt_arpreply_info *info = (struct ebt_arpreply_info *)data; - u32 _sip, *siptr, _dip, *diptr; + __be32 _sip, *siptr, _dip, *diptr; struct arphdr _ah, *ap; unsigned char _sha[ETH_ALEN], *shp; struct sk_buff *skb = *pskb; diff --git a/net/ipv4/arp.c b/net/ipv4/arp.c index 2a29a5ebc978..cfe5c8474286 100644 --- a/net/ipv4/arp.c +++ b/net/ipv4/arp.c @@ -385,7 +385,7 @@ static void arp_solicit(struct neighbour *neigh, struct sk_buff *skb) } static int arp_ignore(struct in_device *in_dev, struct net_device *dev, - u32 sip, u32 tip) + __be32 sip, __be32 tip) { int scope; @@ -420,7 +420,7 @@ static int arp_ignore(struct in_device *in_dev, struct net_device *dev, return !inet_confirm_addr(dev, sip, tip, scope); } -static int arp_filter(__u32 sip, __u32 tip, struct net_device *dev) +static int arp_filter(__be32 sip, __be32 tip, struct net_device *dev) { struct flowi fl = { .nl_u = { .ip4_u = { .daddr = sip, .saddr = tip } } }; @@ -449,7 +449,7 @@ static int arp_filter(__u32 sip, __u32 tip, struct net_device *dev) * is allowed to use this function, it is scheduled to be removed. --ANK */ -static int arp_set_predefined(int addr_hint, unsigned char * haddr, u32 paddr, struct net_device * dev) +static int arp_set_predefined(int addr_hint, unsigned char * haddr, __be32 paddr, struct net_device * dev) { switch (addr_hint) { case RTN_LOCAL: @@ -511,7 +511,7 @@ int arp_bind_neighbour(struct dst_entry *dst) if (dev == NULL) return -EINVAL; if (n == NULL) { - u32 nexthop = ((struct rtable*)dst)->rt_gateway; + __be32 nexthop = ((struct rtable*)dst)->rt_gateway; if (dev->flags&(IFF_LOOPBACK|IFF_POINTOPOINT)) nexthop = 0; n = __neigh_lookup_errno( @@ -560,8 +560,8 @@ static inline int arp_fwd_proxy(struct in_device *in_dev, struct rtable *rt) * Create an arp packet. If (dest_hw == NULL), we create a broadcast * message. */ -struct sk_buff *arp_create(int type, int ptype, u32 dest_ip, - struct net_device *dev, u32 src_ip, +struct sk_buff *arp_create(int type, int ptype, __be32 dest_ip, + struct net_device *dev, __be32 src_ip, unsigned char *dest_hw, unsigned char *src_hw, unsigned char *target_hw) { @@ -675,8 +675,8 @@ void arp_xmit(struct sk_buff *skb) /* * Create and send an arp packet. */ -void arp_send(int type, int ptype, u32 dest_ip, - struct net_device *dev, u32 src_ip, +void arp_send(int type, int ptype, __be32 dest_ip, + struct net_device *dev, __be32 src_ip, unsigned char *dest_hw, unsigned char *src_hw, unsigned char *target_hw) { @@ -969,13 +969,13 @@ out_of_mem: static int arp_req_set(struct arpreq *r, struct net_device * dev) { - u32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr; + __be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr; struct neighbour *neigh; int err; if (r->arp_flags&ATF_PUBL) { - u32 mask = ((struct sockaddr_in *) &r->arp_netmask)->sin_addr.s_addr; - if (mask && mask != 0xFFFFFFFF) + __be32 mask = ((struct sockaddr_in *) &r->arp_netmask)->sin_addr.s_addr; + if (mask && mask != htonl(0xFFFFFFFF)) return -EINVAL; if (!dev && (r->arp_flags & ATF_COM)) { dev = dev_getbyhwaddr(r->arp_ha.sa_family, r->arp_ha.sa_data); @@ -1063,7 +1063,7 @@ static unsigned arp_state_to_flags(struct neighbour *neigh) static int arp_req_get(struct arpreq *r, struct net_device *dev) { - u32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr; + __be32 ip = ((struct sockaddr_in *) &r->arp_pa)->sin_addr.s_addr; struct neighbour *neigh; int err = -ENXIO; @@ -1084,13 +1084,13 @@ static int arp_req_get(struct arpreq *r, struct net_device *dev) static int arp_req_delete(struct arpreq *r, struct net_device * dev) { int err; - u32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; + __be32 ip = ((struct sockaddr_in *)&r->arp_pa)->sin_addr.s_addr; struct neighbour *neigh; if (r->arp_flags & ATF_PUBL) { - u32 mask = + __be32 mask = ((struct sockaddr_in *)&r->arp_netmask)->sin_addr.s_addr; - if (mask == 0xFFFFFFFF) + if (mask == htonl(0xFFFFFFFF)) return pneigh_delete(&arp_tbl, &ip, dev); if (mask == 0) { if (dev == NULL) { -- cgit v1.3-8-gc7d7 From 6b72977bd6c6fefc6497d4f0275079f539eaf0ac Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:36:59 -0700 Subject: [IPV4]: inet_csk_search_req() annotations rport argument is net-endian Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/inet_connection_sock.h | 2 +- net/ipv4/inet_connection_sock.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 8122bcc83c1d..0bcf9f237e1f 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -238,7 +238,7 @@ extern struct sock *inet_csk_accept(struct sock *sk, int flags, int *err); extern struct request_sock *inet_csk_search_req(const struct sock *sk, struct request_sock ***prevp, - const __u16 rport, + const __be16 rport, const __be32 raddr, const __be32 laddr); extern int inet_csk_bind_conflict(const struct sock *sk, diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index b9f4b7d5726b..274b0b846c25 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -342,10 +342,10 @@ struct dst_entry* inet_csk_route_req(struct sock *sk, EXPORT_SYMBOL_GPL(inet_csk_route_req); -static inline u32 inet_synq_hash(const __be32 raddr, const u16 rport, +static inline u32 inet_synq_hash(const __be32 raddr, const __be16 rport, const u32 rnd, const u16 synq_hsize) { - return jhash_2words((__force u32)raddr, (u32)rport, rnd) & (synq_hsize - 1); + return jhash_2words((__force u32)raddr, (__force u32)rport, rnd) & (synq_hsize - 1); } #if defined(CONFIG_IPV6) || defined(CONFIG_IPV6_MODULE) @@ -356,7 +356,7 @@ static inline u32 inet_synq_hash(const __be32 raddr, const u16 rport, struct request_sock *inet_csk_search_req(const struct sock *sk, struct request_sock ***prevp, - const __u16 rport, const __be32 raddr, + const __be16 rport, const __be32 raddr, const __be32 laddr) { const struct inet_connection_sock *icsk = inet_csk(sk); -- cgit v1.3-8-gc7d7 From 81f7bf6cbaca02c034b0393c51fc22b29cba20f7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:40:00 -0700 Subject: [IPV4]: net/ipv4/fib annotations Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/ip_fib.h | 2 +- net/ipv4/fib_frontend.c | 7 +++---- net/ipv4/fib_lookup.h | 4 ++-- net/ipv4/fib_rules.c | 12 ++++++------ net/ipv4/fib_semantics.c | 12 ++++++------ 5 files changed, 18 insertions(+), 19 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h index 19f70896ea98..82229146bac7 100644 --- a/include/net/ip_fib.h +++ b/include/net/ip_fib.h @@ -230,7 +230,7 @@ struct rtentry; /* Exported by fib_semantics.c */ extern int ip_fib_check_default(__be32 gw, struct net_device *dev); -extern int fib_sync_down(u32 local, struct net_device *dev, int force); +extern int fib_sync_down(__be32 local, struct net_device *dev, int force); extern int fib_sync_up(struct net_device *dev); extern __be32 __fib_res_prefsrc(struct fib_result *res); diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 34dc640478a0..9c399a70dd5d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -253,7 +253,7 @@ e_inval: #ifndef CONFIG_IP_NOSIOCRT -static inline u32 sk_extract_addr(struct sockaddr *addr) +static inline __be32 sk_extract_addr(struct sockaddr *addr) { return ((struct sockaddr_in *) addr)->sin_addr.s_addr; } @@ -292,7 +292,7 @@ static int rtentry_to_fib_config(int cmd, struct rtentry *rt, plen = 32; addr = sk_extract_addr(&rt->rt_dst); if (!(rt->rt_flags & RTF_HOST)) { - u32 mask = sk_extract_addr(&rt->rt_genmask); + __be32 mask = sk_extract_addr(&rt->rt_genmask); if (rt->rt_genmask.sa_family != AF_INET) { if (mask || rt->rt_genmask.sa_family) @@ -627,8 +627,7 @@ out: only when netlink is already locked. */ -static void fib_magic(int cmd, int type, u32 dst, int dst_len, - struct in_ifaddr *ifa) +static void fib_magic(int cmd, int type, __be32 dst, int dst_len, struct in_ifaddr *ifa) { struct fib_table *tb; struct fib_config cfg = { diff --git a/net/ipv4/fib_lookup.h b/net/ipv4/fib_lookup.h index 6e9b3e86ae84..0e8b70bad4e1 100644 --- a/net/ipv4/fib_lookup.h +++ b/net/ipv4/fib_lookup.h @@ -26,10 +26,10 @@ extern void fib_release_info(struct fib_info *); extern struct fib_info *fib_create_info(struct fib_config *cfg); extern int fib_nh_match(struct fib_config *cfg, struct fib_info *fi); extern int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, - u32 tb_id, u8 type, u8 scope, u32 dst, + u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int); -extern void rtmsg_fib(int event, u32 key, struct fib_alias *fa, +extern void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len, u32 tb_id, struct nl_info *info); extern struct fib_alias *fib_find_alias(struct list_head *fah, u8 tos, u32 prio); diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c index 52b2adae4f22..f2d4070aaf01 100644 --- a/net/ipv4/fib_rules.c +++ b/net/ipv4/fib_rules.c @@ -40,10 +40,10 @@ struct fib4_rule u8 dst_len; u8 src_len; u8 tos; - u32 src; - u32 srcmask; - u32 dst; - u32 dstmask; + __be32 src; + __be32 srcmask; + __be32 dst; + __be32 dstmask; #ifdef CONFIG_IP_ROUTE_FWMARK u32 fwmark; u32 fwmask; @@ -150,8 +150,8 @@ void fib_select_default(const struct flowi *flp, struct fib_result *res) static int fib4_rule_match(struct fib_rule *rule, struct flowi *fl, int flags) { struct fib4_rule *r = (struct fib4_rule *) rule; - u32 daddr = fl->fl4_dst; - u32 saddr = fl->fl4_src; + __be32 daddr = fl->fl4_dst; + __be32 saddr = fl->fl4_src; if (((saddr ^ r->src) & r->srcmask) || ((daddr ^ r->dst) & r->dstmask)) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index f7567e92cd9e..884d176e0082 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -203,7 +203,7 @@ static inline unsigned int fib_info_hashfn(const struct fib_info *fi) unsigned int val = fi->fib_nhs; val ^= fi->fib_protocol; - val ^= fi->fib_prefsrc; + val ^= (__force u32)fi->fib_prefsrc; val ^= fi->fib_priority; return (val ^ (val >> 7) ^ (val >> 12)) & mask; @@ -273,7 +273,7 @@ int ip_fib_check_default(__be32 gw, struct net_device *dev) return -1; } -void rtmsg_fib(int event, u32 key, struct fib_alias *fa, +void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, int dst_len, u32 tb_id, struct nl_info *info) { struct sk_buff *skb; @@ -568,11 +568,11 @@ out: return 0; } -static inline unsigned int fib_laddr_hashfn(u32 val) +static inline unsigned int fib_laddr_hashfn(__be32 val) { unsigned int mask = (fib_hash_size - 1); - return (val ^ (val >> 7) ^ (val >> 14)) & mask; + return ((__force u32)val ^ ((__force u32)val >> 7) ^ ((__force u32)val >> 14)) & mask; } static struct hlist_head *fib_hash_alloc(int bytes) @@ -928,7 +928,7 @@ __be32 __fib_res_prefsrc(struct fib_result *res) } int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, - u32 tb_id, u8 type, u8 scope, u32 dst, int dst_len, u8 tos, + u32 tb_id, u8 type, u8 scope, __be32 dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int flags) { struct nlmsghdr *nlh; @@ -1017,7 +1017,7 @@ nla_put_failure: - device went down -> we must shutdown all nexthops going via it. */ -int fib_sync_down(u32 local, struct net_device *dev, int force) +int fib_sync_down(__be32 local, struct net_device *dev, int force) { int ret = 0; int scope = RT_SCOPE_NOWHERE; -- cgit v1.3-8-gc7d7 From 4f765d842fa6e6fe15d555b247b640118d65b4dd Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:43:07 -0700 Subject: [IPV4]: INET_MATCH() annotations INET_MATCH() and friends depend on an interesting set of kludges: * there's a pair of adjacent fields in struct inet_sock - __be16 dport followed by __u16 num. We want to search by pair, so we combine the keys into a single 32bit value and compare with 32bit value read from &...->dport. * on 64bit targets we combine comparisons with pair of adjacent __be32 fields in the same way. Make sure that we don't mix those values with anything else and that pairs we form them from have correct types. Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/ipv6.h | 2 +- include/net/inet_hashtables.h | 36 +++++++++++++++++++++++++----------- net/ipv4/inet_hashtables.c | 2 +- net/ipv6/inet6_hashtables.c | 8 ++++---- 4 files changed, 31 insertions(+), 17 deletions(-) (limited to 'include/net') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index caca57df0d7d..6dc07ee7702e 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -461,7 +461,7 @@ static inline struct raw6_sock *raw6_sk(const struct sock *sk) #define INET6_MATCH(__sk, __hash, __saddr, __daddr, __ports, __dif)\ (((__sk)->sk_hash == (__hash)) && \ - ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ + ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ ((__sk)->sk_family == AF_INET6) && \ ipv6_addr_equal(&inet6_sk(__sk)->daddr, (__saddr)) && \ ipv6_addr_equal(&inet6_sk(__sk)->rcv_saddr, (__daddr)) && \ diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index b4491c9e2a5a..fb0c09c7090c 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -283,31 +283,45 @@ static inline struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo, } /* Socket demux engine toys. */ +/* What happens here is ugly; there's a pair of adjacent fields in + struct inet_sock; __be16 dport followed by __u16 num. We want to + search by pair, so we combine the keys into a single 32bit value + and compare with 32bit value read from &...->dport. Let's at least + make sure that it's not mixed with anything else... + On 64bit targets we combine comparisons with pair of adjacent __be32 + fields in the same way. +*/ +typedef __u32 __bitwise __portpair; #ifdef __BIG_ENDIAN #define INET_COMBINED_PORTS(__sport, __dport) \ - (((__u32)(__sport) << 16) | (__u32)(__dport)) + ((__force __portpair)(((__force __u32)(__be16)(__sport) << 16) | (__u32)(__dport))) #else /* __LITTLE_ENDIAN */ #define INET_COMBINED_PORTS(__sport, __dport) \ - (((__u32)(__dport) << 16) | (__u32)(__sport)) + ((__force __portpair)(((__u32)(__dport) << 16) | (__force __u32)(__be16)(__sport))) #endif #if (BITS_PER_LONG == 64) +typedef __u64 __bitwise __addrpair; #ifdef __BIG_ENDIAN #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ - const __u64 __name = (((__u64)(__saddr)) << 32) | ((__u64)(__daddr)); + const __addrpair __name = (__force __addrpair) ( \ + (((__force __u64)(__be32)(__saddr)) << 32) | \ + ((__force __u64)(__be32)(__daddr))); #else /* __LITTLE_ENDIAN */ #define INET_ADDR_COOKIE(__name, __saddr, __daddr) \ - const __u64 __name = (((__u64)(__daddr)) << 32) | ((__u64)(__saddr)); + const __addrpair __name = (__force __addrpair) ( \ + (((__force __u64)(__be32)(__daddr)) << 32) | \ + ((__force __u64)(__be32)(__saddr))); #endif /* __BIG_ENDIAN */ #define INET_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ (((__sk)->sk_hash == (__hash)) && \ - ((*((__u64 *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ - ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ + ((*((__addrpair *)&(inet_sk(__sk)->daddr))) == (__cookie)) && \ + ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #define INET_TW_MATCH(__sk, __hash, __cookie, __saddr, __daddr, __ports, __dif)\ (((__sk)->sk_hash == (__hash)) && \ - ((*((__u64 *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ - ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ + ((*((__addrpair *)&(inet_twsk(__sk)->tw_daddr))) == (__cookie)) && \ + ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #else /* 32-bit arch */ #define INET_ADDR_COOKIE(__name, __saddr, __daddr) @@ -315,13 +329,13 @@ static inline struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo, (((__sk)->sk_hash == (__hash)) && \ (inet_sk(__sk)->daddr == (__saddr)) && \ (inet_sk(__sk)->rcv_saddr == (__daddr)) && \ - ((*((__u32 *)&(inet_sk(__sk)->dport))) == (__ports)) && \ + ((*((__portpair *)&(inet_sk(__sk)->dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #define INET_TW_MATCH(__sk, __hash,__cookie, __saddr, __daddr, __ports, __dif) \ (((__sk)->sk_hash == (__hash)) && \ (inet_twsk(__sk)->tw_daddr == (__saddr)) && \ (inet_twsk(__sk)->tw_rcv_saddr == (__daddr)) && \ - ((*((__u32 *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ + ((*((__portpair *)&(inet_twsk(__sk)->tw_dport))) == (__ports)) && \ (!((__sk)->sk_bound_dev_if) || ((__sk)->sk_bound_dev_if == (__dif)))) #endif /* 64-bit arch */ @@ -338,7 +352,7 @@ static inline struct sock * const int dif) { INET_ADDR_COOKIE(acookie, saddr, daddr) - const __u32 ports = INET_COMBINED_PORTS(sport, hnum); + const __portpair ports = INET_COMBINED_PORTS(sport, hnum); struct sock *sk; const struct hlist_node *node; /* Optimize here for direct hit, only listening connections can diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index fb296c9a7f3f..729d1eb39483 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -201,7 +201,7 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, u32 saddr = inet->daddr; int dif = sk->sk_bound_dev_if; INET_ADDR_COOKIE(acookie, saddr, daddr) - const __u32 ports = INET_COMBINED_PORTS(inet->dport, lport); + const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); unsigned int hash = inet_ehashfn(daddr, lport, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); struct sock *sk2; diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index d2f3fc990bfa..8accd1fbeeda 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -64,7 +64,7 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo, { struct sock *sk; const struct hlist_node *node; - const __u32 ports = INET_COMBINED_PORTS(sport, hnum); + const __portpair ports = INET_COMBINED_PORTS(sport, hnum); /* Optimize here for direct hit, only listening connections can * have wildcards anyways. */ @@ -82,7 +82,7 @@ struct sock *__inet6_lookup_established(struct inet_hashinfo *hashinfo, sk_for_each(sk, node, &(head + hashinfo->ehash_size)->chain) { const struct inet_timewait_sock *tw = inet_twsk(sk); - if(*((__u32 *)&(tw->tw_dport)) == ports && + if(*((__portpair *)&(tw->tw_dport)) == ports && sk->sk_family == PF_INET6) { const struct inet6_timewait_sock *tw6 = inet6_twsk(sk); @@ -171,7 +171,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, const struct in6_addr *daddr = &np->rcv_saddr; const struct in6_addr *saddr = &np->daddr; const int dif = sk->sk_bound_dev_if; - const u32 ports = INET_COMBINED_PORTS(inet->dport, lport); + const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); const unsigned int hash = inet6_ehashfn(daddr, inet->num, saddr, inet->dport); struct inet_ehash_bucket *head = inet_ehash_bucket(hinfo, hash); @@ -188,7 +188,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, tw = inet_twsk(sk2); - if(*((__u32 *)&(tw->tw_dport)) == ports && + if(*((__portpair *)&(tw->tw_dport)) == ports && sk2->sk_family == PF_INET6 && ipv6_addr_equal(&tw6->tw_v6_daddr, saddr) && ipv6_addr_equal(&tw6->tw_v6_rcv_saddr, daddr) && -- cgit v1.3-8-gc7d7 From fb99c848e5ae6b8b2bc11f0f90c9e2bb3d702c0d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:43:33 -0700 Subject: [IPV4]: annotate inet_lookup() and friends inet_lookup() annotated along with helper functions (__inet_lookup(), __inet_lookup_established(), inet_lookup_established(), inet_lookup_listener(), __inet_lookup_listener() and inet_ehashfn()) Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/inet_hashtables.h | 20 ++++++++++---------- include/net/inet_sock.h | 12 ++++++------ net/ipv4/inet_hashtables.c | 10 +++++----- 3 files changed, 21 insertions(+), 21 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_hashtables.h b/include/net/inet_hashtables.h index fb0c09c7090c..a9eb2eaf094e 100644 --- a/include/net/inet_hashtables.h +++ b/include/net/inet_hashtables.h @@ -272,12 +272,12 @@ static inline int inet_iif(const struct sk_buff *skb) } extern struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo, - const u32 daddr, + const __be32 daddr, const unsigned short hnum, const int dif); static inline struct sock *inet_lookup_listener(struct inet_hashinfo *hashinfo, - u32 daddr, u16 dport, int dif) + __be32 daddr, __be16 dport, int dif) { return __inet_lookup_listener(hashinfo, daddr, ntohs(dport), dif); } @@ -347,8 +347,8 @@ typedef __u64 __bitwise __addrpair; */ static inline struct sock * __inet_lookup_established(struct inet_hashinfo *hashinfo, - const u32 saddr, const u16 sport, - const u32 daddr, const u16 hnum, + const __be32 saddr, const __be16 sport, + const __be32 daddr, const u16 hnum, const int dif) { INET_ADDR_COOKIE(acookie, saddr, daddr) @@ -384,8 +384,8 @@ hit: static inline struct sock * inet_lookup_established(struct inet_hashinfo *hashinfo, - const u32 saddr, const u16 sport, - const u32 daddr, const u16 dport, + const __be32 saddr, const __be16 sport, + const __be32 daddr, const __be16 dport, const int dif) { return __inet_lookup_established(hashinfo, saddr, sport, daddr, @@ -393,8 +393,8 @@ static inline struct sock * } static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo, - const u32 saddr, const u16 sport, - const u32 daddr, const u16 dport, + const __be32 saddr, const __be16 sport, + const __be32 daddr, const __be16 dport, const int dif) { u16 hnum = ntohs(dport); @@ -404,8 +404,8 @@ static inline struct sock *__inet_lookup(struct inet_hashinfo *hashinfo, } static inline struct sock *inet_lookup(struct inet_hashinfo *hashinfo, - const u32 saddr, const u16 sport, - const u32 daddr, const u16 dport, + const __be32 saddr, const __be16 sport, + const __be32 daddr, const __be16 dport, const int dif) { struct sock *sk; diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index a448bb226bb3..ce6da97bc848 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -167,10 +167,10 @@ static inline void inet_sk_copy_descendant(struct sock *sk_to, extern int inet_sk_rebuild_header(struct sock *sk); -static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport, - const __u32 faddr, const __u16 fport) +static inline unsigned int inet_ehashfn(const __be32 laddr, const __u16 lport, + const __be32 faddr, const __be16 fport) { - unsigned int h = (laddr ^ lport) ^ (faddr ^ fport); + unsigned int h = ((__force __u32)laddr ^ lport) ^ ((__force __u32)faddr ^ (__force __u32)fport); h ^= h >> 16; h ^= h >> 8; return h; @@ -179,10 +179,10 @@ static inline unsigned int inet_ehashfn(const __u32 laddr, const __u16 lport, static inline int inet_sk_ehashfn(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); - const __u32 laddr = inet->rcv_saddr; + const __be32 laddr = inet->rcv_saddr; const __u16 lport = inet->num; - const __u32 faddr = inet->daddr; - const __u16 fport = inet->dport; + const __be32 faddr = inet->daddr; + const __be16 fport = inet->dport; return inet_ehashfn(laddr, lport, faddr, fport); } diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 729d1eb39483..244c4f445c7d 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c @@ -125,7 +125,7 @@ EXPORT_SYMBOL(inet_listen_wlock); * wildcarded during the search since they can never be otherwise. */ static struct sock *inet_lookup_listener_slow(const struct hlist_head *head, - const u32 daddr, + const __be32 daddr, const unsigned short hnum, const int dif) { @@ -137,7 +137,7 @@ static struct sock *inet_lookup_listener_slow(const struct hlist_head *head, const struct inet_sock *inet = inet_sk(sk); if (inet->num == hnum && !ipv6_only_sock(sk)) { - const __u32 rcv_saddr = inet->rcv_saddr; + const __be32 rcv_saddr = inet->rcv_saddr; int score = sk->sk_family == PF_INET ? 1 : 0; if (rcv_saddr) { @@ -163,7 +163,7 @@ static struct sock *inet_lookup_listener_slow(const struct hlist_head *head, /* Optimize the common listener case. */ struct sock *__inet_lookup_listener(struct inet_hashinfo *hashinfo, - const u32 daddr, const unsigned short hnum, + const __be32 daddr, const unsigned short hnum, const int dif) { struct sock *sk = NULL; @@ -197,8 +197,8 @@ static int __inet_check_established(struct inet_timewait_death_row *death_row, { struct inet_hashinfo *hinfo = death_row->hashinfo; struct inet_sock *inet = inet_sk(sk); - u32 daddr = inet->rcv_saddr; - u32 saddr = inet->daddr; + __be32 daddr = inet->rcv_saddr; + __be32 saddr = inet->daddr; int dif = sk->sk_bound_dev_if; INET_ADDR_COOKIE(acookie, saddr, daddr) const __portpair ports = INET_COMBINED_PORTS(inet->dport, lport); -- cgit v1.3-8-gc7d7 From 23f33c2d4fd5986243b67a2bf5e63ebae1a76ffa Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:43:50 -0700 Subject: [IPV4]: struct inet_timewait_sock annotations Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/inet_timewait_sock.h | 8 ++++---- net/ipv4/tcp_ipv4.c | 2 +- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 600cb543550d..5794bc6bf0bf 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -120,10 +120,10 @@ struct inet_timewait_sock { unsigned char tw_rcv_wscale; /* Socket demultiplex comparisons on incoming packets. */ /* these five are in inet_sock */ - __u16 tw_sport; - __u32 tw_daddr __attribute__((aligned(INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES))); - __u32 tw_rcv_saddr; - __u16 tw_dport; + __be16 tw_sport; + __be32 tw_daddr __attribute__((aligned(INET_TIMEWAIT_ADDRCMP_ALIGN_BYTES))); + __be32 tw_rcv_saddr; + __be16 tw_dport; __u16 tw_num; /* And these are ours. */ __u8 tw_ipv6only:1; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 89f3cf94685a..c83938b8fcb1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1763,7 +1763,7 @@ static void get_tcp4_sock(struct sock *sp, char *tmpbuf, int i) static void get_timewait4_sock(struct inet_timewait_sock *tw, char *tmpbuf, int i) { - unsigned int dest, src; + __be32 dest, src; __u16 destp, srcp; int ttd = tw->tw_ttd - jiffies; -- cgit v1.3-8-gc7d7 From 82103232edc4b4ed48949a195aca93cfa3fe3fa8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:44:10 -0700 Subject: [IPV4]: inet_rcv_saddr() annotations inet_rcv_saddr() returns net-endian Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/inet_timewait_sock.h | 2 +- net/ipv4/inet_connection_sock.c | 4 ++-- net/ipv6/addrconf.c | 4 ++-- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/inet_timewait_sock.h b/include/net/inet_timewait_sock.h index 5794bc6bf0bf..6d14c22a00c5 100644 --- a/include/net/inet_timewait_sock.h +++ b/include/net/inet_timewait_sock.h @@ -186,7 +186,7 @@ static inline struct inet_timewait_sock *inet_twsk(const struct sock *sk) return (struct inet_timewait_sock *)sk; } -static inline u32 inet_rcv_saddr(const struct sock *sk) +static inline __be32 inet_rcv_saddr(const struct sock *sk) { return likely(sk->sk_state != TCP_TIME_WAIT) ? inet_sk(sk)->rcv_saddr : inet_twsk(sk)->tw_rcv_saddr; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 274b0b846c25..96bbe2a0aa1b 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -39,7 +39,7 @@ int sysctl_local_port_range[2] = { 1024, 4999 }; int inet_csk_bind_conflict(const struct sock *sk, const struct inet_bind_bucket *tb) { - const u32 sk_rcv_saddr = inet_rcv_saddr(sk); + const __be32 sk_rcv_saddr = inet_rcv_saddr(sk); struct sock *sk2; struct hlist_node *node; int reuse = sk->sk_reuse; @@ -52,7 +52,7 @@ int inet_csk_bind_conflict(const struct sock *sk, sk->sk_bound_dev_if == sk2->sk_bound_dev_if)) { if (!reuse || !sk2->sk_reuse || sk2->sk_state == TCP_LISTEN) { - const u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); + const __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); if (!sk2_rcv_saddr || !sk_rcv_saddr || sk2_rcv_saddr == sk_rcv_saddr) break; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index c18676352397..e03c33b2465b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1258,8 +1258,8 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) { const struct in6_addr *sk_rcv_saddr6 = &inet6_sk(sk)->rcv_saddr; const struct in6_addr *sk2_rcv_saddr6 = inet6_rcv_saddr(sk2); - u32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; - u32 sk2_rcv_saddr = inet_rcv_saddr(sk2); + __be32 sk_rcv_saddr = inet_sk(sk)->rcv_saddr; + __be32 sk2_rcv_saddr = inet_rcv_saddr(sk2); int sk_ipv6only = ipv6_only_sock(sk); int sk2_ipv6only = inet_v6_ipv6only(sk2); int addr_type = ipv6_addr_type(sk_rcv_saddr6); -- cgit v1.3-8-gc7d7 From 48818f822d2b2e16f4bf4d1ed1185e7d2146dc34 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:44:54 -0700 Subject: [IPV6]: struct in6_addr annotations in6_addr elements are net-endian Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/in6.h | 4 ++-- include/net/ipv6.h | 6 +++--- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/linux/in6.h b/include/linux/in6.h index d776829b443f..348ecd4a36fe 100644 --- a/include/linux/in6.h +++ b/include/linux/in6.h @@ -32,8 +32,8 @@ struct in6_addr union { __u8 u6_addr8[16]; - __u16 u6_addr16[8]; - __u32 u6_addr32[4]; + __be16 u6_addr16[8]; + __be32 u6_addr32[4]; } in6_u; #define s6_addr in6_u.u6_addr8 #define s6_addr16 in6_u.u6_addr16 diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 72bf47b2a4e0..8223c4410b4b 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -318,8 +318,8 @@ static inline void ipv6_addr_prefix(struct in6_addr *pfx, #ifndef __HAVE_ARCH_ADDR_SET static inline void ipv6_addr_set(struct in6_addr *addr, - __u32 w1, __u32 w2, - __u32 w3, __u32 w4) + __be32 w1, __be32 w2, + __be32 w3, __be32 w4) { addr->s6_addr32[0] = w1; addr->s6_addr32[1] = w2; @@ -337,7 +337,7 @@ static inline int ipv6_addr_equal(const struct in6_addr *a1, a1->s6_addr32[3] == a2->s6_addr32[3]); } -static inline int __ipv6_prefix_equal(const u32 *a1, const u32 *a2, +static inline int __ipv6_prefix_equal(const __be32 *a1, const __be32 *a2, unsigned int prefixlen) { unsigned pdw, pbi; -- cgit v1.3-8-gc7d7 From f9d07e41f89e7305eb2c0475c170c51d21425581 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:45:50 -0700 Subject: [XFRM]: xfrm_flowi_[sd]port() annotations both return net-endian Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/xfrm.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 11e0b1d6bd47..17e98c4bd877 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -462,9 +462,9 @@ static __inline__ int addr_match(void *token1, void *token2, int prefixlen) } static __inline__ -u16 xfrm_flowi_sport(struct flowi *fl) +__be16 xfrm_flowi_sport(struct flowi *fl) { - u16 port; + __be16 port; switch(fl->proto) { case IPPROTO_TCP: case IPPROTO_UDP: @@ -487,9 +487,9 @@ u16 xfrm_flowi_sport(struct flowi *fl) } static __inline__ -u16 xfrm_flowi_dport(struct flowi *fl) +__be16 xfrm_flowi_dport(struct flowi *fl) { - u16 port; + __be16 port; switch(fl->proto) { case IPPROTO_TCP: case IPPROTO_UDP: -- cgit v1.3-8-gc7d7 From 5f19343fb19613539355296b23cbc08d1336b52d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:46:32 -0700 Subject: [XFRM]: addr_match() annotations Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/xfrm.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 17e98c4bd877..b096e9058f3f 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -437,8 +437,8 @@ static inline void xfrm_state_hold(struct xfrm_state *x) static __inline__ int addr_match(void *token1, void *token2, int prefixlen) { - __u32 *a1 = token1; - __u32 *a2 = token2; + __be32 *a1 = token1; + __be32 *a2 = token2; int pdw; int pbi; @@ -450,7 +450,7 @@ static __inline__ int addr_match(void *token1, void *token2, int prefixlen) return 0; if (pbi) { - __u32 mask; + __be32 mask; mask = htonl((0xffffffff) << (32 - pbi)); -- cgit v1.3-8-gc7d7 From 26977b4ed728ae911a162b16dbfe1a165b7cf9a1 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:47:05 -0700 Subject: [XFRM]: xfrm_alloc_spi() annotated Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- net/xfrm/xfrm_state.c | 10 +++++----- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index b096e9058f3f..80a19748e96a 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -989,7 +989,7 @@ struct xfrm_policy *xfrm_policy_bysel_ctx(u8 type, int dir, struct xfrm_policy *xfrm_policy_byid(u8, int dir, u32 id, int delete); void xfrm_policy_flush(u8 type); u32 xfrm_get_acqseq(void); -void xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi); +void xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi); struct xfrm_state * xfrm_find_acq(u8 mode, u32 reqid, u8 proto, xfrm_address_t *daddr, xfrm_address_t *saddr, int create, unsigned short family); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 9f63edd39346..e40a8862db5d 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1040,7 +1040,7 @@ u32 xfrm_get_acqseq(void) EXPORT_SYMBOL(xfrm_get_acqseq); void -xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) +xfrm_alloc_spi(struct xfrm_state *x, __be32 minspi, __be32 maxspi) { unsigned int h; struct xfrm_state *x0; @@ -1057,10 +1057,10 @@ xfrm_alloc_spi(struct xfrm_state *x, u32 minspi, u32 maxspi) x->id.spi = minspi; } else { u32 spi = 0; - minspi = ntohl(minspi); - maxspi = ntohl(maxspi); - for (h=0; hid.daddr, htonl(spi), x->id.proto, x->props.family); if (x0 == NULL) { x->id.spi = htonl(spi); -- cgit v1.3-8-gc7d7 From a94cfd19744a568d97b14bbaa500b2a0c3684f34 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:47:24 -0700 Subject: [XFRM]: xfrm_state_lookup() annotations spi argument of xfrm_state_lookup() is net-endian Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- net/ipv4/ipcomp.c | 2 +- net/ipv6/ipcomp6.c | 4 ++-- net/xfrm/xfrm_state.c | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 80a19748e96a..6da1c7c72b47 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -912,7 +912,7 @@ extern int xfrm_state_check_expire(struct xfrm_state *x); extern void xfrm_state_insert(struct xfrm_state *x); extern int xfrm_state_add(struct xfrm_state *x); extern int xfrm_state_update(struct xfrm_state *x); -extern struct xfrm_state *xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family); +extern struct xfrm_state *xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family); extern struct xfrm_state *xfrm_state_lookup_byaddr(xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto, unsigned short family); #ifdef CONFIG_XFRM_SUB_POLICY extern int xfrm_tmpl_sort(struct xfrm_tmpl **dst, struct xfrm_tmpl **src, diff --git a/net/ipv4/ipcomp.c b/net/ipv4/ipcomp.c index 17342430a843..2017d36024d4 100644 --- a/net/ipv4/ipcomp.c +++ b/net/ipv4/ipcomp.c @@ -183,7 +183,7 @@ out_ok: static void ipcomp4_err(struct sk_buff *skb, u32 info) { - u32 spi; + __be32 spi; struct iphdr *iph = (struct iphdr *)skb->data; struct ip_comp_hdr *ipch = (struct ip_comp_hdr *)(skb->data+(iph->ihl<<2)); struct xfrm_state *x; diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c index ad9c6e824e62..a2860e35efd7 100644 --- a/net/ipv6/ipcomp6.c +++ b/net/ipv6/ipcomp6.c @@ -178,7 +178,7 @@ out_ok: static void ipcomp6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, int type, int code, int offset, __u32 info) { - u32 spi; + __be32 spi; struct ipv6hdr *iph = (struct ipv6hdr*)skb->data; struct ipv6_comp_hdr *ipcomph = (struct ipv6_comp_hdr*)(skb->data+offset); struct xfrm_state *x; @@ -234,7 +234,7 @@ static int ipcomp6_tunnel_attach(struct xfrm_state *x) { int err = 0; struct xfrm_state *t = NULL; - u32 spi; + __be32 spi; spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&x->props.saddr); if (spi) diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index e40a8862db5d..3692a4783a75 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -421,7 +421,7 @@ xfrm_init_tempsel(struct xfrm_state *x, struct flowi *fl, return 0; } -static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, unsigned short family) +static struct xfrm_state *__xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { unsigned int h = xfrm_spi_hash(daddr, spi, proto, family); struct xfrm_state *x; @@ -916,7 +916,7 @@ err: EXPORT_SYMBOL(xfrm_state_check); struct xfrm_state * -xfrm_state_lookup(xfrm_address_t *daddr, u32 spi, u8 proto, +xfrm_state_lookup(xfrm_address_t *daddr, __be32 spi, u8 proto, unsigned short family) { struct xfrm_state *x; -- cgit v1.3-8-gc7d7 From 6067b2baba32211e84d1ef2dba863422281bd6c7 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:47:59 -0700 Subject: [XFRM]: xfrm_parse_spi() annotations Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- net/ipv4/xfrm4_input.c | 4 ++-- net/ipv6/xfrm6_input.c | 2 +- net/xfrm/xfrm_input.c | 8 ++++---- 4 files changed, 8 insertions(+), 8 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 6da1c7c72b47..460551968560 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -1004,7 +1004,7 @@ extern void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pi extern int km_report(u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); extern void xfrm_input_init(void); -extern int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq); +extern int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq); extern void xfrm_probe_algs(void); extern int xfrm_count_auth_supported(void); diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c index 040e8475f295..8655d038364c 100644 --- a/net/ipv4/xfrm4_input.c +++ b/net/ipv4/xfrm4_input.c @@ -23,7 +23,7 @@ int xfrm4_rcv(struct sk_buff *skb) EXPORT_SYMBOL(xfrm4_rcv); -static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq) +static int xfrm4_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) { switch (nexthdr) { case IPPROTO_IPIP: @@ -55,7 +55,7 @@ drop: int xfrm4_rcv_encap(struct sk_buff *skb, __u16 encap_type) { int err; - u32 spi, seq; + __be32 spi, seq; struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH]; struct xfrm_state *x; int xfrm_nr = 0; diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index a40a05789013..140aa8769acc 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -19,7 +19,7 @@ int xfrm6_rcv_spi(struct sk_buff *skb, u32 spi) { int err; - u32 seq; + __be32 seq; struct xfrm_state *xfrm_vec[XFRM_MAX_DEPTH]; struct xfrm_state *x; int xfrm_nr = 0; diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c index dfc90bb1cf1f..e8198a2c785d 100644 --- a/net/xfrm/xfrm_input.c +++ b/net/xfrm/xfrm_input.c @@ -46,7 +46,7 @@ EXPORT_SYMBOL(secpath_dup); /* Fetch spi and seq from ipsec header */ -int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq) +int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, __be32 *spi, __be32 *seq) { int offset, offset_seq; @@ -62,7 +62,7 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq) case IPPROTO_COMP: if (!pskb_may_pull(skb, sizeof(struct ip_comp_hdr))) return -EINVAL; - *spi = htonl(ntohs(*(u16*)(skb->h.raw + 2))); + *spi = htonl(ntohs(*(__be16*)(skb->h.raw + 2))); *seq = 0; return 0; default: @@ -72,8 +72,8 @@ int xfrm_parse_spi(struct sk_buff *skb, u8 nexthdr, u32 *spi, u32 *seq) if (!pskb_may_pull(skb, 16)) return -EINVAL; - *spi = *(u32*)(skb->h.raw + offset); - *seq = *(u32*)(skb->h.raw + offset_seq); + *spi = *(__be32*)(skb->h.raw + offset); + *seq = *(__be32*)(skb->h.raw + offset_seq); return 0; } EXPORT_SYMBOL(xfrm_parse_spi); -- cgit v1.3-8-gc7d7 From a252cc2371930debe3162f1ac91467b9791324cb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:48:18 -0700 Subject: [XFRM]: xrfm_replay_check() annotations seq argument is net-endian Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/xfrm.h | 4 ++-- net/ipv6/xfrm6_input.c | 2 +- net/ipv6/xfrm6_tunnel.c | 2 +- net/xfrm/xfrm_state.c | 5 ++--- 4 files changed, 6 insertions(+), 7 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 460551968560..e6110559e505 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -935,7 +935,7 @@ static inline int xfrm_state_sort(struct xfrm_state **dst, struct xfrm_state **s extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq); extern int xfrm_state_delete(struct xfrm_state *x); extern void xfrm_state_flush(u8 proto); -extern int xfrm_replay_check(struct xfrm_state *x, u32 seq); +extern int xfrm_replay_check(struct xfrm_state *x, __be32 seq); extern void xfrm_replay_advance(struct xfrm_state *x, u32 seq); extern void xfrm_replay_notify(struct xfrm_state *x, int event); extern int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb); @@ -945,7 +945,7 @@ extern int xfrm4_rcv(struct sk_buff *skb); extern int xfrm4_output(struct sk_buff *skb); extern int xfrm4_tunnel_register(struct xfrm_tunnel *handler); extern int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler); -extern int xfrm6_rcv_spi(struct sk_buff *skb, u32 spi); +extern int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi); extern int xfrm6_rcv(struct sk_buff **pskb); extern int xfrm6_input_addr(struct sk_buff *skb, xfrm_address_t *daddr, xfrm_address_t *saddr, u8 proto); diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index 140aa8769acc..5c8b7a568800 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -16,7 +16,7 @@ #include #include -int xfrm6_rcv_spi(struct sk_buff *skb, u32 spi) +int xfrm6_rcv_spi(struct sk_buff *skb, __be32 spi) { int err; __be32 seq; diff --git a/net/ipv6/xfrm6_tunnel.c b/net/ipv6/xfrm6_tunnel.c index 59685ee8f700..7af227bb1551 100644 --- a/net/ipv6/xfrm6_tunnel.c +++ b/net/ipv6/xfrm6_tunnel.c @@ -258,7 +258,7 @@ static int xfrm6_tunnel_input(struct xfrm_state *x, struct sk_buff *skb) static int xfrm6_tunnel_rcv(struct sk_buff *skb) { struct ipv6hdr *iph = skb->nh.ipv6h; - u32 spi; + __be32 spi; spi = xfrm6_tunnel_spi_lookup((xfrm_address_t *)&iph->saddr); return xfrm6_rcv_spi(skb, spi); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 3692a4783a75..33e982961db0 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1180,11 +1180,10 @@ static void xfrm_replay_timer_handler(unsigned long data) spin_unlock(&x->lock); } -int xfrm_replay_check(struct xfrm_state *x, u32 seq) +int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq) { u32 diff; - - seq = ntohl(seq); + u32 seq = ntohl(net_seq); if (unlikely(seq == 0)) return -EINVAL; -- cgit v1.3-8-gc7d7 From 61f4627b2fecce9d5c9645e4b47e75a0c29ad8c3 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:48:33 -0700 Subject: [XFRM]: xfrm_replay_advance() annotations Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 +- net/xfrm/xfrm_state.c | 5 ++--- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index e6110559e505..1e2a4ddec96e 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -936,7 +936,7 @@ extern struct xfrm_state *xfrm_find_acq_byseq(u32 seq); extern int xfrm_state_delete(struct xfrm_state *x); extern void xfrm_state_flush(u8 proto); extern int xfrm_replay_check(struct xfrm_state *x, __be32 seq); -extern void xfrm_replay_advance(struct xfrm_state *x, u32 seq); +extern void xfrm_replay_advance(struct xfrm_state *x, __be32 seq); extern void xfrm_replay_notify(struct xfrm_state *x, int event); extern int xfrm_state_check(struct xfrm_state *x, struct sk_buff *skb); extern int xfrm_state_mtu(struct xfrm_state *x, int mtu); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 33e982961db0..25657f57fbb0 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -1205,11 +1205,10 @@ int xfrm_replay_check(struct xfrm_state *x, __be32 net_seq) } EXPORT_SYMBOL(xfrm_replay_check); -void xfrm_replay_advance(struct xfrm_state *x, u32 seq) +void xfrm_replay_advance(struct xfrm_state *x, __be32 net_seq) { u32 diff; - - seq = ntohl(seq); + u32 seq = ntohl(net_seq); if (seq > x->replay.seq) { diff = seq - x->replay.seq; -- cgit v1.3-8-gc7d7 From 4324a174304d1d826582be5422a976e09d2b1e63 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 27 Sep 2006 18:49:07 -0700 Subject: [XFRM]: fl_ipsec_spi is net-endian Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/flow.h | 2 +- net/ipv4/xfrm4_policy.c | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/net/flow.h b/include/net/flow.h index 5700b0ad63c0..ddf5f3ca1720 100644 --- a/include/net/flow.h +++ b/include/net/flow.h @@ -73,7 +73,7 @@ struct flowi { __u8 objname[16]; /* Not zero terminated */ } dnports; - __u32 spi; + __be32 spi; #ifdef CONFIG_IPV6_MIP6 struct { diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index eabcd27b1767..7a7a00147e55 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -221,7 +221,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl) case IPPROTO_ESP: if (pskb_may_pull(skb, xprth + 4 - skb->data)) { - u32 *ehdr = (u32 *)xprth; + __be32 *ehdr = (__be32 *)xprth; fl->fl_ipsec_spi = ehdr[0]; } @@ -229,7 +229,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl) case IPPROTO_AH: if (pskb_may_pull(skb, xprth + 8 - skb->data)) { - u32 *ah_hdr = (u32*)xprth; + __be32 *ah_hdr = (__be32*)xprth; fl->fl_ipsec_spi = ah_hdr[1]; } @@ -237,7 +237,7 @@ _decode_session4(struct sk_buff *skb, struct flowi *fl) case IPPROTO_COMP: if (pskb_may_pull(skb, xprth + 4 - skb->data)) { - u16 *ipcomp_hdr = (u16 *)xprth; + __be16 *ipcomp_hdr = (__be16 *)xprth; fl->fl_ipsec_spi = htonl(ntohs(ipcomp_hdr[1])); } -- cgit v1.3-8-gc7d7 From 1b0fee7d68f234be6b270cda51d9fcb71bebd780 Mon Sep 17 00:00:00 2001 From: Samuel Ortiz Date: Wed, 27 Sep 2006 20:06:44 -0700 Subject: [IrDA]: Memory allocations cleanups This patch replaces the bunch of arbitrary 64 and 128 bytes alloc_skb() calls with more accurate allocation sizes. Signed-off-by: Samuel Ortiz Signed-off-by: David S. Miller --- include/net/irda/irlan_common.h | 10 ++++++- include/net/irda/irlap_frame.h | 31 ++++++++++++++++++++-- include/net/irda/irlmp.h | 2 +- net/irda/af_irda.c | 3 ++- net/irda/ircomm/ircomm_lmp.c | 4 +-- net/irda/iriap.c | 9 ++++--- net/irda/iriap_event.c | 2 +- net/irda/irlan/irlan_common.c | 46 +++++++++++++++++++++++++------- net/irda/irlan/irlan_provider.c | 12 ++++++--- net/irda/irlap_frame.c | 59 ++++++++++++++++++++++------------------- net/irda/irlmp.c | 2 +- net/irda/irttp.c | 14 +++++----- 12 files changed, 136 insertions(+), 58 deletions(-) (limited to 'include/net') diff --git a/include/net/irda/irlan_common.h b/include/net/irda/irlan_common.h index 1c73bdbc3eb3..9592c374b41d 100644 --- a/include/net/irda/irlan_common.h +++ b/include/net/irda/irlan_common.h @@ -98,7 +98,15 @@ #define IRLAN_SHORT 1 #define IRLAN_ARRAY 2 -#define IRLAN_MAX_HEADER (TTP_HEADER+LMP_HEADER+LAP_MAX_HEADER) +/* IrLAN sits on top if IrTTP */ +#define IRLAN_MAX_HEADER (TTP_HEADER+LMP_HEADER) +/* 1 byte for the command code and 1 byte for the parameter count */ +#define IRLAN_CMD_HEADER 2 + +#define IRLAN_STRING_PARAMETER_LEN(name, value) (1 + strlen((name)) + 2 \ + + strlen ((value))) +#define IRLAN_BYTE_PARAMETER_LEN(name) (1 + strlen((name)) + 2 + 1) +#define IRLAN_SHORT_PARAMETER_LEN(name) (1 + strlen((name)) + 2 + 2) /* * IrLAN client diff --git a/include/net/irda/irlap_frame.h b/include/net/irda/irlap_frame.h index 3452ae257c84..9dd54a5002b2 100644 --- a/include/net/irda/irlap_frame.h +++ b/include/net/irda/irlap_frame.h @@ -74,6 +74,19 @@ struct discovery_t; #define PF_BIT 0x10 /* Poll/final bit */ +/* Some IrLAP field lengths */ +/* + * Only baud rate triplet is 4 bytes (PV can be 2 bytes). + * All others params (7) are 3 bytes, so that's 7*3 + 1*4 bytes. + */ +#define IRLAP_NEGOCIATION_PARAMS_LEN 25 +#define IRLAP_DISCOVERY_INFO_LEN 32 + +struct disc_frame { + __u8 caddr; /* Connection address */ + __u8 control; +} IRDA_PACK; + struct xid_frame { __u8 caddr; /* Connection address */ __u8 control; @@ -95,11 +108,25 @@ struct test_frame { struct ua_frame { __u8 caddr; __u8 control; - __u32 saddr; /* Source device address */ __u32 daddr; /* Dest device address */ } IRDA_PACK; - + +struct dm_frame { + __u8 caddr; /* Connection address */ + __u8 control; +} IRDA_PACK; + +struct rd_frame { + __u8 caddr; /* Connection address */ + __u8 control; +} IRDA_PACK; + +struct rr_frame { + __u8 caddr; /* Connection address */ + __u8 control; +} IRDA_PACK; + struct i_frame { __u8 caddr; __u8 control; diff --git a/include/net/irda/irlmp.h b/include/net/irda/irlmp.h index 11ecfa58a648..e212b9bc2503 100644 --- a/include/net/irda/irlmp.h +++ b/include/net/irda/irlmp.h @@ -48,7 +48,7 @@ #define DEV_ADDR_ANY 0xffffffff #define LMP_HEADER 2 /* Dest LSAP + Source LSAP */ -#define LMP_CONTROL_HEADER 4 +#define LMP_CONTROL_HEADER 4 /* LMP_HEADER + opcode + parameter */ #define LMP_PID_HEADER 1 /* Used by Ultra */ #define LMP_MAX_HEADER (LMP_CONTROL_HEADER+LAP_MAX_HEADER) diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index 7b7cd5bd2a06..7e1aea89ef05 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -309,7 +309,8 @@ static void irda_connect_response(struct irda_sock *self) IRDA_ASSERT(self != NULL, return;); - skb = alloc_skb(64, GFP_ATOMIC); + skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER, + GFP_ATOMIC); if (skb == NULL) { IRDA_DEBUG(0, "%s() Unable to allocate sk_buff!\n", __FUNCTION__); diff --git a/net/irda/ircomm/ircomm_lmp.c b/net/irda/ircomm/ircomm_lmp.c index 959874b6451f..c8e0d89ee11f 100644 --- a/net/irda/ircomm/ircomm_lmp.c +++ b/net/irda/ircomm/ircomm_lmp.c @@ -81,7 +81,7 @@ static int ircomm_lmp_connect_response(struct ircomm_cb *self, /* Any userdata supplied? */ if (userdata == NULL) { - tx_skb = alloc_skb(64, GFP_ATOMIC); + tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC); if (!tx_skb) return -ENOMEM; @@ -115,7 +115,7 @@ static int ircomm_lmp_disconnect_request(struct ircomm_cb *self, IRDA_DEBUG(0, "%s()\n", __FUNCTION__ ); if (!userdata) { - tx_skb = alloc_skb(64, GFP_ATOMIC); + tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC); if (!tx_skb) return -ENOMEM; diff --git a/net/irda/iriap.c b/net/irda/iriap.c index 61128aa05b40..415cf4eec23b 100644 --- a/net/irda/iriap.c +++ b/net/irda/iriap.c @@ -345,10 +345,11 @@ static void iriap_disconnect_request(struct iriap_cb *self) IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IAS_MAGIC, return;); - tx_skb = alloc_skb(64, GFP_ATOMIC); + tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC); if (tx_skb == NULL) { - IRDA_DEBUG(0, "%s(), Could not allocate an sk_buff of length %d\n", - __FUNCTION__, 64); + IRDA_DEBUG(0, + "%s(), Could not allocate an sk_buff of length %d\n", + __FUNCTION__, LMP_MAX_HEADER); return; } @@ -701,7 +702,7 @@ void iriap_send_ack(struct iriap_cb *self) IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IAS_MAGIC, return;); - tx_skb = alloc_skb(64, GFP_ATOMIC); + tx_skb = alloc_skb(LMP_MAX_HEADER + 1, GFP_ATOMIC); if (!tx_skb) return; diff --git a/net/irda/iriap_event.c b/net/irda/iriap_event.c index da17395df05a..99b18dc7a0b7 100644 --- a/net/irda/iriap_event.c +++ b/net/irda/iriap_event.c @@ -365,7 +365,7 @@ static void state_r_disconnect(struct iriap_cb *self, IRIAP_EVENT event, switch (event) { case IAP_LM_CONNECT_INDICATION: - tx_skb = alloc_skb(64, GFP_ATOMIC); + tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC); if (tx_skb == NULL) { IRDA_WARNING("%s: unable to malloc!\n", __FUNCTION__); return; diff --git a/net/irda/irlan/irlan_common.c b/net/irda/irlan/irlan_common.c index 7dd0a2fe1d20..9b962f247714 100644 --- a/net/irda/irlan/irlan_common.c +++ b/net/irda/irlan/irlan_common.c @@ -636,7 +636,8 @@ void irlan_get_provider_info(struct irlan_cb *self) IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - skb = alloc_skb(64, GFP_ATOMIC); + skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER, + GFP_ATOMIC); if (!skb) return; @@ -668,7 +669,10 @@ void irlan_open_data_channel(struct irlan_cb *self) IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - skb = alloc_skb(64, GFP_ATOMIC); + skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER + + IRLAN_STRING_PARAMETER_LEN("MEDIA", "802.3") + + IRLAN_STRING_PARAMETER_LEN("ACCESS_TYPE", "DIRECT"), + GFP_ATOMIC); if (!skb) return; @@ -704,7 +708,9 @@ void irlan_close_data_channel(struct irlan_cb *self) if (self->client.tsap_ctrl == NULL) return; - skb = alloc_skb(64, GFP_ATOMIC); + skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER + + IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN"), + GFP_ATOMIC); if (!skb) return; @@ -715,7 +721,7 @@ void irlan_close_data_channel(struct irlan_cb *self) /* Build frame */ frame[0] = CMD_CLOSE_DATA_CHAN; - frame[1] = 0x01; /* Two parameters */ + frame[1] = 0x01; /* One parameter */ irlan_insert_byte_param(skb, "DATA_CHAN", self->dtsap_sel_data); @@ -739,7 +745,11 @@ static void irlan_open_unicast_addr(struct irlan_cb *self) IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - skb = alloc_skb(128, GFP_ATOMIC); + skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER + + IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN") + + IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "DIRECTED") + + IRLAN_STRING_PARAMETER_LEN("FILTER_MODE", "FILTER"), + GFP_ATOMIC); if (!skb) return; @@ -777,7 +787,12 @@ void irlan_set_broadcast_filter(struct irlan_cb *self, int status) IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - skb = alloc_skb(128, GFP_ATOMIC); + skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER + + IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN") + + IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "BROADCAST") + + /* We may waste one byte here...*/ + IRLAN_STRING_PARAMETER_LEN("FILTER_MODE", "FILTER"), + GFP_ATOMIC); if (!skb) return; @@ -816,7 +831,12 @@ void irlan_set_multicast_filter(struct irlan_cb *self, int status) IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - skb = alloc_skb(128, GFP_ATOMIC); + skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER + + IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN") + + IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "MULTICAST") + + /* We may waste one byte here...*/ + IRLAN_STRING_PARAMETER_LEN("FILTER_MODE", "NONE"), + GFP_ATOMIC); if (!skb) return; @@ -856,7 +876,12 @@ static void irlan_get_unicast_addr(struct irlan_cb *self) IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - skb = alloc_skb(128, GFP_ATOMIC); + skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER + + IRLAN_BYTE_PARAMETER_LEN("DATA_CHAN") + + IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "DIRECTED") + + IRLAN_STRING_PARAMETER_LEN("FILTER_OPERATION", + "DYNAMIC"), + GFP_ATOMIC); if (!skb) return; @@ -891,7 +916,10 @@ void irlan_get_media_char(struct irlan_cb *self) IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - skb = alloc_skb(64, GFP_ATOMIC); + skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER + + IRLAN_STRING_PARAMETER_LEN("MEDIA", "802.3"), + GFP_ATOMIC); + if (!skb) return; diff --git a/net/irda/irlan/irlan_provider.c b/net/irda/irlan/irlan_provider.c index 9c0df86044d7..58efde919667 100644 --- a/net/irda/irlan/irlan_provider.c +++ b/net/irda/irlan/irlan_provider.c @@ -296,7 +296,14 @@ void irlan_provider_send_reply(struct irlan_cb *self, int command, IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == IRLAN_MAGIC, return;); - skb = alloc_skb(128, GFP_ATOMIC); + skb = alloc_skb(IRLAN_MAX_HEADER + IRLAN_CMD_HEADER + + /* Bigger param length comes from CMD_GET_MEDIA_CHAR */ + IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "DIRECTED") + + IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "BORADCAST") + + IRLAN_STRING_PARAMETER_LEN("FILTER_TYPE", "MULTICAST") + + IRLAN_STRING_PARAMETER_LEN("ACCESS_TYPE", "HOSTED"), + GFP_ATOMIC); + if (!skb) return; @@ -354,8 +361,7 @@ void irlan_provider_send_reply(struct irlan_cb *self, int command, } else skb->data[1] = 0x02; /* 2 parameters */ irlan_insert_byte_param(skb, "DATA_CHAN", self->stsap_sel_data); - irlan_insert_array_param(skb, "RECONNECT_KEY", "LINUX RULES!", - 12); + irlan_insert_string_param(skb, "RECONNECT_KEY", "LINUX RULES!"); break; case CMD_FILTER_OPERATION: irlan_filter_request(self, skb); diff --git a/net/irda/irlap_frame.c b/net/irda/irlap_frame.c index ccb983bf0f4a..dba349c832d0 100644 --- a/net/irda/irlap_frame.c +++ b/net/irda/irlap_frame.c @@ -117,7 +117,9 @@ void irlap_send_snrm_frame(struct irlap_cb *self, struct qos_info *qos) IRDA_ASSERT(self->magic == LAP_MAGIC, return;); /* Allocate frame */ - tx_skb = alloc_skb(64, GFP_ATOMIC); + tx_skb = alloc_skb(sizeof(struct snrm_frame) + + IRLAP_NEGOCIATION_PARAMS_LEN, + GFP_ATOMIC); if (!tx_skb) return; @@ -136,7 +138,7 @@ void irlap_send_snrm_frame(struct irlap_cb *self, struct qos_info *qos) * If we are establishing a connection then insert QoS paramerters */ if (qos) { - skb_put(tx_skb, 9); /* 21 left */ + skb_put(tx_skb, 9); /* 25 left */ frame->saddr = cpu_to_le32(self->saddr); frame->daddr = cpu_to_le32(self->daddr); @@ -210,7 +212,9 @@ void irlap_send_ua_response_frame(struct irlap_cb *self, struct qos_info *qos) IRDA_ASSERT(self->magic == LAP_MAGIC, return;); /* Allocate frame */ - tx_skb = alloc_skb(64, GFP_ATOMIC); + tx_skb = alloc_skb(sizeof(struct ua_frame) + + IRLAP_NEGOCIATION_PARAMS_LEN, + GFP_ATOMIC); if (!tx_skb) return; @@ -245,23 +249,23 @@ void irlap_send_ua_response_frame(struct irlap_cb *self, struct qos_info *qos) void irlap_send_dm_frame( struct irlap_cb *self) { struct sk_buff *tx_skb = NULL; - __u8 *frame; + struct dm_frame *frame; IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - tx_skb = alloc_skb(32, GFP_ATOMIC); + tx_skb = alloc_skb(sizeof(struct dm_frame), GFP_ATOMIC); if (!tx_skb) return; - frame = skb_put(tx_skb, 2); + frame = (struct dm_frame *)skb_put(tx_skb, 2); if (self->state == LAP_NDM) - frame[0] = CBROADCAST; + frame->caddr = CBROADCAST; else - frame[0] = self->caddr; + frame->caddr = self->caddr; - frame[1] = DM_RSP | PF_BIT; + frame->control = DM_RSP | PF_BIT; irlap_queue_xmit(self, tx_skb); } @@ -275,21 +279,21 @@ void irlap_send_dm_frame( struct irlap_cb *self) void irlap_send_disc_frame(struct irlap_cb *self) { struct sk_buff *tx_skb = NULL; - __u8 *frame; + struct disc_frame *frame; IRDA_DEBUG(3, "%s()\n", __FUNCTION__); IRDA_ASSERT(self != NULL, return;); IRDA_ASSERT(self->magic == LAP_MAGIC, return;); - tx_skb = alloc_skb(16, GFP_ATOMIC); + tx_skb = alloc_skb(sizeof(struct disc_frame), GFP_ATOMIC); if (!tx_skb) return; - frame = skb_put(tx_skb, 2); + frame = (struct disc_frame *)skb_put(tx_skb, 2); - frame[0] = self->caddr | CMD_FRAME; - frame[1] = DISC_CMD | PF_BIT; + frame->caddr = self->caddr | CMD_FRAME; + frame->control = DISC_CMD | PF_BIT; irlap_queue_xmit(self, tx_skb); } @@ -315,7 +319,8 @@ void irlap_send_discovery_xid_frame(struct irlap_cb *self, int S, __u8 s, IRDA_ASSERT(self->magic == LAP_MAGIC, return;); IRDA_ASSERT(discovery != NULL, return;); - tx_skb = alloc_skb(64, GFP_ATOMIC); + tx_skb = alloc_skb(sizeof(struct xid_frame) + IRLAP_DISCOVERY_INFO_LEN, + GFP_ATOMIC); if (!tx_skb) return; @@ -573,18 +578,18 @@ static void irlap_recv_discovery_xid_cmd(struct irlap_cb *self, void irlap_send_rr_frame(struct irlap_cb *self, int command) { struct sk_buff *tx_skb; - __u8 *frame; + struct rr_frame *frame; - tx_skb = alloc_skb(16, GFP_ATOMIC); + tx_skb = alloc_skb(sizeof(struct rr_frame), GFP_ATOMIC); if (!tx_skb) return; - frame = skb_put(tx_skb, 2); + frame = (struct rr_frame *)skb_put(tx_skb, 2); - frame[0] = self->caddr; - frame[0] |= (command) ? CMD_FRAME : 0; + frame->caddr = self->caddr; + frame->caddr |= (command) ? CMD_FRAME : 0; - frame[1] = RR | PF_BIT | (self->vr << 5); + frame->control = RR | PF_BIT | (self->vr << 5); irlap_queue_xmit(self, tx_skb); } @@ -598,16 +603,16 @@ void irlap_send_rr_frame(struct irlap_cb *self, int command) void irlap_send_rd_frame(struct irlap_cb *self) { struct sk_buff *tx_skb; - __u8 *frame; + struct rd_frame *frame; - tx_skb = alloc_skb(16, GFP_ATOMIC); + tx_skb = alloc_skb(sizeof(struct rd_frame), GFP_ATOMIC); if (!tx_skb) return; - frame = skb_put(tx_skb, 2); + frame = (struct rd_frame *)skb_put(tx_skb, 2); - frame[0] = self->caddr; - frame[1] = RD_RSP | PF_BIT; + frame->caddr = self->caddr; + frame->caddr = RD_RSP | PF_BIT; irlap_queue_xmit(self, tx_skb); } @@ -1214,7 +1219,7 @@ void irlap_send_test_frame(struct irlap_cb *self, __u8 caddr, __u32 daddr, struct test_frame *frame; __u8 *info; - tx_skb = alloc_skb(cmd->len+sizeof(struct test_frame), GFP_ATOMIC); + tx_skb = alloc_skb(cmd->len + sizeof(struct test_frame), GFP_ATOMIC); if (!tx_skb) return; diff --git a/net/irda/irlmp.c b/net/irda/irlmp.c index c440913dee14..5073261b9d0c 100644 --- a/net/irda/irlmp.c +++ b/net/irda/irlmp.c @@ -392,7 +392,7 @@ int irlmp_connect_request(struct lsap_cb *self, __u8 dlsap_sel, /* Any userdata? */ if (tx_skb == NULL) { - tx_skb = alloc_skb(64, GFP_ATOMIC); + tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC); if (!tx_skb) return -ENOMEM; diff --git a/net/irda/irttp.c b/net/irda/irttp.c index 42acf1cde737..3c2e70b77df1 100644 --- a/net/irda/irttp.c +++ b/net/irda/irttp.c @@ -804,12 +804,12 @@ static inline void irttp_give_credit(struct tsap_cb *self) self->send_credit, self->avail_credit, self->remote_credit); /* Give credit to peer */ - tx_skb = alloc_skb(64, GFP_ATOMIC); + tx_skb = alloc_skb(TTP_MAX_HEADER, GFP_ATOMIC); if (!tx_skb) return; /* Reserve space for LMP, and LAP header */ - skb_reserve(tx_skb, self->max_header_size); + skb_reserve(tx_skb, LMP_MAX_HEADER); /* * Since we can transmit and receive frames concurrently, @@ -1093,7 +1093,8 @@ int irttp_connect_request(struct tsap_cb *self, __u8 dtsap_sel, /* Any userdata supplied? */ if (userdata == NULL) { - tx_skb = alloc_skb(64, GFP_ATOMIC); + tx_skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER, + GFP_ATOMIC); if (!tx_skb) return -ENOMEM; @@ -1341,7 +1342,8 @@ int irttp_connect_response(struct tsap_cb *self, __u32 max_sdu_size, /* Any userdata supplied? */ if (userdata == NULL) { - tx_skb = alloc_skb(64, GFP_ATOMIC); + tx_skb = alloc_skb(TTP_MAX_HEADER + TTP_SAR_HEADER, + GFP_ATOMIC); if (!tx_skb) return -ENOMEM; @@ -1540,14 +1542,14 @@ int irttp_disconnect_request(struct tsap_cb *self, struct sk_buff *userdata, if (!userdata) { struct sk_buff *tx_skb; - tx_skb = alloc_skb(64, GFP_ATOMIC); + tx_skb = alloc_skb(LMP_MAX_HEADER, GFP_ATOMIC); if (!tx_skb) return -ENOMEM; /* * Reserve space for MUX and LAP header */ - skb_reserve(tx_skb, TTP_MAX_HEADER); + skb_reserve(tx_skb, LMP_MAX_HEADER); userdata = tx_skb; } -- cgit v1.3-8-gc7d7 From d77072ecfb6d28287d5e2a61d60d87a3a444ac97 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 28 Sep 2006 14:20:34 -0700 Subject: [NET]: Annotate dst_ops protocol Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- include/net/dst.h | 2 +- net/core/neighbour.c | 2 +- net/ethernet/eth.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 13d6d4eb8b3a..9264139bd8df 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -187,7 +187,7 @@ struct hh_cache { struct hh_cache *hh_next; /* Next entry */ atomic_t hh_refcnt; /* number of users */ - unsigned short hh_type; /* protocol identifier, f.e ETH_P_IP + __be16 hh_type; /* protocol identifier, f.e ETH_P_IP * NOTE: For VLANs, this will be the * encapuslated type. --BLG */ diff --git a/include/net/dst.h b/include/net/dst.h index a8d825f90305..e156e38e4ac3 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -84,7 +84,7 @@ struct dst_entry struct dst_ops { unsigned short family; - unsigned short protocol; + __be16 protocol; unsigned gc_thresh; int (*gc)(void); diff --git a/net/core/neighbour.c b/net/core/neighbour.c index b6c69e1463e8..8ce8c471d868 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -1079,7 +1079,7 @@ struct neighbour *neigh_event_ns(struct neigh_table *tbl, } static void neigh_hh_init(struct neighbour *n, struct dst_entry *dst, - u16 protocol) + __be16 protocol) { struct hh_cache *hh; struct net_device *dev = dst->dev; diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c index 43863933f27f..4bd78c8cfb26 100644 --- a/net/ethernet/eth.c +++ b/net/ethernet/eth.c @@ -223,7 +223,7 @@ static int eth_header_parse(struct sk_buff *skb, unsigned char *haddr) */ int eth_header_cache(struct neighbour *neigh, struct hh_cache *hh) { - unsigned short type = hh->hh_type; + __be16 type = hh->hh_type; struct ethhdr *eth; struct net_device *dev = neigh->dev; -- cgit v1.3-8-gc7d7 From 014d730d56b559eacb11e91969a1f41c3feb36f9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 28 Sep 2006 14:29:52 -0700 Subject: [IPVS]: ipvs annotations Signed-off-by: Al Viro Signed-off-by: David S. Miller --- include/net/ip_vs.h | 68 ++++++++++++++++++++--------------------- net/ipv4/ipvs/ip_vs_conn.c | 24 +++++++-------- net/ipv4/ipvs/ip_vs_core.c | 14 ++++----- net/ipv4/ipvs/ip_vs_ctl.c | 26 ++++++++-------- net/ipv4/ipvs/ip_vs_dh.c | 4 +-- net/ipv4/ipvs/ip_vs_ftp.c | 12 ++++---- net/ipv4/ipvs/ip_vs_lblc.c | 8 ++--- net/ipv4/ipvs/ip_vs_lblcr.c | 8 ++--- net/ipv4/ipvs/ip_vs_proto.c | 2 +- net/ipv4/ipvs/ip_vs_proto_tcp.c | 10 +++--- net/ipv4/ipvs/ip_vs_proto_udp.c | 14 ++++----- net/ipv4/ipvs/ip_vs_sh.c | 4 +-- net/ipv4/ipvs/ip_vs_sync.c | 16 +++++----- net/ipv4/ipvs/ip_vs_xmit.c | 2 +- 14 files changed, 106 insertions(+), 106 deletions(-) (limited to 'include/net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index 3b57b159b653..49c717e3b040 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -100,22 +100,22 @@ struct ip_vs_service_user { /* virtual service addresses */ u_int16_t protocol; - u_int32_t addr; /* virtual ip address */ - u_int16_t port; + __be32 addr; /* virtual ip address */ + __be16 port; u_int32_t fwmark; /* firwall mark of service */ /* virtual service options */ char sched_name[IP_VS_SCHEDNAME_MAXLEN]; unsigned flags; /* virtual service flags */ unsigned timeout; /* persistent timeout in sec */ - u_int32_t netmask; /* persistent netmask */ + __be32 netmask; /* persistent netmask */ }; struct ip_vs_dest_user { /* destination server address */ - u_int32_t addr; - u_int16_t port; + __be32 addr; + __be16 port; /* real server options */ unsigned conn_flags; /* connection flags */ @@ -163,15 +163,15 @@ struct ip_vs_getinfo { struct ip_vs_service_entry { /* which service: user fills in these */ u_int16_t protocol; - u_int32_t addr; /* virtual address */ - u_int16_t port; + __be32 addr; /* virtual address */ + __be16 port; u_int32_t fwmark; /* firwall mark of service */ /* service options */ char sched_name[IP_VS_SCHEDNAME_MAXLEN]; unsigned flags; /* virtual service flags */ unsigned timeout; /* persistent timeout */ - u_int32_t netmask; /* persistent netmask */ + __be32 netmask; /* persistent netmask */ /* number of real servers */ unsigned int num_dests; @@ -182,8 +182,8 @@ struct ip_vs_service_entry { struct ip_vs_dest_entry { - u_int32_t addr; /* destination address */ - u_int16_t port; + __be32 addr; /* destination address */ + __be16 port; unsigned conn_flags; /* connection flags */ int weight; /* destination weight */ @@ -203,8 +203,8 @@ struct ip_vs_dest_entry { struct ip_vs_get_dests { /* which service: user fills in these */ u_int16_t protocol; - u_int32_t addr; /* virtual address */ - u_int16_t port; + __be32 addr; /* virtual address */ + __be16 port; u_int32_t fwmark; /* firwall mark of service */ /* number of real servers */ @@ -502,12 +502,12 @@ struct ip_vs_conn { struct list_head c_list; /* hashed list heads */ /* Protocol, addresses and port numbers */ - __u32 caddr; /* client address */ - __u32 vaddr; /* virtual address */ - __u32 daddr; /* destination address */ - __u16 cport; - __u16 vport; - __u16 dport; + __be32 caddr; /* client address */ + __be32 vaddr; /* virtual address */ + __be32 daddr; /* destination address */ + __be16 cport; + __be16 vport; + __be16 dport; __u16 protocol; /* Which protocol (TCP/UDP) */ /* counter and timer */ @@ -554,12 +554,12 @@ struct ip_vs_service { atomic_t usecnt; /* use counter */ __u16 protocol; /* which protocol (TCP/UDP) */ - __u32 addr; /* IP address for virtual service */ - __u16 port; /* port number for the service */ + __be32 addr; /* IP address for virtual service */ + __be16 port; /* port number for the service */ __u32 fwmark; /* firewall mark of the service */ unsigned flags; /* service status flags */ unsigned timeout; /* persistent timeout in ticks */ - __u32 netmask; /* grouping granularity */ + __be32 netmask; /* grouping granularity */ struct list_head destinations; /* real server d-linked list */ __u32 num_dests; /* number of servers */ @@ -581,8 +581,8 @@ struct ip_vs_dest { struct list_head n_list; /* for the dests in the service */ struct list_head d_list; /* for table with all the dests */ - __u32 addr; /* IP address of the server */ - __u16 port; /* port number of the server */ + __be32 addr; /* IP address of the server */ + __be16 port; /* port number of the server */ volatile unsigned flags; /* dest status flags */ atomic_t conn_flags; /* flags to copy to conn */ atomic_t weight; /* server weight */ @@ -605,8 +605,8 @@ struct ip_vs_dest { /* for virtual service */ struct ip_vs_service *svc; /* service it belongs to */ __u16 protocol; /* which protocol (TCP/UDP) */ - __u32 vaddr; /* virtual IP address */ - __u16 vport; /* virtual port number */ + __be32 vaddr; /* virtual IP address */ + __be16 vport; /* virtual port number */ __u32 vfwmark; /* firewall mark of service */ }; @@ -648,7 +648,7 @@ struct ip_vs_app /* members for application incarnations */ struct list_head p_list; /* member in proto app list */ struct ip_vs_app *app; /* its real application */ - __u16 port; /* port number in net order */ + __be16 port; /* port number in net order */ atomic_t usecnt; /* usage counter */ /* output hook: return false if can't linearize. diff set for TCP. */ @@ -740,11 +740,11 @@ enum { }; extern struct ip_vs_conn *ip_vs_conn_in_get -(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port); +(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port); extern struct ip_vs_conn *ip_vs_ct_in_get -(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port); +(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port); extern struct ip_vs_conn *ip_vs_conn_out_get -(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port); +(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port); /* put back the conn without restarting its timer */ static inline void __ip_vs_conn_put(struct ip_vs_conn *cp) @@ -752,11 +752,11 @@ static inline void __ip_vs_conn_put(struct ip_vs_conn *cp) atomic_dec(&cp->refcnt); } extern void ip_vs_conn_put(struct ip_vs_conn *cp); -extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __u16 cport); +extern void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport); extern struct ip_vs_conn * -ip_vs_conn_new(int proto, __u32 caddr, __u16 cport, __u32 vaddr, __u16 vport, - __u32 daddr, __u16 dport, unsigned flags, +ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport, + __be32 daddr, __be16 dport, unsigned flags, struct ip_vs_dest *dest); extern void ip_vs_conn_expire_now(struct ip_vs_conn *cp); @@ -887,7 +887,7 @@ extern int sysctl_ip_vs_nat_icmp_send; extern struct ip_vs_stats ip_vs_stats; extern struct ip_vs_service * -ip_vs_service_get(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport); +ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport); static inline void ip_vs_service_put(struct ip_vs_service *svc) { @@ -895,7 +895,7 @@ static inline void ip_vs_service_put(struct ip_vs_service *svc) } extern struct ip_vs_dest * -ip_vs_lookup_real_service(__u16 protocol, __u32 daddr, __u16 dport); +ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport); extern int ip_vs_use_count_inc(void); extern void ip_vs_use_count_dec(void); extern int ip_vs_control_init(void); diff --git a/net/ipv4/ipvs/ip_vs_conn.c b/net/ipv4/ipvs/ip_vs_conn.c index 87b83813cf2c..8832eb517d52 100644 --- a/net/ipv4/ipvs/ip_vs_conn.c +++ b/net/ipv4/ipvs/ip_vs_conn.c @@ -115,9 +115,9 @@ static inline void ct_write_unlock_bh(unsigned key) /* * Returns hash value for IPVS connection entry */ -static unsigned int ip_vs_conn_hashkey(unsigned proto, __u32 addr, __u16 port) +static unsigned int ip_vs_conn_hashkey(unsigned proto, __be32 addr, __be16 port) { - return jhash_3words(addr, port, proto, ip_vs_conn_rnd) + return jhash_3words((__force u32)addr, (__force u32)port, proto, ip_vs_conn_rnd) & IP_VS_CONN_TAB_MASK; } @@ -188,7 +188,7 @@ static inline int ip_vs_conn_unhash(struct ip_vs_conn *cp) * d_addr, d_port: pkt dest address (load balancer) */ static inline struct ip_vs_conn *__ip_vs_conn_in_get -(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) +(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) { unsigned hash; struct ip_vs_conn *cp; @@ -215,7 +215,7 @@ static inline struct ip_vs_conn *__ip_vs_conn_in_get } struct ip_vs_conn *ip_vs_conn_in_get -(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) +(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) { struct ip_vs_conn *cp; @@ -234,7 +234,7 @@ struct ip_vs_conn *ip_vs_conn_in_get /* Get reference to connection template */ struct ip_vs_conn *ip_vs_ct_in_get -(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) +(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) { unsigned hash; struct ip_vs_conn *cp; @@ -274,7 +274,7 @@ struct ip_vs_conn *ip_vs_ct_in_get * d_addr, d_port: pkt dest address (foreign host) */ struct ip_vs_conn *ip_vs_conn_out_get -(int protocol, __u32 s_addr, __u16 s_port, __u32 d_addr, __u16 d_port) +(int protocol, __be32 s_addr, __be16 s_port, __be32 d_addr, __be16 d_port) { unsigned hash; struct ip_vs_conn *cp, *ret=NULL; @@ -324,7 +324,7 @@ void ip_vs_conn_put(struct ip_vs_conn *cp) /* * Fill a no_client_port connection with a client port number */ -void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __u16 cport) +void ip_vs_conn_fill_cport(struct ip_vs_conn *cp, __be16 cport) { if (ip_vs_conn_unhash(cp)) { spin_lock(&cp->lock); @@ -508,10 +508,10 @@ int ip_vs_check_template(struct ip_vs_conn *ct) /* * Invalidate the connection template */ - if (ct->vport != 65535) { + if (ct->vport != htons(0xffff)) { if (ip_vs_conn_unhash(ct)) { - ct->dport = 65535; - ct->vport = 65535; + ct->dport = htons(0xffff); + ct->vport = htons(0xffff); ct->cport = 0; ip_vs_conn_hash(ct); } @@ -596,8 +596,8 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp) * Create a new connection entry and hash it into the ip_vs_conn_tab */ struct ip_vs_conn * -ip_vs_conn_new(int proto, __u32 caddr, __u16 cport, __u32 vaddr, __u16 vport, - __u32 daddr, __u16 dport, unsigned flags, +ip_vs_conn_new(int proto, __be32 caddr, __be16 cport, __be32 vaddr, __be16 vport, + __be32 daddr, __be16 dport, unsigned flags, struct ip_vs_dest *dest) { struct ip_vs_conn *cp; diff --git a/net/ipv4/ipvs/ip_vs_core.c b/net/ipv4/ipvs/ip_vs_core.c index 3f47ad8e1cad..6dee03935f78 100644 --- a/net/ipv4/ipvs/ip_vs_core.c +++ b/net/ipv4/ipvs/ip_vs_core.c @@ -209,14 +209,14 @@ int ip_vs_make_skb_writable(struct sk_buff **pskb, int writable_len) static struct ip_vs_conn * ip_vs_sched_persist(struct ip_vs_service *svc, const struct sk_buff *skb, - __u16 ports[2]) + __be16 ports[2]) { struct ip_vs_conn *cp = NULL; struct iphdr *iph = skb->nh.iph; struct ip_vs_dest *dest; struct ip_vs_conn *ct; - __u16 dport; /* destination port to forward */ - __u32 snet; /* source network of the client, after masking */ + __be16 dport; /* destination port to forward */ + __be32 snet; /* source network of the client, after masking */ /* Mask saddr with the netmask to adjust template granularity */ snet = iph->saddr & svc->netmask; @@ -383,7 +383,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) struct ip_vs_conn *cp = NULL; struct iphdr *iph = skb->nh.iph; struct ip_vs_dest *dest; - __u16 _ports[2], *pptr; + __be16 _ports[2], *pptr; pptr = skb_header_pointer(skb, iph->ihl*4, sizeof(_ports), _ports); @@ -446,7 +446,7 @@ ip_vs_schedule(struct ip_vs_service *svc, const struct sk_buff *skb) int ip_vs_leave(struct ip_vs_service *svc, struct sk_buff *skb, struct ip_vs_protocol *pp) { - __u16 _ports[2], *pptr; + __be16 _ports[2], *pptr; struct iphdr *iph = skb->nh.iph; pptr = skb_header_pointer(skb, iph->ihl*4, @@ -576,7 +576,7 @@ void ip_vs_nat_icmp(struct sk_buff *skb, struct ip_vs_protocol *pp, /* the TCP/UDP port */ if (IPPROTO_TCP == ciph->protocol || IPPROTO_UDP == ciph->protocol) { - __u16 *ports = (void *)ciph + ciph->ihl*4; + __be16 *ports = (void *)ciph + ciph->ihl*4; if (inout) ports[1] = cp->vport; @@ -775,7 +775,7 @@ ip_vs_out(unsigned int hooknum, struct sk_buff **pskb, if (sysctl_ip_vs_nat_icmp_send && (pp->protocol == IPPROTO_TCP || pp->protocol == IPPROTO_UDP)) { - __u16 _ports[2], *pptr; + __be16 _ports[2], *pptr; pptr = skb_header_pointer(skb, ihl, sizeof(_ports), _ports); diff --git a/net/ipv4/ipvs/ip_vs_ctl.c b/net/ipv4/ipvs/ip_vs_ctl.c index 6a28fafe910c..f261616e4602 100644 --- a/net/ipv4/ipvs/ip_vs_ctl.c +++ b/net/ipv4/ipvs/ip_vs_ctl.c @@ -283,7 +283,7 @@ static atomic_t ip_vs_nullsvc_counter = ATOMIC_INIT(0); * Returns hash value for virtual service */ static __inline__ unsigned -ip_vs_svc_hashkey(unsigned proto, __u32 addr, __u16 port) +ip_vs_svc_hashkey(unsigned proto, __be32 addr, __be16 port) { register unsigned porth = ntohs(port); @@ -365,7 +365,7 @@ static int ip_vs_svc_unhash(struct ip_vs_service *svc) * Get service by {proto,addr,port} in the service table. */ static __inline__ struct ip_vs_service * -__ip_vs_service_get(__u16 protocol, __u32 vaddr, __u16 vport) +__ip_vs_service_get(__u16 protocol, __be32 vaddr, __be16 vport) { unsigned hash; struct ip_vs_service *svc; @@ -410,7 +410,7 @@ static __inline__ struct ip_vs_service *__ip_vs_svc_fwm_get(__u32 fwmark) } struct ip_vs_service * -ip_vs_service_get(__u32 fwmark, __u16 protocol, __u32 vaddr, __u16 vport) +ip_vs_service_get(__u32 fwmark, __u16 protocol, __be32 vaddr, __be16 vport) { struct ip_vs_service *svc; @@ -480,7 +480,7 @@ __ip_vs_unbind_svc(struct ip_vs_dest *dest) /* * Returns hash value for real service */ -static __inline__ unsigned ip_vs_rs_hashkey(__u32 addr, __u16 port) +static __inline__ unsigned ip_vs_rs_hashkey(__be32 addr, __be16 port) { register unsigned porth = ntohs(port); @@ -531,7 +531,7 @@ static int ip_vs_rs_unhash(struct ip_vs_dest *dest) * Lookup real service by in the real service table. */ struct ip_vs_dest * -ip_vs_lookup_real_service(__u16 protocol, __u32 daddr, __u16 dport) +ip_vs_lookup_real_service(__u16 protocol, __be32 daddr, __be16 dport) { unsigned hash; struct ip_vs_dest *dest; @@ -562,7 +562,7 @@ ip_vs_lookup_real_service(__u16 protocol, __u32 daddr, __u16 dport) * Lookup destination by {addr,port} in the given service */ static struct ip_vs_dest * -ip_vs_lookup_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport) +ip_vs_lookup_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) { struct ip_vs_dest *dest; @@ -591,7 +591,7 @@ ip_vs_lookup_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport) * scheduling. */ static struct ip_vs_dest * -ip_vs_trash_get_dest(struct ip_vs_service *svc, __u32 daddr, __u16 dport) +ip_vs_trash_get_dest(struct ip_vs_service *svc, __be32 daddr, __be16 dport) { struct ip_vs_dest *dest, *nxt; @@ -773,8 +773,8 @@ static int ip_vs_add_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) { struct ip_vs_dest *dest; - __u32 daddr = udest->addr; - __u16 dport = udest->port; + __be32 daddr = udest->addr; + __be16 dport = udest->port; int ret; EnterFunction(2); @@ -879,8 +879,8 @@ static int ip_vs_edit_dest(struct ip_vs_service *svc, struct ip_vs_dest_user *udest) { struct ip_vs_dest *dest; - __u32 daddr = udest->addr; - __u16 dport = udest->port; + __be32 daddr = udest->addr; + __be16 dport = udest->port; EnterFunction(2); @@ -991,8 +991,8 @@ static int ip_vs_del_dest(struct ip_vs_service *svc,struct ip_vs_dest_user *udest) { struct ip_vs_dest *dest; - __u32 daddr = udest->addr; - __u16 dport = udest->port; + __be32 daddr = udest->addr; + __be16 dport = udest->port; EnterFunction(2); diff --git a/net/ipv4/ipvs/ip_vs_dh.c b/net/ipv4/ipvs/ip_vs_dh.c index 9fee19c4c617..502111fba872 100644 --- a/net/ipv4/ipvs/ip_vs_dh.c +++ b/net/ipv4/ipvs/ip_vs_dh.c @@ -66,7 +66,7 @@ struct ip_vs_dh_bucket { /* * Returns hash value for IPVS DH entry */ -static inline unsigned ip_vs_dh_hashkey(__u32 addr) +static inline unsigned ip_vs_dh_hashkey(__be32 addr) { return (ntohl(addr)*2654435761UL) & IP_VS_DH_TAB_MASK; } @@ -76,7 +76,7 @@ static inline unsigned ip_vs_dh_hashkey(__u32 addr) * Get ip_vs_dest associated with supplied parameters. */ static inline struct ip_vs_dest * -ip_vs_dh_get(struct ip_vs_dh_bucket *tbl, __u32 addr) +ip_vs_dh_get(struct ip_vs_dh_bucket *tbl, __be32 addr) { return (tbl[ip_vs_dh_hashkey(addr)]).dest; } diff --git a/net/ipv4/ipvs/ip_vs_ftp.c b/net/ipv4/ipvs/ip_vs_ftp.c index 62505d8f9ff6..b3bbb4a07ae0 100644 --- a/net/ipv4/ipvs/ip_vs_ftp.c +++ b/net/ipv4/ipvs/ip_vs_ftp.c @@ -74,7 +74,7 @@ ip_vs_ftp_done_conn(struct ip_vs_app *app, struct ip_vs_conn *cp) */ static int ip_vs_ftp_get_addrport(char *data, char *data_limit, const char *pattern, size_t plen, char term, - __u32 *addr, __u16 *port, + __be32 *addr, __be16 *port, char **start, char **end) { unsigned char p[6]; @@ -140,8 +140,8 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, struct tcphdr *th; char *data, *data_limit; char *start, *end; - __u32 from; - __u16 port; + __be32 from; + __be16 port; struct ip_vs_conn *n_cp; char buf[24]; /* xxx.xxx.xxx.xxx,ppp,ppp\000 */ unsigned buf_len; @@ -199,7 +199,7 @@ static int ip_vs_ftp_out(struct ip_vs_app *app, struct ip_vs_conn *cp, from = n_cp->vaddr; port = n_cp->vport; sprintf(buf,"%d,%d,%d,%d,%d,%d", NIPQUAD(from), - port&255, (port>>8)&255); + ntohs(port)&255, (ntohs(port)>>8)&255); buf_len = strlen(buf); /* @@ -243,8 +243,8 @@ static int ip_vs_ftp_in(struct ip_vs_app *app, struct ip_vs_conn *cp, struct tcphdr *th; char *data, *data_start, *data_limit; char *start, *end; - __u32 to; - __u16 port; + __be32 to; + __be16 port; struct ip_vs_conn *n_cp; /* no diff required for incoming packets */ diff --git a/net/ipv4/ipvs/ip_vs_lblc.c b/net/ipv4/ipvs/ip_vs_lblc.c index 6e5cb92a5c83..524751e031de 100644 --- a/net/ipv4/ipvs/ip_vs_lblc.c +++ b/net/ipv4/ipvs/ip_vs_lblc.c @@ -87,7 +87,7 @@ static int sysctl_ip_vs_lblc_expiration = 24*60*60*HZ; */ struct ip_vs_lblc_entry { struct list_head list; - __u32 addr; /* destination IP address */ + __be32 addr; /* destination IP address */ struct ip_vs_dest *dest; /* real server (cache) */ unsigned long lastuse; /* last used time */ }; @@ -160,7 +160,7 @@ static struct ctl_table_header * sysctl_header; * IP address to a server. */ static inline struct ip_vs_lblc_entry * -ip_vs_lblc_new(__u32 daddr, struct ip_vs_dest *dest) +ip_vs_lblc_new(__be32 daddr, struct ip_vs_dest *dest) { struct ip_vs_lblc_entry *en; @@ -195,7 +195,7 @@ static inline void ip_vs_lblc_free(struct ip_vs_lblc_entry *en) /* * Returns hash value for IPVS LBLC entry */ -static inline unsigned ip_vs_lblc_hashkey(__u32 addr) +static inline unsigned ip_vs_lblc_hashkey(__be32 addr) { return (ntohl(addr)*2654435761UL) & IP_VS_LBLC_TAB_MASK; } @@ -234,7 +234,7 @@ ip_vs_lblc_hash(struct ip_vs_lblc_table *tbl, struct ip_vs_lblc_entry *en) * Get ip_vs_lblc_entry associated with supplied parameters. */ static inline struct ip_vs_lblc_entry * -ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __u32 addr) +ip_vs_lblc_get(struct ip_vs_lblc_table *tbl, __be32 addr) { unsigned hash; struct ip_vs_lblc_entry *en; diff --git a/net/ipv4/ipvs/ip_vs_lblcr.c b/net/ipv4/ipvs/ip_vs_lblcr.c index 32ba37ba72d8..08990192b6ec 100644 --- a/net/ipv4/ipvs/ip_vs_lblcr.c +++ b/net/ipv4/ipvs/ip_vs_lblcr.c @@ -276,7 +276,7 @@ static inline struct ip_vs_dest *ip_vs_dest_set_max(struct ip_vs_dest_set *set) */ struct ip_vs_lblcr_entry { struct list_head list; - __u32 addr; /* destination IP address */ + __be32 addr; /* destination IP address */ struct ip_vs_dest_set set; /* destination server set */ unsigned long lastuse; /* last used time */ }; @@ -348,7 +348,7 @@ static struct ctl_table_header * sysctl_header; * new/free a ip_vs_lblcr_entry, which is a mapping of a destination * IP address to a server. */ -static inline struct ip_vs_lblcr_entry *ip_vs_lblcr_new(__u32 daddr) +static inline struct ip_vs_lblcr_entry *ip_vs_lblcr_new(__be32 daddr) { struct ip_vs_lblcr_entry *en; @@ -381,7 +381,7 @@ static inline void ip_vs_lblcr_free(struct ip_vs_lblcr_entry *en) /* * Returns hash value for IPVS LBLCR entry */ -static inline unsigned ip_vs_lblcr_hashkey(__u32 addr) +static inline unsigned ip_vs_lblcr_hashkey(__be32 addr) { return (ntohl(addr)*2654435761UL) & IP_VS_LBLCR_TAB_MASK; } @@ -420,7 +420,7 @@ ip_vs_lblcr_hash(struct ip_vs_lblcr_table *tbl, struct ip_vs_lblcr_entry *en) * Get ip_vs_lblcr_entry associated with supplied parameters. */ static inline struct ip_vs_lblcr_entry * -ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __u32 addr) +ip_vs_lblcr_get(struct ip_vs_lblcr_table *tbl, __be32 addr) { unsigned hash; struct ip_vs_lblcr_entry *en; diff --git a/net/ipv4/ipvs/ip_vs_proto.c b/net/ipv4/ipvs/ip_vs_proto.c index 867d4e9c6594..c4528b5c800d 100644 --- a/net/ipv4/ipvs/ip_vs_proto.c +++ b/net/ipv4/ipvs/ip_vs_proto.c @@ -176,7 +176,7 @@ ip_vs_tcpudp_debug_packet(struct ip_vs_protocol *pp, pp->name, NIPQUAD(ih->saddr), NIPQUAD(ih->daddr)); else { - __u16 _ports[2], *pptr + __be16 _ports[2], *pptr ; pptr = skb_header_pointer(skb, offset + ih->ihl*4, sizeof(_ports), _ports); diff --git a/net/ipv4/ipvs/ip_vs_proto_tcp.c b/net/ipv4/ipvs/ip_vs_proto_tcp.c index 820e8318d10d..bfe779e74590 100644 --- a/net/ipv4/ipvs/ip_vs_proto_tcp.c +++ b/net/ipv4/ipvs/ip_vs_proto_tcp.c @@ -29,7 +29,7 @@ static struct ip_vs_conn * tcp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, const struct iphdr *iph, unsigned int proto_off, int inverse) { - __u16 _ports[2], *pptr; + __be16 _ports[2], *pptr; pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); if (pptr == NULL) @@ -50,7 +50,7 @@ static struct ip_vs_conn * tcp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, const struct iphdr *iph, unsigned int proto_off, int inverse) { - __u16 _ports[2], *pptr; + __be16 _ports[2], *pptr; pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); if (pptr == NULL) @@ -112,12 +112,12 @@ tcp_conn_schedule(struct sk_buff *skb, static inline void -tcp_fast_csum_update(struct tcphdr *tcph, u32 oldip, u32 newip, - u16 oldport, u16 newport) +tcp_fast_csum_update(struct tcphdr *tcph, __be32 oldip, __be32 newip, + __be16 oldport, __be16 newport) { tcph->check = ip_vs_check_diff(~oldip, newip, - ip_vs_check_diff(oldport ^ 0xFFFF, + ip_vs_check_diff(oldport ^ htonl(0xFFFF), newport, tcph->check)); } diff --git a/net/ipv4/ipvs/ip_vs_proto_udp.c b/net/ipv4/ipvs/ip_vs_proto_udp.c index 90c8166c0ec1..54aa7603591f 100644 --- a/net/ipv4/ipvs/ip_vs_proto_udp.c +++ b/net/ipv4/ipvs/ip_vs_proto_udp.c @@ -29,7 +29,7 @@ udp_conn_in_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, const struct iphdr *iph, unsigned int proto_off, int inverse) { struct ip_vs_conn *cp; - __u16 _ports[2], *pptr; + __be16 _ports[2], *pptr; pptr = skb_header_pointer(skb, proto_off, sizeof(_ports), _ports); if (pptr == NULL) @@ -54,7 +54,7 @@ udp_conn_out_get(const struct sk_buff *skb, struct ip_vs_protocol *pp, const struct iphdr *iph, unsigned int proto_off, int inverse) { struct ip_vs_conn *cp; - __u16 _ports[2], *pptr; + __be16 _ports[2], *pptr; pptr = skb_header_pointer(skb, skb->nh.iph->ihl*4, sizeof(_ports), _ports); @@ -117,15 +117,15 @@ udp_conn_schedule(struct sk_buff *skb, struct ip_vs_protocol *pp, static inline void -udp_fast_csum_update(struct udphdr *uhdr, u32 oldip, u32 newip, - u16 oldport, u16 newport) +udp_fast_csum_update(struct udphdr *uhdr, __be32 oldip, __be32 newip, + __be16 oldport, __be16 newport) { uhdr->check = ip_vs_check_diff(~oldip, newip, - ip_vs_check_diff(oldport ^ 0xFFFF, + ip_vs_check_diff(oldport ^ htonl(0xFFFF), newport, uhdr->check)); if (!uhdr->check) - uhdr->check = 0xFFFF; + uhdr->check = htonl(0xFFFF); } static int @@ -173,7 +173,7 @@ udp_snat_handler(struct sk_buff **pskb, cp->protocol, (*pskb)->csum); if (udph->check == 0) - udph->check = 0xFFFF; + udph->check = htonl(0xFFFF); IP_VS_DBG(11, "O-pkt: %s O-csum=%d (+%zd)\n", pp->name, udph->check, (char*)&(udph->check) - (char*)udph); diff --git a/net/ipv4/ipvs/ip_vs_sh.c b/net/ipv4/ipvs/ip_vs_sh.c index 7775e6cc68be..338668f88fe2 100644 --- a/net/ipv4/ipvs/ip_vs_sh.c +++ b/net/ipv4/ipvs/ip_vs_sh.c @@ -63,7 +63,7 @@ struct ip_vs_sh_bucket { /* * Returns hash value for IPVS SH entry */ -static inline unsigned ip_vs_sh_hashkey(__u32 addr) +static inline unsigned ip_vs_sh_hashkey(__be32 addr) { return (ntohl(addr)*2654435761UL) & IP_VS_SH_TAB_MASK; } @@ -73,7 +73,7 @@ static inline unsigned ip_vs_sh_hashkey(__u32 addr) * Get ip_vs_dest associated with supplied parameters. */ static inline struct ip_vs_dest * -ip_vs_sh_get(struct ip_vs_sh_bucket *tbl, __u32 addr) +ip_vs_sh_get(struct ip_vs_sh_bucket *tbl, __be32 addr) { return (tbl[ip_vs_sh_hashkey(addr)]).dest; } diff --git a/net/ipv4/ipvs/ip_vs_sync.c b/net/ipv4/ipvs/ip_vs_sync.c index 0c8d20da6139..6ab57d72b615 100644 --- a/net/ipv4/ipvs/ip_vs_sync.c +++ b/net/ipv4/ipvs/ip_vs_sync.c @@ -48,16 +48,16 @@ struct ip_vs_sync_conn { /* Protocol, addresses and port numbers */ __u8 protocol; /* Which protocol (TCP/UDP) */ - __u16 cport; - __u16 vport; - __u16 dport; - __u32 caddr; /* client address */ - __u32 vaddr; /* virtual address */ - __u32 daddr; /* destination address */ + __be16 cport; + __be16 vport; + __be16 dport; + __be32 caddr; /* client address */ + __be32 vaddr; /* virtual address */ + __be32 daddr; /* destination address */ /* Flags and state transition */ - __u16 flags; /* status flags */ - __u16 state; /* state info */ + __be16 flags; /* status flags */ + __be16 state; /* state info */ /* The sequence options start here */ }; diff --git a/net/ipv4/ipvs/ip_vs_xmit.c b/net/ipv4/ipvs/ip_vs_xmit.c index 52c12e9edbbc..e1f77bd7c9a5 100644 --- a/net/ipv4/ipvs/ip_vs_xmit.c +++ b/net/ipv4/ipvs/ip_vs_xmit.c @@ -232,7 +232,7 @@ ip_vs_nat_xmit(struct sk_buff *skb, struct ip_vs_conn *cp, /* check if it is a connection of no-client-port */ if (unlikely(cp->flags & IP_VS_CONN_F_NO_CPORT)) { - __u16 _pt, *p; + __be16 _pt, *p; p = skb_header_pointer(skb, iph->ihl*4, sizeof(_pt), &_pt); if (p == NULL) goto tx_error; -- cgit v1.3-8-gc7d7 From 32f50cdee666333168b5203c7864bede159f789e Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Thu, 28 Sep 2006 14:51:47 -0700 Subject: [NetLabel]: add audit support for configuration changes This patch adds audit support to NetLabel, including six new audit message types shown below. #define AUDIT_MAC_UNLBL_ACCEPT 1406 #define AUDIT_MAC_UNLBL_DENY 1407 #define AUDIT_MAC_CIPSOV4_ADD 1408 #define AUDIT_MAC_CIPSOV4_DEL 1409 #define AUDIT_MAC_MAP_ADD 1410 #define AUDIT_MAC_MAP_DEL 1411 Signed-off-by: Paul Moore Acked-by: James Morris Signed-off-by: David S. Miller --- include/linux/audit.h | 6 +++ include/net/cipso_ipv4.h | 5 ++- include/net/netlabel.h | 2 +- net/ipv4/cipso_ipv4.c | 8 +++- net/netlabel/netlabel_cipso_v4.c | 43 +++++++++++++----- net/netlabel/netlabel_domainhash.c | 54 +++++++++++++++++++--- net/netlabel/netlabel_domainhash.h | 6 +-- net/netlabel/netlabel_mgmt.c | 14 +++--- net/netlabel/netlabel_unlabeled.c | 36 ++++++++++++--- net/netlabel/netlabel_user.c | 91 ++++++++++++++++++++++++++++++++++++++ net/netlabel/netlabel_user.h | 6 +++ 11 files changed, 235 insertions(+), 36 deletions(-) (limited to 'include/net') diff --git a/include/linux/audit.h b/include/linux/audit.h index 40a6c26294ae..42719d07612a 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -95,6 +95,12 @@ #define AUDIT_MAC_POLICY_LOAD 1403 /* Policy file load */ #define AUDIT_MAC_STATUS 1404 /* Changed enforcing,permissive,off */ #define AUDIT_MAC_CONFIG_CHANGE 1405 /* Changes to booleans */ +#define AUDIT_MAC_UNLBL_ACCEPT 1406 /* NetLabel: allow unlabeled traffic */ +#define AUDIT_MAC_UNLBL_DENY 1407 /* NetLabel: deny unlabeled traffic */ +#define AUDIT_MAC_CIPSOV4_ADD 1408 /* NetLabel: add CIPSOv4 DOI entry */ +#define AUDIT_MAC_CIPSOV4_DEL 1409 /* NetLabel: del CIPSOv4 DOI entry */ +#define AUDIT_MAC_MAP_ADD 1410 /* NetLabel: add LSM domain mapping */ +#define AUDIT_MAC_MAP_DEL 1411 /* NetLabel: del LSM domain mapping */ #define AUDIT_FIRST_KERN_ANOM_MSG 1700 #define AUDIT_LAST_KERN_ANOM_MSG 1799 diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index 2d72496c2029..5d6ae1b2b196 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -128,7 +128,9 @@ extern int cipso_v4_rbm_strictvalid; #ifdef CONFIG_NETLABEL int cipso_v4_doi_add(struct cipso_v4_doi *doi_def); -int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head)); +int cipso_v4_doi_remove(u32 doi, + u32 audit_secid, + void (*callback) (struct rcu_head * head)); struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi); int cipso_v4_doi_walk(u32 *skip_cnt, int (*callback) (struct cipso_v4_doi *doi_def, void *arg), @@ -143,6 +145,7 @@ static inline int cipso_v4_doi_add(struct cipso_v4_doi *doi_def) } static inline int cipso_v4_doi_remove(u32 doi, + u32 audit_secid, void (*callback) (struct rcu_head * head)) { return 0; diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 6692430063fd..190bfdbbdba6 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -96,7 +96,7 @@ struct netlbl_dom_map; /* Domain mapping operations */ -int netlbl_domhsh_remove(const char *domain); +int netlbl_domhsh_remove(const char *domain, u32 audit_secid); /* LSM security attributes */ struct netlbl_lsm_cache { diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index e6ce0b3ba62a..c4e469ff842d 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -474,6 +474,7 @@ doi_add_failure_rlock: /** * cipso_v4_doi_remove - Remove an existing DOI from the CIPSO protocol engine * @doi: the DOI value + * @audit_secid: the LSM secid to use in the audit message * @callback: the DOI cleanup/free callback * * Description: @@ -483,7 +484,9 @@ doi_add_failure_rlock: * success and negative values on failure. * */ -int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head)) +int cipso_v4_doi_remove(u32 doi, + u32 audit_secid, + void (*callback) (struct rcu_head * head)) { struct cipso_v4_doi *doi_def; struct cipso_v4_domhsh_entry *dom_iter; @@ -502,7 +505,8 @@ int cipso_v4_doi_remove(u32 doi, void (*callback) (struct rcu_head * head)) spin_unlock(&cipso_v4_doi_list_lock); list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list) if (dom_iter->valid) - netlbl_domhsh_remove(dom_iter->domain); + netlbl_domhsh_remove(dom_iter->domain, + audit_secid); cipso_v4_cache_invalidate(); rcu_read_unlock(); diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 4125a55f469f..09986ca962a6 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include @@ -162,8 +163,7 @@ static int netlbl_cipsov4_add_std(struct genl_info *info) int nla_a_rem; int nla_b_rem; - if (!info->attrs[NLBL_CIPSOV4_A_DOI] || - !info->attrs[NLBL_CIPSOV4_A_TAGLST] || + if (!info->attrs[NLBL_CIPSOV4_A_TAGLST] || !info->attrs[NLBL_CIPSOV4_A_MLSLVLLST]) return -EINVAL; @@ -344,8 +344,7 @@ static int netlbl_cipsov4_add_pass(struct genl_info *info) int ret_val; struct cipso_v4_doi *doi_def = NULL; - if (!info->attrs[NLBL_CIPSOV4_A_DOI] || - !info->attrs[NLBL_CIPSOV4_A_TAGLST]) + if (!info->attrs[NLBL_CIPSOV4_A_TAGLST]) return -EINVAL; doi_def = kmalloc(sizeof(*doi_def), GFP_KERNEL); @@ -381,21 +380,35 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info) { int ret_val = -EINVAL; - u32 map_type; + u32 type; + u32 doi; + const char *type_str = "(unknown)"; + struct audit_buffer *audit_buf; - if (!info->attrs[NLBL_CIPSOV4_A_MTYPE]) + if (!info->attrs[NLBL_CIPSOV4_A_DOI] || + !info->attrs[NLBL_CIPSOV4_A_MTYPE]) return -EINVAL; - map_type = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE]); - switch (map_type) { + type = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE]); + switch (type) { case CIPSO_V4_MAP_STD: + type_str = "std"; ret_val = netlbl_cipsov4_add_std(info); break; case CIPSO_V4_MAP_PASS: + type_str = "pass"; ret_val = netlbl_cipsov4_add_pass(info); break; } + if (ret_val == 0) { + doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); + audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD, + NETLINK_CB(skb).sid); + audit_log_format(audit_buf, " doi=%u type=%s", doi, type_str); + audit_log_end(audit_buf); + } + return ret_val; } @@ -653,11 +666,21 @@ static int netlbl_cipsov4_listall(struct sk_buff *skb, static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info) { int ret_val = -EINVAL; - u32 doi; + u32 doi = 0; + struct audit_buffer *audit_buf; if (info->attrs[NLBL_CIPSOV4_A_DOI]) { doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); - ret_val = cipso_v4_doi_remove(doi, netlbl_cipsov4_doi_free); + ret_val = cipso_v4_doi_remove(doi, + NETLINK_CB(skb).sid, + netlbl_cipsov4_doi_free); + } + + if (ret_val == 0) { + audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL, + NETLINK_CB(skb).sid); + audit_log_format(audit_buf, " doi=%u", doi); + audit_log_end(audit_buf); } return ret_val; diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index f56d7a8ac7b7..d64e2ae3b129 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -35,12 +35,14 @@ #include #include #include +#include #include #include #include #include "netlabel_mgmt.h" #include "netlabel_domainhash.h" +#include "netlabel_user.h" struct netlbl_domhsh_tbl { struct list_head *tbl; @@ -186,6 +188,7 @@ int netlbl_domhsh_init(u32 size) /** * netlbl_domhsh_add - Adds a entry to the domain hash table * @entry: the entry to add + * @audit_secid: the LSM secid to use in the audit message * * Description: * Adds a new entry to the domain hash table and handles any updates to the @@ -193,10 +196,12 @@ int netlbl_domhsh_init(u32 size) * negative on failure. * */ -int netlbl_domhsh_add(struct netlbl_dom_map *entry) +int netlbl_domhsh_add(struct netlbl_dom_map *entry, u32 audit_secid) { int ret_val; u32 bkt; + struct audit_buffer *audit_buf; + char *audit_domain; switch (entry->type) { case NETLBL_NLTYPE_UNLABELED: @@ -236,6 +241,26 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry) spin_unlock(&netlbl_domhsh_def_lock); } else ret_val = -EINVAL; + if (ret_val == 0) { + if (entry->domain != NULL) + audit_domain = entry->domain; + else + audit_domain = "(default)"; + audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, + audit_secid); + audit_log_format(audit_buf, " domain=%s", audit_domain); + switch (entry->type) { + case NETLBL_NLTYPE_UNLABELED: + audit_log_format(audit_buf, " protocol=unlbl"); + break; + case NETLBL_NLTYPE_CIPSOV4: + audit_log_format(audit_buf, + " protocol=cipsov4 doi=%u", + entry->type_def.cipsov4->doi); + break; + } + audit_log_end(audit_buf); + } rcu_read_unlock(); if (ret_val != 0) { @@ -254,6 +279,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry) /** * netlbl_domhsh_add_default - Adds the default entry to the domain hash table * @entry: the entry to add + * @audit_secid: the LSM secid to use in the audit message * * Description: * Adds a new default entry to the domain hash table and handles any updates @@ -261,14 +287,15 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry) * negative on failure. * */ -int netlbl_domhsh_add_default(struct netlbl_dom_map *entry) +int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, u32 audit_secid) { - return netlbl_domhsh_add(entry); + return netlbl_domhsh_add(entry, audit_secid); } /** * netlbl_domhsh_remove - Removes an entry from the domain hash table * @domain: the domain to remove + * @audit_secid: the LSM secid to use in the audit message * * Description: * Removes an entry from the domain hash table and handles any updates to the @@ -276,10 +303,12 @@ int netlbl_domhsh_add_default(struct netlbl_dom_map *entry) * negative on failure. * */ -int netlbl_domhsh_remove(const char *domain) +int netlbl_domhsh_remove(const char *domain, u32 audit_secid) { int ret_val = -ENOENT; struct netlbl_dom_map *entry; + struct audit_buffer *audit_buf; + char *audit_domain; rcu_read_lock(); if (domain != NULL) @@ -316,8 +345,18 @@ int netlbl_domhsh_remove(const char *domain) ret_val = -ENOENT; spin_unlock(&netlbl_domhsh_def_lock); } - if (ret_val == 0) + if (ret_val == 0) { + if (entry->domain != NULL) + audit_domain = entry->domain; + else + audit_domain = "(default)"; + audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, + audit_secid); + audit_log_format(audit_buf, " domain=%s", audit_domain); + audit_log_end(audit_buf); + call_rcu(&entry->rcu, netlbl_domhsh_free_entry); + } remove_return: rcu_read_unlock(); @@ -326,6 +365,7 @@ remove_return: /** * netlbl_domhsh_remove_default - Removes the default entry from the table + * @audit_secid: the LSM secid to use in the audit message * * Description: * Removes/resets the default entry for the domain hash table and handles any @@ -333,9 +373,9 @@ remove_return: * success, non-zero on failure. * */ -int netlbl_domhsh_remove_default(void) +int netlbl_domhsh_remove_default(u32 audit_secid) { - return netlbl_domhsh_remove(NULL); + return netlbl_domhsh_remove(NULL, audit_secid); } /** diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index 02af72a7877c..d50f13cacdca 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h @@ -57,9 +57,9 @@ struct netlbl_dom_map { int netlbl_domhsh_init(u32 size); /* Manipulate the domain hash table */ -int netlbl_domhsh_add(struct netlbl_dom_map *entry); -int netlbl_domhsh_add_default(struct netlbl_dom_map *entry); -int netlbl_domhsh_remove_default(void); +int netlbl_domhsh_add(struct netlbl_dom_map *entry, u32 audit_secid); +int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, u32 audit_secid); +int netlbl_domhsh_remove_default(u32 audit_secid); struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); int netlbl_domhsh_walk(u32 *skip_bkt, u32 *skip_chain, diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 8626c9f678eb..0ac314f18ad1 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -108,7 +108,7 @@ static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) switch (entry->type) { case NETLBL_NLTYPE_UNLABELED: - ret_val = netlbl_domhsh_add(entry); + ret_val = netlbl_domhsh_add(entry, NETLINK_CB(skb).sid); break; case NETLBL_NLTYPE_CIPSOV4: if (!info->attrs[NLBL_MGMT_A_CV4DOI]) @@ -125,7 +125,7 @@ static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) rcu_read_unlock(); goto add_failure; } - ret_val = netlbl_domhsh_add(entry); + ret_val = netlbl_domhsh_add(entry, NETLINK_CB(skb).sid); rcu_read_unlock(); break; default: @@ -161,7 +161,7 @@ static int netlbl_mgmt_remove(struct sk_buff *skb, struct genl_info *info) return -EINVAL; domain = nla_data(info->attrs[NLBL_MGMT_A_DOMAIN]); - return netlbl_domhsh_remove(domain); + return netlbl_domhsh_remove(domain, NETLINK_CB(skb).sid); } /** @@ -277,7 +277,8 @@ static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info) switch (entry->type) { case NETLBL_NLTYPE_UNLABELED: - ret_val = netlbl_domhsh_add_default(entry); + ret_val = netlbl_domhsh_add_default(entry, + NETLINK_CB(skb).sid); break; case NETLBL_NLTYPE_CIPSOV4: if (!info->attrs[NLBL_MGMT_A_CV4DOI]) @@ -294,7 +295,8 @@ static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info) rcu_read_unlock(); goto adddef_failure; } - ret_val = netlbl_domhsh_add_default(entry); + ret_val = netlbl_domhsh_add_default(entry, + NETLINK_CB(skb).sid); rcu_read_unlock(); break; default: @@ -322,7 +324,7 @@ adddef_failure: */ static int netlbl_mgmt_removedef(struct sk_buff *skb, struct genl_info *info) { - return netlbl_domhsh_remove_default(); + return netlbl_domhsh_remove_default(NETLINK_CB(skb).sid); } /** diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index 440f5c4e1e2d..ab36675fee8c 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -63,6 +63,27 @@ static struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = { [NLBL_UNLABEL_A_ACPTFLG] = { .type = NLA_U8 }, }; +/* + * Helper Functions + */ + +/** + * netlbl_unlabel_acceptflg_set - Set the unlabeled accept flag + * @value: desired value + * @audit_secid: the LSM secid to use in the audit message + * + * Description: + * Set the value of the unlabeled accept flag to @value. + * + */ +static void netlbl_unlabel_acceptflg_set(u8 value, u32 audit_secid) +{ + atomic_set(&netlabel_unlabel_accept_flg, value); + netlbl_audit_nomsg((value ? + AUDIT_MAC_UNLBL_ACCEPT : AUDIT_MAC_UNLBL_DENY), + audit_secid); +} + /* * NetLabel Command Handlers */ @@ -79,18 +100,18 @@ static struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = { */ static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info) { - int ret_val = -EINVAL; u8 value; if (info->attrs[NLBL_UNLABEL_A_ACPTFLG]) { value = nla_get_u8(info->attrs[NLBL_UNLABEL_A_ACPTFLG]); if (value == 1 || value == 0) { - atomic_set(&netlabel_unlabel_accept_flg, value); - ret_val = 0; + netlbl_unlabel_acceptflg_set(value, + NETLINK_CB(skb).sid); + return 0; } } - return ret_val; + return -EINVAL; } /** @@ -229,16 +250,19 @@ int netlbl_unlabel_defconf(void) { int ret_val; struct netlbl_dom_map *entry; + u32 secid; + + security_task_getsecid(current, &secid); entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry == NULL) return -ENOMEM; entry->type = NETLBL_NLTYPE_UNLABELED; - ret_val = netlbl_domhsh_add_default(entry); + ret_val = netlbl_domhsh_add_default(entry, secid); if (ret_val != 0) return ret_val; - atomic_set(&netlabel_unlabel_accept_flg, 1); + netlbl_unlabel_acceptflg_set(1, secid); return 0; } diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index eeb7d768d2bb..c2343af584cb 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -32,6 +32,9 @@ #include #include #include +#include +#include +#include #include #include #include @@ -74,3 +77,91 @@ int netlbl_netlink_init(void) return 0; } + +/* + * NetLabel Audit Functions + */ + +/** + * netlbl_audit_start_common - Start an audit message + * @type: audit message type + * @secid: LSM context ID + * + * Description: + * Start an audit message using the type specified in @type and fill the audit + * message with some fields common to all NetLabel audit messages. Returns + * a pointer to the audit buffer on success, NULL on failure. + * + */ +struct audit_buffer *netlbl_audit_start_common(int type, u32 secid) +{ + struct audit_context *audit_ctx = current->audit_context; + struct audit_buffer *audit_buf; + uid_t audit_loginuid; + const char *audit_tty; + char audit_comm[sizeof(current->comm)]; + struct vm_area_struct *vma; + char *secctx; + u32 secctx_len; + + audit_buf = audit_log_start(audit_ctx, GFP_ATOMIC, type); + if (audit_buf == NULL) + return NULL; + + audit_loginuid = audit_get_loginuid(audit_ctx); + if (current->signal && + current->signal->tty && + current->signal->tty->name) + audit_tty = current->signal->tty->name; + else + audit_tty = "(none)"; + get_task_comm(audit_comm, current); + + audit_log_format(audit_buf, + "netlabel: auid=%u uid=%u tty=%s pid=%d", + audit_loginuid, + current->uid, + audit_tty, + current->pid); + audit_log_format(audit_buf, " comm="); + audit_log_untrustedstring(audit_buf, audit_comm); + if (current->mm) { + down_read(¤t->mm->mmap_sem); + vma = current->mm->mmap; + while (vma) { + if ((vma->vm_flags & VM_EXECUTABLE) && + vma->vm_file) { + audit_log_d_path(audit_buf, + " exe=", + vma->vm_file->f_dentry, + vma->vm_file->f_vfsmnt); + break; + } + vma = vma->vm_next; + } + up_read(¤t->mm->mmap_sem); + } + + if (secid != 0 && + security_secid_to_secctx(secid, &secctx, &secctx_len) == 0) + audit_log_format(audit_buf, " subj=%s", secctx); + + return audit_buf; +} + +/** + * netlbl_audit_nomsg - Send an audit message without additional text + * @type: audit message type + * @secid: LSM context ID + * + * Description: + * Send an audit message with only the common NetLabel audit fields. + * + */ +void netlbl_audit_nomsg(int type, u32 secid) +{ + struct audit_buffer *audit_buf; + + audit_buf = netlbl_audit_start_common(type, secid); + audit_log_end(audit_buf); +} diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h index 3f9386b917df..ab840acfc964 100644 --- a/net/netlabel/netlabel_user.h +++ b/net/netlabel/netlabel_user.h @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include @@ -75,4 +76,9 @@ static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb, int netlbl_netlink_init(void); +/* NetLabel Audit Functions */ + +struct audit_buffer *netlbl_audit_start_common(int type, u32 secid); +void netlbl_audit_nomsg(int type, u32 secid); + #endif -- cgit v1.3-8-gc7d7 From 95d4e6be25a68cd9fbe8c0d356b585504d8db1c7 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Fri, 29 Sep 2006 17:05:05 -0700 Subject: [NetLabel]: audit fixups due to delayed feedback Fix some issues Steve Grubb had with the way NetLabel was using the audit subsystem. This should make NetLabel more consistent with other kernel generated audit messages specifying configuration changes. Signed-off-by: Paul Moore Acked-by: Steve Grubb Signed-off-by: David S. Miller --- include/linux/audit.h | 11 +++-- include/net/cipso_ipv4.h | 4 +- include/net/netlabel.h | 8 +++- net/ipv4/cipso_ipv4.c | 4 +- net/netlabel/netlabel_cipso_v4.c | 48 +++++++++++++--------- net/netlabel/netlabel_domainhash.c | 82 ++++++++++++++++++++------------------ net/netlabel/netlabel_domainhash.h | 8 ++-- net/netlabel/netlabel_mgmt.c | 27 +++++++++---- net/netlabel/netlabel_unlabeled.c | 34 +++++++++++----- net/netlabel/netlabel_user.c | 66 ++++-------------------------- net/netlabel/netlabel_user.h | 16 +++++++- 11 files changed, 157 insertions(+), 151 deletions(-) (limited to 'include/net') diff --git a/include/linux/audit.h b/include/linux/audit.h index 42719d07612a..c3aa09751814 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -95,12 +95,11 @@ #define AUDIT_MAC_POLICY_LOAD 1403 /* Policy file load */ #define AUDIT_MAC_STATUS 1404 /* Changed enforcing,permissive,off */ #define AUDIT_MAC_CONFIG_CHANGE 1405 /* Changes to booleans */ -#define AUDIT_MAC_UNLBL_ACCEPT 1406 /* NetLabel: allow unlabeled traffic */ -#define AUDIT_MAC_UNLBL_DENY 1407 /* NetLabel: deny unlabeled traffic */ -#define AUDIT_MAC_CIPSOV4_ADD 1408 /* NetLabel: add CIPSOv4 DOI entry */ -#define AUDIT_MAC_CIPSOV4_DEL 1409 /* NetLabel: del CIPSOv4 DOI entry */ -#define AUDIT_MAC_MAP_ADD 1410 /* NetLabel: add LSM domain mapping */ -#define AUDIT_MAC_MAP_DEL 1411 /* NetLabel: del LSM domain mapping */ +#define AUDIT_MAC_UNLBL_ALLOW 1406 /* NetLabel: allow unlabeled traffic */ +#define AUDIT_MAC_CIPSOV4_ADD 1407 /* NetLabel: add CIPSOv4 DOI entry */ +#define AUDIT_MAC_CIPSOV4_DEL 1408 /* NetLabel: del CIPSOv4 DOI entry */ +#define AUDIT_MAC_MAP_ADD 1409 /* NetLabel: add LSM domain mapping */ +#define AUDIT_MAC_MAP_DEL 1410 /* NetLabel: del LSM domain mapping */ #define AUDIT_FIRST_KERN_ANOM_MSG 1700 #define AUDIT_LAST_KERN_ANOM_MSG 1799 diff --git a/include/net/cipso_ipv4.h b/include/net/cipso_ipv4.h index 5d6ae1b2b196..718b4d9c891f 100644 --- a/include/net/cipso_ipv4.h +++ b/include/net/cipso_ipv4.h @@ -129,7 +129,7 @@ extern int cipso_v4_rbm_strictvalid; #ifdef CONFIG_NETLABEL int cipso_v4_doi_add(struct cipso_v4_doi *doi_def); int cipso_v4_doi_remove(u32 doi, - u32 audit_secid, + struct netlbl_audit *audit_info, void (*callback) (struct rcu_head * head)); struct cipso_v4_doi *cipso_v4_doi_getdef(u32 doi); int cipso_v4_doi_walk(u32 *skip_cnt, @@ -145,7 +145,7 @@ static inline int cipso_v4_doi_add(struct cipso_v4_doi *doi_def) } static inline int cipso_v4_doi_remove(u32 doi, - u32 audit_secid, + struct netlbl_audit *audit_info, void (*callback) (struct rcu_head * head)) { return 0; diff --git a/include/net/netlabel.h b/include/net/netlabel.h index 190bfdbbdba6..c63a58058e21 100644 --- a/include/net/netlabel.h +++ b/include/net/netlabel.h @@ -92,11 +92,17 @@ * */ +/* NetLabel audit information */ +struct netlbl_audit { + u32 secid; + uid_t loginuid; +}; + /* Domain mapping definition struct */ struct netlbl_dom_map; /* Domain mapping operations */ -int netlbl_domhsh_remove(const char *domain, u32 audit_secid); +int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info); /* LSM security attributes */ struct netlbl_lsm_cache { diff --git a/net/ipv4/cipso_ipv4.c b/net/ipv4/cipso_ipv4.c index c4e469ff842d..a8e2e879a647 100644 --- a/net/ipv4/cipso_ipv4.c +++ b/net/ipv4/cipso_ipv4.c @@ -485,7 +485,7 @@ doi_add_failure_rlock: * */ int cipso_v4_doi_remove(u32 doi, - u32 audit_secid, + struct netlbl_audit *audit_info, void (*callback) (struct rcu_head * head)) { struct cipso_v4_doi *doi_def; @@ -506,7 +506,7 @@ int cipso_v4_doi_remove(u32 doi, list_for_each_entry_rcu(dom_iter, &doi_def->dom_list, list) if (dom_iter->valid) netlbl_domhsh_remove(dom_iter->domain, - audit_secid); + audit_info); cipso_v4_cache_invalidate(); rcu_read_unlock(); diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 09986ca962a6..a6ce1d6d5c59 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -384,11 +384,15 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info) u32 doi; const char *type_str = "(unknown)"; struct audit_buffer *audit_buf; + struct netlbl_audit audit_info; if (!info->attrs[NLBL_CIPSOV4_A_DOI] || !info->attrs[NLBL_CIPSOV4_A_MTYPE]) return -EINVAL; + doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); + netlbl_netlink_auditinfo(skb, &audit_info); + type = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_MTYPE]); switch (type) { case CIPSO_V4_MAP_STD: @@ -401,13 +405,14 @@ static int netlbl_cipsov4_add(struct sk_buff *skb, struct genl_info *info) break; } - if (ret_val == 0) { - doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); - audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD, - NETLINK_CB(skb).sid); - audit_log_format(audit_buf, " doi=%u type=%s", doi, type_str); - audit_log_end(audit_buf); - } + audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_ADD, + &audit_info); + audit_log_format(audit_buf, + " cipso_doi=%u cipso_type=%s res=%u", + doi, + type_str, + ret_val == 0 ? 1 : 0); + audit_log_end(audit_buf); return ret_val; } @@ -668,20 +673,25 @@ static int netlbl_cipsov4_remove(struct sk_buff *skb, struct genl_info *info) int ret_val = -EINVAL; u32 doi = 0; struct audit_buffer *audit_buf; + struct netlbl_audit audit_info; - if (info->attrs[NLBL_CIPSOV4_A_DOI]) { - doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); - ret_val = cipso_v4_doi_remove(doi, - NETLINK_CB(skb).sid, - netlbl_cipsov4_doi_free); - } + if (!info->attrs[NLBL_CIPSOV4_A_DOI]) + return -EINVAL; - if (ret_val == 0) { - audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL, - NETLINK_CB(skb).sid); - audit_log_format(audit_buf, " doi=%u", doi); - audit_log_end(audit_buf); - } + doi = nla_get_u32(info->attrs[NLBL_CIPSOV4_A_DOI]); + netlbl_netlink_auditinfo(skb, &audit_info); + + ret_val = cipso_v4_doi_remove(doi, + &audit_info, + netlbl_cipsov4_doi_free); + + audit_buf = netlbl_audit_start_common(AUDIT_MAC_CIPSOV4_DEL, + &audit_info); + audit_log_format(audit_buf, + " cipso_doi=%u res=%u", + doi, + ret_val == 0 ? 1 : 0); + audit_log_end(audit_buf); return ret_val; } diff --git a/net/netlabel/netlabel_domainhash.c b/net/netlabel/netlabel_domainhash.c index d64e2ae3b129..af4371d3b459 100644 --- a/net/netlabel/netlabel_domainhash.c +++ b/net/netlabel/netlabel_domainhash.c @@ -188,7 +188,7 @@ int netlbl_domhsh_init(u32 size) /** * netlbl_domhsh_add - Adds a entry to the domain hash table * @entry: the entry to add - * @audit_secid: the LSM secid to use in the audit message + * @audit_info: NetLabel audit information * * Description: * Adds a new entry to the domain hash table and handles any updates to the @@ -196,7 +196,8 @@ int netlbl_domhsh_init(u32 size) * negative on failure. * */ -int netlbl_domhsh_add(struct netlbl_dom_map *entry, u32 audit_secid) +int netlbl_domhsh_add(struct netlbl_dom_map *entry, + struct netlbl_audit *audit_info) { int ret_val; u32 bkt; @@ -241,26 +242,26 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, u32 audit_secid) spin_unlock(&netlbl_domhsh_def_lock); } else ret_val = -EINVAL; - if (ret_val == 0) { - if (entry->domain != NULL) - audit_domain = entry->domain; - else - audit_domain = "(default)"; - audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, - audit_secid); - audit_log_format(audit_buf, " domain=%s", audit_domain); - switch (entry->type) { - case NETLBL_NLTYPE_UNLABELED: - audit_log_format(audit_buf, " protocol=unlbl"); - break; - case NETLBL_NLTYPE_CIPSOV4: - audit_log_format(audit_buf, - " protocol=cipsov4 doi=%u", - entry->type_def.cipsov4->doi); - break; - } - audit_log_end(audit_buf); + + if (entry->domain != NULL) + audit_domain = entry->domain; + else + audit_domain = "(default)"; + audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_ADD, audit_info); + audit_log_format(audit_buf, " nlbl_domain=%s", audit_domain); + switch (entry->type) { + case NETLBL_NLTYPE_UNLABELED: + audit_log_format(audit_buf, " nlbl_protocol=unlbl"); + break; + case NETLBL_NLTYPE_CIPSOV4: + audit_log_format(audit_buf, + " nlbl_protocol=cipsov4 cipso_doi=%u", + entry->type_def.cipsov4->doi); + break; } + audit_log_format(audit_buf, " res=%u", ret_val == 0 ? 1 : 0); + audit_log_end(audit_buf); + rcu_read_unlock(); if (ret_val != 0) { @@ -279,7 +280,7 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, u32 audit_secid) /** * netlbl_domhsh_add_default - Adds the default entry to the domain hash table * @entry: the entry to add - * @audit_secid: the LSM secid to use in the audit message + * @audit_info: NetLabel audit information * * Description: * Adds a new default entry to the domain hash table and handles any updates @@ -287,15 +288,16 @@ int netlbl_domhsh_add(struct netlbl_dom_map *entry, u32 audit_secid) * negative on failure. * */ -int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, u32 audit_secid) +int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, + struct netlbl_audit *audit_info) { - return netlbl_domhsh_add(entry, audit_secid); + return netlbl_domhsh_add(entry, audit_info); } /** * netlbl_domhsh_remove - Removes an entry from the domain hash table * @domain: the domain to remove - * @audit_secid: the LSM secid to use in the audit message + * @audit_info: NetLabel audit information * * Description: * Removes an entry from the domain hash table and handles any updates to the @@ -303,7 +305,7 @@ int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, u32 audit_secid) * negative on failure. * */ -int netlbl_domhsh_remove(const char *domain, u32 audit_secid) +int netlbl_domhsh_remove(const char *domain, struct netlbl_audit *audit_info) { int ret_val = -ENOENT; struct netlbl_dom_map *entry; @@ -345,18 +347,20 @@ int netlbl_domhsh_remove(const char *domain, u32 audit_secid) ret_val = -ENOENT; spin_unlock(&netlbl_domhsh_def_lock); } - if (ret_val == 0) { - if (entry->domain != NULL) - audit_domain = entry->domain; - else - audit_domain = "(default)"; - audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, - audit_secid); - audit_log_format(audit_buf, " domain=%s", audit_domain); - audit_log_end(audit_buf); + if (entry->domain != NULL) + audit_domain = entry->domain; + else + audit_domain = "(default)"; + audit_buf = netlbl_audit_start_common(AUDIT_MAC_MAP_DEL, audit_info); + audit_log_format(audit_buf, + " nlbl_domain=%s res=%u", + audit_domain, + ret_val == 0 ? 1 : 0); + audit_log_end(audit_buf); + + if (ret_val == 0) call_rcu(&entry->rcu, netlbl_domhsh_free_entry); - } remove_return: rcu_read_unlock(); @@ -365,7 +369,7 @@ remove_return: /** * netlbl_domhsh_remove_default - Removes the default entry from the table - * @audit_secid: the LSM secid to use in the audit message + * @audit_info: NetLabel audit information * * Description: * Removes/resets the default entry for the domain hash table and handles any @@ -373,9 +377,9 @@ remove_return: * success, non-zero on failure. * */ -int netlbl_domhsh_remove_default(u32 audit_secid) +int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info) { - return netlbl_domhsh_remove(NULL, audit_secid); + return netlbl_domhsh_remove(NULL, audit_info); } /** diff --git a/net/netlabel/netlabel_domainhash.h b/net/netlabel/netlabel_domainhash.h index d50f13cacdca..3689956c3436 100644 --- a/net/netlabel/netlabel_domainhash.h +++ b/net/netlabel/netlabel_domainhash.h @@ -57,9 +57,11 @@ struct netlbl_dom_map { int netlbl_domhsh_init(u32 size); /* Manipulate the domain hash table */ -int netlbl_domhsh_add(struct netlbl_dom_map *entry, u32 audit_secid); -int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, u32 audit_secid); -int netlbl_domhsh_remove_default(u32 audit_secid); +int netlbl_domhsh_add(struct netlbl_dom_map *entry, + struct netlbl_audit *audit_info); +int netlbl_domhsh_add_default(struct netlbl_dom_map *entry, + struct netlbl_audit *audit_info); +int netlbl_domhsh_remove_default(struct netlbl_audit *audit_info); struct netlbl_dom_map *netlbl_domhsh_getentry(const char *domain); int netlbl_domhsh_walk(u32 *skip_bkt, u32 *skip_chain, diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 0ac314f18ad1..53c9079ad2c3 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -87,11 +87,14 @@ static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) struct netlbl_dom_map *entry = NULL; size_t tmp_size; u32 tmp_val; + struct netlbl_audit audit_info; if (!info->attrs[NLBL_MGMT_A_DOMAIN] || !info->attrs[NLBL_MGMT_A_PROTOCOL]) goto add_failure; + netlbl_netlink_auditinfo(skb, &audit_info); + entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry == NULL) { ret_val = -ENOMEM; @@ -108,7 +111,7 @@ static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) switch (entry->type) { case NETLBL_NLTYPE_UNLABELED: - ret_val = netlbl_domhsh_add(entry, NETLINK_CB(skb).sid); + ret_val = netlbl_domhsh_add(entry, &audit_info); break; case NETLBL_NLTYPE_CIPSOV4: if (!info->attrs[NLBL_MGMT_A_CV4DOI]) @@ -125,7 +128,7 @@ static int netlbl_mgmt_add(struct sk_buff *skb, struct genl_info *info) rcu_read_unlock(); goto add_failure; } - ret_val = netlbl_domhsh_add(entry, NETLINK_CB(skb).sid); + ret_val = netlbl_domhsh_add(entry, &audit_info); rcu_read_unlock(); break; default: @@ -156,12 +159,15 @@ add_failure: static int netlbl_mgmt_remove(struct sk_buff *skb, struct genl_info *info) { char *domain; + struct netlbl_audit audit_info; if (!info->attrs[NLBL_MGMT_A_DOMAIN]) return -EINVAL; + netlbl_netlink_auditinfo(skb, &audit_info); + domain = nla_data(info->attrs[NLBL_MGMT_A_DOMAIN]); - return netlbl_domhsh_remove(domain, NETLINK_CB(skb).sid); + return netlbl_domhsh_remove(domain, &audit_info); } /** @@ -264,10 +270,13 @@ static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info) int ret_val = -EINVAL; struct netlbl_dom_map *entry = NULL; u32 tmp_val; + struct netlbl_audit audit_info; if (!info->attrs[NLBL_MGMT_A_PROTOCOL]) goto adddef_failure; + netlbl_netlink_auditinfo(skb, &audit_info); + entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry == NULL) { ret_val = -ENOMEM; @@ -277,8 +286,7 @@ static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info) switch (entry->type) { case NETLBL_NLTYPE_UNLABELED: - ret_val = netlbl_domhsh_add_default(entry, - NETLINK_CB(skb).sid); + ret_val = netlbl_domhsh_add_default(entry, &audit_info); break; case NETLBL_NLTYPE_CIPSOV4: if (!info->attrs[NLBL_MGMT_A_CV4DOI]) @@ -295,8 +303,7 @@ static int netlbl_mgmt_adddef(struct sk_buff *skb, struct genl_info *info) rcu_read_unlock(); goto adddef_failure; } - ret_val = netlbl_domhsh_add_default(entry, - NETLINK_CB(skb).sid); + ret_val = netlbl_domhsh_add_default(entry, &audit_info); rcu_read_unlock(); break; default: @@ -324,7 +331,11 @@ adddef_failure: */ static int netlbl_mgmt_removedef(struct sk_buff *skb, struct genl_info *info) { - return netlbl_domhsh_remove_default(NETLINK_CB(skb).sid); + struct netlbl_audit audit_info; + + netlbl_netlink_auditinfo(skb, &audit_info); + + return netlbl_domhsh_remove_default(&audit_info); } /** diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index ab36675fee8c..1833ad233b39 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -70,18 +70,25 @@ static struct nla_policy netlbl_unlabel_genl_policy[NLBL_UNLABEL_A_MAX + 1] = { /** * netlbl_unlabel_acceptflg_set - Set the unlabeled accept flag * @value: desired value - * @audit_secid: the LSM secid to use in the audit message + * @audit_info: NetLabel audit information * * Description: * Set the value of the unlabeled accept flag to @value. * */ -static void netlbl_unlabel_acceptflg_set(u8 value, u32 audit_secid) +static void netlbl_unlabel_acceptflg_set(u8 value, + struct netlbl_audit *audit_info) { + struct audit_buffer *audit_buf; + u8 old_val; + + old_val = atomic_read(&netlabel_unlabel_accept_flg); atomic_set(&netlabel_unlabel_accept_flg, value); - netlbl_audit_nomsg((value ? - AUDIT_MAC_UNLBL_ACCEPT : AUDIT_MAC_UNLBL_DENY), - audit_secid); + + audit_buf = netlbl_audit_start_common(AUDIT_MAC_UNLBL_ALLOW, + audit_info); + audit_log_format(audit_buf, " unlbl_accept=%u old=%u", value, old_val); + audit_log_end(audit_buf); } /* @@ -101,12 +108,13 @@ static void netlbl_unlabel_acceptflg_set(u8 value, u32 audit_secid) static int netlbl_unlabel_accept(struct sk_buff *skb, struct genl_info *info) { u8 value; + struct netlbl_audit audit_info; if (info->attrs[NLBL_UNLABEL_A_ACPTFLG]) { value = nla_get_u8(info->attrs[NLBL_UNLABEL_A_ACPTFLG]); if (value == 1 || value == 0) { - netlbl_unlabel_acceptflg_set(value, - NETLINK_CB(skb).sid); + netlbl_netlink_auditinfo(skb, &audit_info); + netlbl_unlabel_acceptflg_set(value, &audit_info); return 0; } } @@ -250,19 +258,23 @@ int netlbl_unlabel_defconf(void) { int ret_val; struct netlbl_dom_map *entry; - u32 secid; + struct netlbl_audit audit_info; - security_task_getsecid(current, &secid); + /* Only the kernel is allowed to call this function and the only time + * it is called is at bootup before the audit subsystem is reporting + * messages so don't worry to much about these values. */ + security_task_getsecid(current, &audit_info.secid); + audit_info.loginuid = 0; entry = kzalloc(sizeof(*entry), GFP_KERNEL); if (entry == NULL) return -ENOMEM; entry->type = NETLBL_NLTYPE_UNLABELED; - ret_val = netlbl_domhsh_add_default(entry, secid); + ret_val = netlbl_domhsh_add_default(entry, &audit_info); if (ret_val != 0) return ret_val; - netlbl_unlabel_acceptflg_set(1, secid); + netlbl_unlabel_acceptflg_set(1, &audit_info); return 0; } diff --git a/net/netlabel/netlabel_user.c b/net/netlabel/netlabel_user.c index c2343af584cb..98a416381e61 100644 --- a/net/netlabel/netlabel_user.c +++ b/net/netlabel/netlabel_user.c @@ -85,7 +85,7 @@ int netlbl_netlink_init(void) /** * netlbl_audit_start_common - Start an audit message * @type: audit message type - * @secid: LSM context ID + * @audit_info: NetLabel audit information * * Description: * Start an audit message using the type specified in @type and fill the audit @@ -93,14 +93,11 @@ int netlbl_netlink_init(void) * a pointer to the audit buffer on success, NULL on failure. * */ -struct audit_buffer *netlbl_audit_start_common(int type, u32 secid) +struct audit_buffer *netlbl_audit_start_common(int type, + struct netlbl_audit *audit_info) { struct audit_context *audit_ctx = current->audit_context; struct audit_buffer *audit_buf; - uid_t audit_loginuid; - const char *audit_tty; - char audit_comm[sizeof(current->comm)]; - struct vm_area_struct *vma; char *secctx; u32 secctx_len; @@ -108,60 +105,13 @@ struct audit_buffer *netlbl_audit_start_common(int type, u32 secid) if (audit_buf == NULL) return NULL; - audit_loginuid = audit_get_loginuid(audit_ctx); - if (current->signal && - current->signal->tty && - current->signal->tty->name) - audit_tty = current->signal->tty->name; - else - audit_tty = "(none)"; - get_task_comm(audit_comm, current); + audit_log_format(audit_buf, "netlabel: auid=%u", audit_info->loginuid); - audit_log_format(audit_buf, - "netlabel: auid=%u uid=%u tty=%s pid=%d", - audit_loginuid, - current->uid, - audit_tty, - current->pid); - audit_log_format(audit_buf, " comm="); - audit_log_untrustedstring(audit_buf, audit_comm); - if (current->mm) { - down_read(¤t->mm->mmap_sem); - vma = current->mm->mmap; - while (vma) { - if ((vma->vm_flags & VM_EXECUTABLE) && - vma->vm_file) { - audit_log_d_path(audit_buf, - " exe=", - vma->vm_file->f_dentry, - vma->vm_file->f_vfsmnt); - break; - } - vma = vma->vm_next; - } - up_read(¤t->mm->mmap_sem); - } - - if (secid != 0 && - security_secid_to_secctx(secid, &secctx, &secctx_len) == 0) + if (audit_info->secid != 0 && + security_secid_to_secctx(audit_info->secid, + &secctx, + &secctx_len) == 0) audit_log_format(audit_buf, " subj=%s", secctx); return audit_buf; } - -/** - * netlbl_audit_nomsg - Send an audit message without additional text - * @type: audit message type - * @secid: LSM context ID - * - * Description: - * Send an audit message with only the common NetLabel audit fields. - * - */ -void netlbl_audit_nomsg(int type, u32 secid) -{ - struct audit_buffer *audit_buf; - - audit_buf = netlbl_audit_start_common(type, secid); - audit_log_end(audit_buf); -} diff --git a/net/netlabel/netlabel_user.h b/net/netlabel/netlabel_user.h index ab840acfc964..47967ef32964 100644 --- a/net/netlabel/netlabel_user.h +++ b/net/netlabel/netlabel_user.h @@ -72,13 +72,25 @@ static inline void *netlbl_netlink_hdr_put(struct sk_buff *skb, NETLBL_PROTO_VERSION); } +/** + * netlbl_netlink_auditinfo - Fetch the audit information from a NETLINK msg + * @skb: the packet + * @audit_info: NetLabel audit information + */ +static inline void netlbl_netlink_auditinfo(struct sk_buff *skb, + struct netlbl_audit *audit_info) +{ + audit_info->secid = NETLINK_CB(skb).sid; + audit_info->loginuid = NETLINK_CB(skb).loginuid; +} + /* NetLabel NETLINK I/O functions */ int netlbl_netlink_init(void); /* NetLabel Audit Functions */ -struct audit_buffer *netlbl_audit_start_common(int type, u32 secid); -void netlbl_audit_nomsg(int type, u32 secid); +struct audit_buffer *netlbl_audit_start_common(int type, + struct netlbl_audit *audit_info); #endif -- cgit v1.3-8-gc7d7 From 027445c37282bc1ed26add45e573ad2d3e4860a5 Mon Sep 17 00:00:00 2001 From: Badari Pulavarty Date: Sat, 30 Sep 2006 23:28:46 -0700 Subject: [PATCH] Vectorize aio_read/aio_write fileop methods This patch vectorizes aio_read() and aio_write() methods to prepare for collapsing all aio & vectored operations into one interface - which is aio_read()/aio_write(). Signed-off-by: Badari Pulavarty Signed-off-by: Christoph Hellwig Cc: Michael Holzheu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/filesystems/Locking | 5 +-- Documentation/filesystems/vfs.txt | 4 +- arch/s390/hypfs/inode.c | 17 +++++++-- drivers/char/raw.c | 14 +------ drivers/usb/gadget/inode.c | 80 +++++++++++++++++++++++++++------------ fs/aio.c | 15 ++++++-- fs/block_dev.c | 10 +---- fs/cifs/cifsfs.c | 6 +-- fs/ext3/file.c | 5 ++- fs/nfs/direct.c | 26 ++++++++++--- fs/nfs/file.c | 34 +++++++++-------- fs/ntfs/file.c | 8 ++-- fs/ocfs2/file.c | 28 +++++++------- fs/read_write.c | 20 ++++++++-- fs/reiserfs/file.c | 4 +- fs/xfs/linux-2.6/xfs_file.c | 46 +++++++++++----------- include/linux/aio.h | 2 + include/linux/fs.h | 10 ++--- include/linux/nfs_fs.h | 10 +++-- include/net/sock.h | 1 - mm/filemap.c | 39 +++++++++---------- net/socket.c | 49 +++++++++++------------- 22 files changed, 242 insertions(+), 191 deletions(-) (limited to 'include/net') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index 247d7f619aa2..eb1a6cad21e6 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -356,10 +356,9 @@ The last two are called only from check_disk_change(). prototypes: loff_t (*llseek) (struct file *, loff_t, int); ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); - ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t); ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); - ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t, - loff_t); + ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); + ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); int (*readdir) (struct file *, void *, filldir_t); unsigned int (*poll) (struct file *, struct poll_table_struct *); int (*ioctl) (struct inode *, struct file *, unsigned int, diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index 1cb7e8be927a..cd07c21b8400 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -699,9 +699,9 @@ This describes how the VFS can manipulate an open file. As of kernel struct file_operations { loff_t (*llseek) (struct file *, loff_t, int); ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); - ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t); ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); - ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t, loff_t); + ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); + ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); int (*readdir) (struct file *, void *, filldir_t); unsigned int (*poll) (struct file *, struct poll_table_struct *); int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long); diff --git a/arch/s390/hypfs/inode.c b/arch/s390/hypfs/inode.c index 813fc21358f9..cd702ae45d6d 100644 --- a/arch/s390/hypfs/inode.c +++ b/arch/s390/hypfs/inode.c @@ -134,12 +134,20 @@ static int hypfs_open(struct inode *inode, struct file *filp) return 0; } -static ssize_t hypfs_aio_read(struct kiocb *iocb, __user char *buf, - size_t count, loff_t offset) +static ssize_t hypfs_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t offset) { char *data; size_t len; struct file *filp = iocb->ki_filp; + /* XXX: temporary */ + char __user *buf = iov[0].iov_base; + size_t count = iov[0].iov_len; + + if (nr_segs != 1) { + count = -EINVAL; + goto out; + } data = filp->private_data; len = strlen(data); @@ -158,12 +166,13 @@ static ssize_t hypfs_aio_read(struct kiocb *iocb, __user char *buf, out: return count; } -static ssize_t hypfs_aio_write(struct kiocb *iocb, const char __user *buf, - size_t count, loff_t pos) +static ssize_t hypfs_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t offset) { int rc; struct super_block *sb; struct hypfs_sb_info *fs_info; + size_t count = iov_length(iov, nr_segs); sb = iocb->ki_filp->f_dentry->d_inode->i_sb; fs_info = sb->s_fs_info; diff --git a/drivers/char/raw.c b/drivers/char/raw.c index c596a08c07b3..173fb08555d5 100644 --- a/drivers/char/raw.c +++ b/drivers/char/raw.c @@ -249,23 +249,11 @@ static ssize_t raw_file_write(struct file *file, const char __user *buf, return generic_file_write_nolock(file, &local_iov, 1, ppos); } -static ssize_t raw_file_aio_write(struct kiocb *iocb, const char __user *buf, - size_t count, loff_t pos) -{ - struct iovec local_iov = { - .iov_base = (char __user *)buf, - .iov_len = count - }; - - return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos); -} - - static const struct file_operations raw_fops = { .read = generic_file_read, .aio_read = generic_file_aio_read, .write = raw_file_write, - .aio_write = raw_file_aio_write, + .aio_write = generic_file_aio_write_nolock, .open = raw_open, .release= raw_release, .ioctl = raw_ioctl, diff --git a/drivers/usb/gadget/inode.c b/drivers/usb/gadget/inode.c index 4a000d846a93..86924f9cdd7e 100644 --- a/drivers/usb/gadget/inode.c +++ b/drivers/usb/gadget/inode.c @@ -533,7 +533,8 @@ struct kiocb_priv { struct usb_request *req; struct ep_data *epdata; void *buf; - char __user *ubuf; /* NULL for writes */ + const struct iovec *iv; + unsigned long nr_segs; unsigned actual; }; @@ -561,17 +562,32 @@ static int ep_aio_cancel(struct kiocb *iocb, struct io_event *e) static ssize_t ep_aio_read_retry(struct kiocb *iocb) { struct kiocb_priv *priv = iocb->private; - ssize_t status = priv->actual; - - /* we "retry" to get the right mm context for this: */ - status = copy_to_user(priv->ubuf, priv->buf, priv->actual); - if (unlikely(0 != status)) - status = -EFAULT; - else - status = priv->actual; - kfree(priv->buf); - kfree(priv); - return status; + ssize_t len, total; + int i; + + /* we "retry" to get the right mm context for this: */ + + /* copy stuff into user buffers */ + total = priv->actual; + len = 0; + for (i=0; i < priv->nr_segs; i++) { + ssize_t this = min((ssize_t)(priv->iv[i].iov_len), total); + + if (copy_to_user(priv->iv[i].iov_base, priv->buf, this)) { + if (len == 0) + len = -EFAULT; + break; + } + + total -= this; + len += this; + if (total == 0) + break; + } + kfree(priv->buf); + kfree(priv); + aio_put_req(iocb); + return len; } static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req) @@ -584,7 +600,7 @@ static void ep_aio_complete(struct usb_ep *ep, struct usb_request *req) spin_lock(&epdata->dev->lock); priv->req = NULL; priv->epdata = NULL; - if (priv->ubuf == NULL + if (priv->iv == NULL || unlikely(req->actual == 0) || unlikely(kiocbIsCancelled(iocb))) { kfree(req->buf); @@ -619,7 +635,8 @@ ep_aio_rwtail( char *buf, size_t len, struct ep_data *epdata, - char __user *ubuf + const struct iovec *iv, + unsigned long nr_segs ) { struct kiocb_priv *priv; @@ -634,7 +651,8 @@ fail: return value; } iocb->private = priv; - priv->ubuf = ubuf; + priv->iv = iv; + priv->nr_segs = nr_segs; value = get_ready_ep(iocb->ki_filp->f_flags, epdata); if (unlikely(value < 0)) { @@ -674,41 +692,53 @@ fail: kfree(priv); put_ep(epdata); } else - value = (ubuf ? -EIOCBRETRY : -EIOCBQUEUED); + value = (iv ? -EIOCBRETRY : -EIOCBQUEUED); return value; } static ssize_t -ep_aio_read(struct kiocb *iocb, char __user *ubuf, size_t len, loff_t o) +ep_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t o) { struct ep_data *epdata = iocb->ki_filp->private_data; char *buf; if (unlikely(epdata->desc.bEndpointAddress & USB_DIR_IN)) return -EINVAL; - buf = kmalloc(len, GFP_KERNEL); + + buf = kmalloc(iocb->ki_left, GFP_KERNEL); if (unlikely(!buf)) return -ENOMEM; + iocb->ki_retry = ep_aio_read_retry; - return ep_aio_rwtail(iocb, buf, len, epdata, ubuf); + return ep_aio_rwtail(iocb, buf, iocb->ki_left, epdata, iov, nr_segs); } static ssize_t -ep_aio_write(struct kiocb *iocb, const char __user *ubuf, size_t len, loff_t o) +ep_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t o) { struct ep_data *epdata = iocb->ki_filp->private_data; char *buf; + size_t len = 0; + int i = 0; if (unlikely(!(epdata->desc.bEndpointAddress & USB_DIR_IN))) return -EINVAL; - buf = kmalloc(len, GFP_KERNEL); + + buf = kmalloc(iocb->ki_left, GFP_KERNEL); if (unlikely(!buf)) return -ENOMEM; - if (unlikely(copy_from_user(buf, ubuf, len) != 0)) { - kfree(buf); - return -EFAULT; + + for (i=0; i < nr_segs; i++) { + if (unlikely(copy_from_user(&buf[len], iov[i].iov_base, + iov[i].iov_len) != 0)) { + kfree(buf); + return -EFAULT; + } + len += iov[i].iov_len; } - return ep_aio_rwtail(iocb, buf, len, epdata, NULL); + return ep_aio_rwtail(iocb, buf, len, epdata, NULL, 0); } /*----------------------------------------------------------------------*/ diff --git a/fs/aio.c b/fs/aio.c index 950630187acc..27ff56540c73 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -15,6 +15,7 @@ #include #include #include +#include #define DEBUG 0 @@ -1315,8 +1316,11 @@ static ssize_t aio_pread(struct kiocb *iocb) ssize_t ret = 0; do { - ret = file->f_op->aio_read(iocb, iocb->ki_buf, - iocb->ki_left, iocb->ki_pos); + iocb->ki_inline_vec.iov_base = iocb->ki_buf; + iocb->ki_inline_vec.iov_len = iocb->ki_left; + + ret = file->f_op->aio_read(iocb, &iocb->ki_inline_vec, + 1, iocb->ki_pos); /* * Can't just depend on iocb->ki_left to determine * whether we are done. This may have been a short read. @@ -1349,8 +1353,11 @@ static ssize_t aio_pwrite(struct kiocb *iocb) ssize_t ret = 0; do { - ret = file->f_op->aio_write(iocb, iocb->ki_buf, - iocb->ki_left, iocb->ki_pos); + iocb->ki_inline_vec.iov_base = iocb->ki_buf; + iocb->ki_inline_vec.iov_len = iocb->ki_left; + + ret = file->f_op->aio_write(iocb, &iocb->ki_inline_vec, + 1, iocb->ki_pos); if (ret > 0) { iocb->ki_buf += ret; iocb->ki_left -= ret; diff --git a/fs/block_dev.c b/fs/block_dev.c index 0c361ea7e5a6..8c819117310b 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1162,14 +1162,6 @@ static ssize_t blkdev_file_write(struct file *file, const char __user *buf, return generic_file_write_nolock(file, &local_iov, 1, ppos); } -static ssize_t blkdev_file_aio_write(struct kiocb *iocb, const char __user *buf, - size_t count, loff_t pos) -{ - struct iovec local_iov = { .iov_base = (void __user *)buf, .iov_len = count }; - - return generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos); -} - static long block_ioctl(struct file *file, unsigned cmd, unsigned long arg) { return blkdev_ioctl(file->f_mapping->host, file, cmd, arg); @@ -1192,7 +1184,7 @@ const struct file_operations def_blk_fops = { .read = generic_file_read, .write = blkdev_file_write, .aio_read = generic_file_aio_read, - .aio_write = blkdev_file_aio_write, + .aio_write = generic_file_aio_write_nolock, .mmap = generic_file_mmap, .fsync = block_fsync, .unlocked_ioctl = block_ioctl, diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 22bcf4d7e7ae..5abb42a7c53e 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -492,13 +492,13 @@ static ssize_t cifs_file_writev(struct file *file, const struct iovec *iov, return written; } -static ssize_t cifs_file_aio_write(struct kiocb *iocb, const char __user *buf, - size_t count, loff_t pos) +static ssize_t cifs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct inode *inode = iocb->ki_filp->f_dentry->d_inode; ssize_t written; - written = generic_file_aio_write(iocb, buf, count, pos); + written = generic_file_aio_write(iocb, iov, nr_segs, pos); if (!CIFS_I(inode)->clientCanCacheAll) filemap_fdatawrite(inode->i_mapping); return written; diff --git a/fs/ext3/file.c b/fs/ext3/file.c index 74ff20f9d09b..5c762457bc89 100644 --- a/fs/ext3/file.c +++ b/fs/ext3/file.c @@ -48,14 +48,15 @@ static int ext3_release_file (struct inode * inode, struct file * filp) } static ssize_t -ext3_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) +ext3_file_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; struct inode *inode = file->f_dentry->d_inode; ssize_t ret; int err; - ret = generic_file_aio_write(iocb, buf, count, pos); + ret = generic_file_aio_write(iocb, iov, nr_segs, pos); /* * Skip flushing if there was an error, or if nothing was written. diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index 377839bed172..9f7f8b9ea1e2 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -707,8 +707,8 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz /** * nfs_file_direct_read - file direct read operation for NFS files * @iocb: target I/O control block - * @buf: user's buffer into which to read data - * @count: number of bytes to read + * @iov: vector of user buffers into which to read data + * @nr_segs: size of iov vector * @pos: byte offset in file where reading starts * * We use this function for direct reads instead of calling @@ -725,17 +725,24 @@ static ssize_t nfs_direct_write(struct kiocb *iocb, unsigned long user_addr, siz * client must read the updated atime from the server back into its * cache. */ -ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) +ssize_t nfs_file_direct_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { ssize_t retval = -EINVAL; struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; + /* XXX: temporary */ + const char __user *buf = iov[0].iov_base; + size_t count = iov[0].iov_len; dprintk("nfs: direct read(%s/%s, %lu@%Ld)\n", file->f_dentry->d_parent->d_name.name, file->f_dentry->d_name.name, (unsigned long) count, (long long) pos); + if (nr_segs != 1) + return -EINVAL; + if (count < 0) goto out; retval = -EFAULT; @@ -760,8 +767,8 @@ out: /** * nfs_file_direct_write - file direct write operation for NFS files * @iocb: target I/O control block - * @buf: user's buffer from which to write data - * @count: number of bytes to write + * @iov: vector of user buffers from which to write data + * @nr_segs: size of iov vector * @pos: byte offset in file where writing starts * * We use this function for direct writes instead of calling @@ -782,17 +789,24 @@ out: * Note that O_APPEND is not supported for NFS direct writes, as there * is no atomic O_APPEND write facility in the NFS protocol. */ -ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) +ssize_t nfs_file_direct_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { ssize_t retval; struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; + /* XXX: temporary */ + const char __user *buf = iov[0].iov_base; + size_t count = iov[0].iov_len; dfprintk(VFS, "nfs: direct write(%s/%s, %lu@%Ld)\n", file->f_dentry->d_parent->d_name.name, file->f_dentry->d_name.name, (unsigned long) count, (long long) pos); + if (nr_segs != 1) + return -EINVAL; + retval = generic_write_checks(file, &pos, &count, 0); if (retval) goto out; diff --git a/fs/nfs/file.c b/fs/nfs/file.c index be997d649127..cc93865cea93 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -41,8 +41,10 @@ static int nfs_file_release(struct inode *, struct file *); static loff_t nfs_file_llseek(struct file *file, loff_t offset, int origin); static int nfs_file_mmap(struct file *, struct vm_area_struct *); static ssize_t nfs_file_sendfile(struct file *, loff_t *, size_t, read_actor_t, void *); -static ssize_t nfs_file_read(struct kiocb *, char __user *, size_t, loff_t); -static ssize_t nfs_file_write(struct kiocb *, const char __user *, size_t, loff_t); +static ssize_t nfs_file_read(struct kiocb *, const struct iovec *iov, + unsigned long nr_segs, loff_t pos); +static ssize_t nfs_file_write(struct kiocb *, const struct iovec *iov, + unsigned long nr_segs, loff_t pos); static int nfs_file_flush(struct file *, fl_owner_t id); static int nfs_fsync(struct file *, struct dentry *dentry, int datasync); static int nfs_check_flags(int flags); @@ -53,8 +55,8 @@ const struct file_operations nfs_file_operations = { .llseek = nfs_file_llseek, .read = do_sync_read, .write = do_sync_write, - .aio_read = nfs_file_read, - .aio_write = nfs_file_write, + .aio_read = nfs_file_read, + .aio_write = nfs_file_write, .mmap = nfs_file_mmap, .open = nfs_file_open, .flush = nfs_file_flush, @@ -196,15 +198,17 @@ nfs_file_flush(struct file *file, fl_owner_t id) } static ssize_t -nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos) +nfs_file_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct dentry * dentry = iocb->ki_filp->f_dentry; struct inode * inode = dentry->d_inode; ssize_t result; + size_t count = iov_length(iov, nr_segs); #ifdef CONFIG_NFS_DIRECTIO if (iocb->ki_filp->f_flags & O_DIRECT) - return nfs_file_direct_read(iocb, buf, count, pos); + return nfs_file_direct_read(iocb, iov, nr_segs, pos); #endif dfprintk(VFS, "nfs: read(%s/%s, %lu@%lu)\n", @@ -214,7 +218,7 @@ nfs_file_read(struct kiocb *iocb, char __user * buf, size_t count, loff_t pos) result = nfs_revalidate_mapping(inode, iocb->ki_filp->f_mapping); nfs_add_stats(inode, NFSIOS_NORMALREADBYTES, count); if (!result) - result = generic_file_aio_read(iocb, buf, count, pos); + result = generic_file_aio_read(iocb, iov, nr_segs, pos); return result; } @@ -336,24 +340,22 @@ const struct address_space_operations nfs_file_aops = { #endif }; -/* - * Write to a file (through the page cache). - */ -static ssize_t -nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t pos) +static ssize_t nfs_file_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct dentry * dentry = iocb->ki_filp->f_dentry; struct inode * inode = dentry->d_inode; ssize_t result; + size_t count = iov_length(iov, nr_segs); #ifdef CONFIG_NFS_DIRECTIO if (iocb->ki_filp->f_flags & O_DIRECT) - return nfs_file_direct_write(iocb, buf, count, pos); + return nfs_file_direct_write(iocb, iov, nr_segs, pos); #endif - dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%lu)\n", + dfprintk(VFS, "nfs: write(%s/%s(%ld), %lu@%Ld)\n", dentry->d_parent->d_name.name, dentry->d_name.name, - inode->i_ino, (unsigned long) count, (unsigned long) pos); + inode->i_ino, (unsigned long) count, (long long) pos); result = -EBUSY; if (IS_SWAPFILE(inode)) @@ -372,7 +374,7 @@ nfs_file_write(struct kiocb *iocb, const char __user *buf, size_t count, loff_t goto out; nfs_add_stats(inode, NFSIOS_NORMALWRITTENBYTES, count); - result = generic_file_aio_write(iocb, buf, count, pos); + result = generic_file_aio_write(iocb, iov, nr_segs, pos); out: return result; diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 585a79d39c9d..0c46f5c86b71 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -2176,20 +2176,18 @@ out: /** * ntfs_file_aio_write - */ -static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const char __user *buf, - size_t count, loff_t pos) +static ssize_t ntfs_file_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; ssize_t ret; - struct iovec local_iov = { .iov_base = (void __user *)buf, - .iov_len = count }; BUG_ON(iocb->ki_pos != pos); mutex_lock(&inode->i_mutex); - ret = ntfs_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos); + ret = ntfs_file_aio_write_nolock(iocb, iov, nr_segs, &iocb->ki_pos); mutex_unlock(&inode->i_mutex); if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { int err = sync_page_range(inode, mapping, pos, ret); diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 2bbfa17090cf..d9ba0a931a03 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -961,25 +961,23 @@ static inline int ocfs2_write_should_remove_suid(struct inode *inode) } static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, - const char __user *buf, - size_t count, + const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { - struct iovec local_iov = { .iov_base = (void __user *)buf, - .iov_len = count }; int ret, rw_level = -1, meta_level = -1, have_alloc_sem = 0; u32 clusters; struct file *filp = iocb->ki_filp; struct inode *inode = filp->f_dentry->d_inode; loff_t newsize, saved_pos; - mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf, - (unsigned int)count, + mlog_entry("(0x%p, %u, '%.*s')\n", filp, + (unsigned int)nr_segs, filp->f_dentry->d_name.len, filp->f_dentry->d_name.name); /* happy write of zero bytes */ - if (count == 0) + if (iocb->ki_left == 0) return 0; if (!inode) { @@ -1048,7 +1046,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, } else { saved_pos = iocb->ki_pos; } - newsize = count + saved_pos; + newsize = iocb->ki_left + saved_pos; mlog(0, "pos=%lld newsize=%lld cursize=%lld\n", (long long) saved_pos, (long long) newsize, @@ -1081,7 +1079,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, if (!clusters) break; - ret = ocfs2_extend_file(inode, NULL, newsize, count); + ret = ocfs2_extend_file(inode, NULL, newsize, iocb->ki_left); if (ret < 0) { if (ret != -ENOSPC) mlog_errno(ret); @@ -1098,7 +1096,7 @@ static ssize_t ocfs2_file_aio_write(struct kiocb *iocb, /* communicate with ocfs2_dio_end_io */ ocfs2_iocb_set_rw_locked(iocb); - ret = generic_file_aio_write_nolock(iocb, &local_iov, 1, &iocb->ki_pos); + ret = generic_file_aio_write_nolock(iocb, iov, nr_segs, iocb->ki_pos); /* buffered aio wouldn't have proper lock coverage today */ BUG_ON(ret == -EIOCBQUEUED && !(filp->f_flags & O_DIRECT)); @@ -1132,16 +1130,16 @@ out: } static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, - char __user *buf, - size_t count, + const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { int ret = 0, rw_level = -1, have_alloc_sem = 0; struct file *filp = iocb->ki_filp; struct inode *inode = filp->f_dentry->d_inode; - mlog_entry("(0x%p, 0x%p, %u, '%.*s')\n", filp, buf, - (unsigned int)count, + mlog_entry("(0x%p, %u, '%.*s')\n", filp, + (unsigned int)nr_segs, filp->f_dentry->d_name.len, filp->f_dentry->d_name.name); @@ -1185,7 +1183,7 @@ static ssize_t ocfs2_file_aio_read(struct kiocb *iocb, } ocfs2_meta_unlock(inode, 0); - ret = generic_file_aio_read(iocb, buf, count, iocb->ki_pos); + ret = generic_file_aio_read(iocb, iov, nr_segs, iocb->ki_pos); if (ret == -EINVAL) mlog(ML_ERROR, "generic_file_aio_read returned -EINVAL\n"); diff --git a/fs/read_write.c b/fs/read_write.c index d4cb3183c99c..679dd535380f 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -227,14 +227,20 @@ static void wait_on_retry_sync_kiocb(struct kiocb *iocb) ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos) { + struct iovec iov = { .iov_base = buf, .iov_len = len }; struct kiocb kiocb; ssize_t ret; init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; - while (-EIOCBRETRY == - (ret = filp->f_op->aio_read(&kiocb, buf, len, kiocb.ki_pos))) + kiocb.ki_left = len; + + for (;;) { + ret = filp->f_op->aio_read(&kiocb, &iov, 1, kiocb.ki_pos); + if (ret != -EIOCBRETRY) + break; wait_on_retry_sync_kiocb(&kiocb); + } if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&kiocb); @@ -279,14 +285,20 @@ EXPORT_SYMBOL(vfs_read); ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos) { + struct iovec iov = { .iov_base = (void __user *)buf, .iov_len = len }; struct kiocb kiocb; ssize_t ret; init_sync_kiocb(&kiocb, filp); kiocb.ki_pos = *ppos; - while (-EIOCBRETRY == - (ret = filp->f_op->aio_write(&kiocb, buf, len, kiocb.ki_pos))) + kiocb.ki_left = len; + + for (;;) { + ret = filp->f_op->aio_write(&kiocb, &iov, 1, kiocb.ki_pos); + if (ret != -EIOCBRETRY) + break; wait_on_retry_sync_kiocb(&kiocb); + } if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&kiocb); diff --git a/fs/reiserfs/file.c b/fs/reiserfs/file.c index c11f6118c9ca..41f24369e47a 100644 --- a/fs/reiserfs/file.c +++ b/fs/reiserfs/file.c @@ -1334,7 +1334,7 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t if (err) return err; } - result = generic_file_write(file, buf, count, ppos); + result = do_sync_write(file, buf, count, ppos); if (after_file_end) { /* Now update i_size and remove the savelink */ struct reiserfs_transaction_handle th; @@ -1566,7 +1566,7 @@ static ssize_t reiserfs_file_write(struct file *file, /* the file we are going t } const struct file_operations reiserfs_file_operations = { - .read = generic_file_read, + .read = do_sync_read, .write = reiserfs_file_write, .ioctl = reiserfs_ioctl, #ifdef CONFIG_COMPAT diff --git a/fs/xfs/linux-2.6/xfs_file.c b/fs/xfs/linux-2.6/xfs_file.c index 41cfcba7ce49..4737971c6a39 100644 --- a/fs/xfs/linux-2.6/xfs_file.c +++ b/fs/xfs/linux-2.6/xfs_file.c @@ -49,50 +49,49 @@ static struct vm_operations_struct xfs_dmapi_file_vm_ops; STATIC inline ssize_t __xfs_file_read( struct kiocb *iocb, - char __user *buf, + const struct iovec *iov, + unsigned long nr_segs, int ioflags, - size_t count, loff_t pos) { - struct iovec iov = {buf, count}; struct file *file = iocb->ki_filp; bhv_vnode_t *vp = vn_from_inode(file->f_dentry->d_inode); BUG_ON(iocb->ki_pos != pos); if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; - return bhv_vop_read(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL); + return bhv_vop_read(vp, iocb, iov, nr_segs, &iocb->ki_pos, + ioflags, NULL); } STATIC ssize_t xfs_file_aio_read( struct kiocb *iocb, - char __user *buf, - size_t count, + const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { - return __xfs_file_read(iocb, buf, IO_ISAIO, count, pos); + return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO, pos); } STATIC ssize_t xfs_file_aio_read_invis( struct kiocb *iocb, - char __user *buf, - size_t count, + const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { - return __xfs_file_read(iocb, buf, IO_ISAIO|IO_INVIS, count, pos); + return __xfs_file_read(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos); } STATIC inline ssize_t __xfs_file_write( - struct kiocb *iocb, - const char __user *buf, - int ioflags, - size_t count, - loff_t pos) + struct kiocb *iocb, + const struct iovec *iov, + unsigned long nr_segs, + int ioflags, + loff_t pos) { - struct iovec iov = {(void __user *)buf, count}; struct file *file = iocb->ki_filp; struct inode *inode = file->f_mapping->host; bhv_vnode_t *vp = vn_from_inode(inode); @@ -100,27 +99,28 @@ __xfs_file_write( BUG_ON(iocb->ki_pos != pos); if (unlikely(file->f_flags & O_DIRECT)) ioflags |= IO_ISDIRECT; - return bhv_vop_write(vp, iocb, &iov, 1, &iocb->ki_pos, ioflags, NULL); + return bhv_vop_write(vp, iocb, iov, nr_segs, &iocb->ki_pos, + ioflags, NULL); } STATIC ssize_t xfs_file_aio_write( struct kiocb *iocb, - const char __user *buf, - size_t count, + const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { - return __xfs_file_write(iocb, buf, IO_ISAIO, count, pos); + return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO, pos); } STATIC ssize_t xfs_file_aio_write_invis( struct kiocb *iocb, - const char __user *buf, - size_t count, + const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { - return __xfs_file_write(iocb, buf, IO_ISAIO|IO_INVIS, count, pos); + return __xfs_file_write(iocb, iov, nr_segs, IO_ISAIO|IO_INVIS, pos); } STATIC inline ssize_t diff --git a/include/linux/aio.h b/include/linux/aio.h index 8a0193385a9b..58349e58b749 100644 --- a/include/linux/aio.h +++ b/include/linux/aio.h @@ -4,6 +4,7 @@ #include #include #include +#include #include @@ -112,6 +113,7 @@ struct kiocb { long ki_retried; /* just for testing */ long ki_kicked; /* just for testing */ long ki_queued; /* just for testing */ + struct iovec ki_inline_vec; /* inline vector */ struct list_head ki_list; /* the aio core uses this * for cancellation */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 5baf3a153403..257bae16f545 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1097,9 +1097,9 @@ struct file_operations { struct module *owner; loff_t (*llseek) (struct file *, loff_t, int); ssize_t (*read) (struct file *, char __user *, size_t, loff_t *); - ssize_t (*aio_read) (struct kiocb *, char __user *, size_t, loff_t); ssize_t (*write) (struct file *, const char __user *, size_t, loff_t *); - ssize_t (*aio_write) (struct kiocb *, const char __user *, size_t, loff_t); + ssize_t (*aio_read) (struct kiocb *, const struct iovec *, unsigned long, loff_t); + ssize_t (*aio_write) (struct kiocb *, const struct iovec *, unsigned long, loff_t); int (*readdir) (struct file *, void *, filldir_t); unsigned int (*poll) (struct file *, struct poll_table_struct *); int (*ioctl) (struct inode *, struct file *, unsigned int, unsigned long); @@ -1704,11 +1704,11 @@ extern int file_send_actor(read_descriptor_t * desc, struct page *page, unsigned extern ssize_t generic_file_read(struct file *, char __user *, size_t, loff_t *); int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); extern ssize_t generic_file_write(struct file *, const char __user *, size_t, loff_t *); -extern ssize_t generic_file_aio_read(struct kiocb *, char __user *, size_t, loff_t); +extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t __generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t *); -extern ssize_t generic_file_aio_write(struct kiocb *, const char __user *, size_t, loff_t); +extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t generic_file_aio_write_nolock(struct kiocb *, const struct iovec *, - unsigned long, loff_t *); + unsigned long, loff_t); extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, unsigned long *, loff_t, loff_t *, size_t, size_t); extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 98c9b9f667a5..76ff54846ada 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -367,10 +367,12 @@ extern int nfs3_removexattr (struct dentry *, const char *name); */ extern ssize_t nfs_direct_IO(int, struct kiocb *, const struct iovec *, loff_t, unsigned long); -extern ssize_t nfs_file_direct_read(struct kiocb *iocb, char __user *buf, - size_t count, loff_t pos); -extern ssize_t nfs_file_direct_write(struct kiocb *iocb, const char __user *buf, - size_t count, loff_t pos); +extern ssize_t nfs_file_direct_read(struct kiocb *iocb, + const struct iovec *iov, unsigned long nr_segs, + loff_t pos); +extern ssize_t nfs_file_direct_write(struct kiocb *iocb, + const struct iovec *iov, unsigned long nr_segs, + loff_t pos); /* * linux/fs/nfs/dir.c diff --git a/include/net/sock.h b/include/net/sock.h index edd4d73ce7f5..40bb90ebb2d1 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -665,7 +665,6 @@ struct sock_iocb { struct sock *sk; struct scm_cookie *scm; struct msghdr *msg, async_msg; - struct iovec async_iov; struct kiocb *kiocb; }; diff --git a/mm/filemap.c b/mm/filemap.c index c4fe97f5ace0..f6c1d22b504f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1226,12 +1226,11 @@ out: EXPORT_SYMBOL(__generic_file_aio_read); ssize_t -generic_file_aio_read(struct kiocb *iocb, char __user *buf, size_t count, loff_t pos) +generic_file_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { - struct iovec local_iov = { .iov_base = buf, .iov_len = count }; - BUG_ON(iocb->ki_pos != pos); - return __generic_file_aio_read(iocb, &local_iov, 1, &iocb->ki_pos); + return __generic_file_aio_read(iocb, iov, nr_segs, &iocb->ki_pos); } EXPORT_SYMBOL(generic_file_aio_read); @@ -2315,22 +2314,22 @@ out: current->backing_dev_info = NULL; return written ? written : err; } -EXPORT_SYMBOL(generic_file_aio_write_nolock); -ssize_t -generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, - unsigned long nr_segs, loff_t *ppos) +ssize_t generic_file_aio_write_nolock(struct kiocb *iocb, + const struct iovec *iov, unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; ssize_t ret; - loff_t pos = *ppos; - ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, ppos); + BUG_ON(iocb->ki_pos != pos); + + ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, + &iocb->ki_pos); if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { - int err; + ssize_t err; err = sync_page_range_nolock(inode, mapping, pos, ret); if (err < 0) @@ -2338,6 +2337,7 @@ generic_file_aio_write_nolock(struct kiocb *iocb, const struct iovec *iov, } return ret; } +EXPORT_SYMBOL(generic_file_aio_write_nolock); static ssize_t __generic_file_write_nolock(struct file *file, const struct iovec *iov, @@ -2347,8 +2347,9 @@ __generic_file_write_nolock(struct file *file, const struct iovec *iov, ssize_t ret; init_sync_kiocb(&kiocb, file); + kiocb.ki_pos = *ppos; ret = __generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos); - if (ret == -EIOCBQUEUED) + if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&kiocb); return ret; } @@ -2361,28 +2362,28 @@ generic_file_write_nolock(struct file *file, const struct iovec *iov, ssize_t ret; init_sync_kiocb(&kiocb, file); - ret = generic_file_aio_write_nolock(&kiocb, iov, nr_segs, ppos); + kiocb.ki_pos = *ppos; + ret = generic_file_aio_write_nolock(&kiocb, iov, nr_segs, *ppos); if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&kiocb); + *ppos = kiocb.ki_pos; return ret; } EXPORT_SYMBOL(generic_file_write_nolock); -ssize_t generic_file_aio_write(struct kiocb *iocb, const char __user *buf, - size_t count, loff_t pos) +ssize_t generic_file_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct file *file = iocb->ki_filp; struct address_space *mapping = file->f_mapping; struct inode *inode = mapping->host; ssize_t ret; - struct iovec local_iov = { .iov_base = (void __user *)buf, - .iov_len = count }; BUG_ON(iocb->ki_pos != pos); mutex_lock(&inode->i_mutex); - ret = __generic_file_aio_write_nolock(iocb, &local_iov, 1, - &iocb->ki_pos); + ret = __generic_file_aio_write_nolock(iocb, iov, nr_segs, + &iocb->ki_pos); mutex_unlock(&inode->i_mutex); if (ret > 0 && ((file->f_flags & O_SYNC) || IS_SYNC(inode))) { diff --git a/net/socket.c b/net/socket.c index 1bc4167e0da8..df92e4252749 100644 --- a/net/socket.c +++ b/net/socket.c @@ -95,10 +95,10 @@ #include static int sock_no_open(struct inode *irrelevant, struct file *dontcare); -static ssize_t sock_aio_read(struct kiocb *iocb, char __user *buf, - size_t size, loff_t pos); -static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *buf, - size_t size, loff_t pos); +static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos); +static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos); static int sock_mmap(struct file *file, struct vm_area_struct *vma); static int sock_close(struct inode *inode, struct file *file); @@ -664,7 +664,6 @@ static ssize_t sock_sendpage(struct file *file, struct page *page, } static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, - char __user *ubuf, size_t size, struct sock_iocb *siocb) { if (!is_sync_kiocb(iocb)) { @@ -675,16 +674,13 @@ static struct sock_iocb *alloc_sock_iocb(struct kiocb *iocb, } siocb->kiocb = iocb; - siocb->async_iov.iov_base = ubuf; - siocb->async_iov.iov_len = size; - iocb->private = siocb; return siocb; } static ssize_t do_sock_read(struct msghdr *msg, struct kiocb *iocb, - struct file *file, struct iovec *iov, - unsigned long nr_segs) + struct file *file, const struct iovec *iov, + unsigned long nr_segs) { struct socket *sock = file->private_data; size_t size = 0; @@ -715,32 +711,33 @@ static ssize_t sock_readv(struct file *file, const struct iovec *iov, init_sync_kiocb(&iocb, NULL); iocb.private = &siocb; - ret = do_sock_read(&msg, &iocb, file, (struct iovec *)iov, nr_segs); + ret = do_sock_read(&msg, &iocb, file, iov, nr_segs); if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&iocb); return ret; } -static ssize_t sock_aio_read(struct kiocb *iocb, char __user *ubuf, - size_t count, loff_t pos) +static ssize_t sock_aio_read(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct sock_iocb siocb, *x; if (pos != 0) return -ESPIPE; - if (count == 0) /* Match SYS5 behaviour */ + + if (iocb->ki_left == 0) /* Match SYS5 behaviour */ return 0; - x = alloc_sock_iocb(iocb, ubuf, count, &siocb); + + x = alloc_sock_iocb(iocb, &siocb); if (!x) return -ENOMEM; - return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, - &x->async_iov, 1); + return do_sock_read(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); } static ssize_t do_sock_write(struct msghdr *msg, struct kiocb *iocb, - struct file *file, struct iovec *iov, - unsigned long nr_segs) + struct file *file, const struct iovec *iov, + unsigned long nr_segs) { struct socket *sock = file->private_data; size_t size = 0; @@ -773,28 +770,28 @@ static ssize_t sock_writev(struct file *file, const struct iovec *iov, init_sync_kiocb(&iocb, NULL); iocb.private = &siocb; - ret = do_sock_write(&msg, &iocb, file, (struct iovec *)iov, nr_segs); + ret = do_sock_write(&msg, &iocb, file, iov, nr_segs); if (-EIOCBQUEUED == ret) ret = wait_on_sync_kiocb(&iocb); return ret; } -static ssize_t sock_aio_write(struct kiocb *iocb, const char __user *ubuf, - size_t count, loff_t pos) +static ssize_t sock_aio_write(struct kiocb *iocb, const struct iovec *iov, + unsigned long nr_segs, loff_t pos) { struct sock_iocb siocb, *x; if (pos != 0) return -ESPIPE; - if (count == 0) /* Match SYS5 behaviour */ + + if (iocb->ki_left == 0) /* Match SYS5 behaviour */ return 0; - x = alloc_sock_iocb(iocb, (void __user *)ubuf, count, &siocb); + x = alloc_sock_iocb(iocb, &siocb); if (!x) return -ENOMEM; - return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, - &x->async_iov, 1); + return do_sock_write(&x->async_msg, iocb, iocb->ki_filp, iov, nr_segs); } /* -- cgit v1.3-8-gc7d7 From 17db952cd16cecc76937b138c685ae3d198ab17c Mon Sep 17 00:00:00 2001 From: Balbir Singh Date: Sat, 30 Sep 2006 23:28:51 -0700 Subject: [PATCH] Add genetlink utilities for payload length calculation Add two utility helper functions genlmsg_msg_size() and genlmsg_total_size(). These functions are derived from their netlink counterparts. Signed-off-by: Balbir Singh Cc: Jamal Hadi Cc: Shailabh Nagar Cc: Thomas Graf Cc: "David S. Miller" Cc: Jay Lan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/net/genetlink.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/net') diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 4a38d85e4e25..b619314218a6 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -169,4 +169,22 @@ static inline int genlmsg_len(const struct genlmsghdr *gnlh) return (nlh->nlmsg_len - GENL_HDRLEN - NLMSG_HDRLEN); } +/** + * genlmsg_msg_size - length of genetlink message not including padding + * @payload: length of message payload + */ +static inline int genlmsg_msg_size(int payload) +{ + return GENL_HDRLEN + payload; +} + +/** + * genlmsg_total_size - length of genetlink message including padding + * @payload: length of message payload + */ +static inline int genlmsg_total_size(int payload) +{ + return NLMSG_ALIGN(genlmsg_msg_size(payload)); +} + #endif /* __NET_GENERIC_NETLINK_H */ -- cgit v1.3-8-gc7d7