[NETNS]: Enable IPv4 address manipulations inside namespace.
[h-e-n] / net / ipv4 / devinet.c
1 /*
2  *      NET3    IP device support routines.
3  *
4  *      Version: $Id: devinet.c,v 1.44 2001/10/31 21:55:54 davem Exp $
5  *
6  *              This program is free software; you can redistribute it and/or
7  *              modify it under the terms of the GNU General Public License
8  *              as published by the Free Software Foundation; either version
9  *              2 of the License, or (at your option) any later version.
10  *
11  *      Derived from the IP parts of dev.c 1.0.19
12  *              Authors:        Ross Biro
13  *                              Fred N. van Kempen, <waltje@uWalt.NL.Mugnet.ORG>
14  *                              Mark Evans, <evansmp@uhura.aston.ac.uk>
15  *
16  *      Additional Authors:
17  *              Alan Cox, <gw4pts@gw4pts.ampr.org>
18  *              Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
19  *
20  *      Changes:
21  *              Alexey Kuznetsov:       pa_* fields are replaced with ifaddr
22  *                                      lists.
23  *              Cyrus Durgin:           updated for kmod
24  *              Matthias Andree:        in devinet_ioctl, compare label and
25  *                                      address (4.4BSD alias style support),
26  *                                      fall back to comparing just the label
27  *                                      if no match found.
28  */
29
30
31 #include <asm/uaccess.h>
32 #include <asm/system.h>
33 #include <linux/bitops.h>
34 #include <linux/capability.h>
35 #include <linux/module.h>
36 #include <linux/types.h>
37 #include <linux/kernel.h>
38 #include <linux/string.h>
39 #include <linux/mm.h>
40 #include <linux/socket.h>
41 #include <linux/sockios.h>
42 #include <linux/in.h>
43 #include <linux/errno.h>
44 #include <linux/interrupt.h>
45 #include <linux/if_addr.h>
46 #include <linux/if_ether.h>
47 #include <linux/inet.h>
48 #include <linux/netdevice.h>
49 #include <linux/etherdevice.h>
50 #include <linux/skbuff.h>
51 #include <linux/init.h>
52 #include <linux/notifier.h>
53 #include <linux/inetdevice.h>
54 #include <linux/igmp.h>
55 #ifdef CONFIG_SYSCTL
56 #include <linux/sysctl.h>
57 #endif
58 #include <linux/kmod.h>
59
60 #include <net/arp.h>
61 #include <net/ip.h>
62 #include <net/route.h>
63 #include <net/ip_fib.h>
64 #include <net/rtnetlink.h>
65 #include <net/net_namespace.h>
66
67 static struct ipv4_devconf ipv4_devconf = {
68         .data = {
69                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
70                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
71                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
72                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
73         },
74 };
75
76 static struct ipv4_devconf ipv4_devconf_dflt = {
77         .data = {
78                 [NET_IPV4_CONF_ACCEPT_REDIRECTS - 1] = 1,
79                 [NET_IPV4_CONF_SEND_REDIRECTS - 1] = 1,
80                 [NET_IPV4_CONF_SECURE_REDIRECTS - 1] = 1,
81                 [NET_IPV4_CONF_SHARED_MEDIA - 1] = 1,
82                 [NET_IPV4_CONF_ACCEPT_SOURCE_ROUTE - 1] = 1,
83         },
84 };
85
/*
 * Read attribute @attr from the default device configuration that namespace
 * @net points to.  @net is parenthesized so that any pointer-valued
 * expression (e.g. "base + 1" or "&obj") expands correctly.
 */
#define IPV4_DEVCONF_DFLT(net, attr) \
	IPV4_DEVCONF((*(net)->ipv4.devconf_dflt), attr)
88
89 static const struct nla_policy ifa_ipv4_policy[IFA_MAX+1] = {
90         [IFA_LOCAL]             = { .type = NLA_U32 },
91         [IFA_ADDRESS]           = { .type = NLA_U32 },
92         [IFA_BROADCAST]         = { .type = NLA_U32 },
93         [IFA_ANYCAST]           = { .type = NLA_U32 },
94         [IFA_LABEL]             = { .type = NLA_STRING, .len = IFNAMSIZ - 1 },
95 };
96
97 static void rtmsg_ifa(int event, struct in_ifaddr *, struct nlmsghdr *, u32);
98
99 static BLOCKING_NOTIFIER_HEAD(inetaddr_chain);
100 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
101                          int destroy);
#ifdef CONFIG_SYSCTL
/* Real implementations are provided when sysctl support is configured. */
static void devinet_sysctl_register(struct in_device *idev);
static void devinet_sysctl_unregister(struct in_device *idev);
#else
/* Without CONFIG_SYSCTL both hooks are empty. */
static inline void devinet_sysctl_register(struct in_device *idev) { }
static inline void devinet_sysctl_unregister(struct in_device *idev) { }
#endif
113
114 /* Locks all the inet devices. */
115
116 static struct in_ifaddr *inet_alloc_ifa(void)
117 {
118         struct in_ifaddr *ifa = kzalloc(sizeof(*ifa), GFP_KERNEL);
119
120         if (ifa) {
121                 INIT_RCU_HEAD(&ifa->rcu_head);
122         }
123
124         return ifa;
125 }
126
127 static void inet_rcu_free_ifa(struct rcu_head *head)
128 {
129         struct in_ifaddr *ifa = container_of(head, struct in_ifaddr, rcu_head);
130         if (ifa->ifa_dev)
131                 in_dev_put(ifa->ifa_dev);
132         kfree(ifa);
133 }
134
135 static inline void inet_free_ifa(struct in_ifaddr *ifa)
136 {
137         call_rcu(&ifa->rcu_head, inet_rcu_free_ifa);
138 }
139
140 void in_dev_finish_destroy(struct in_device *idev)
141 {
142         struct net_device *dev = idev->dev;
143
144         BUG_TRAP(!idev->ifa_list);
145         BUG_TRAP(!idev->mc_list);
146 #ifdef NET_REFCNT_DEBUG
147         printk(KERN_DEBUG "in_dev_finish_destroy: %p=%s\n",
148                idev, dev ? dev->name : "NIL");
149 #endif
150         dev_put(dev);
151         if (!idev->dead)
152                 printk("Freeing alive in_device %p\n", idev);
153         else {
154                 kfree(idev);
155         }
156 }
157
158 static struct in_device *inetdev_init(struct net_device *dev)
159 {
160         struct in_device *in_dev;
161
162         ASSERT_RTNL();
163
164         in_dev = kzalloc(sizeof(*in_dev), GFP_KERNEL);
165         if (!in_dev)
166                 goto out;
167         INIT_RCU_HEAD(&in_dev->rcu_head);
168         memcpy(&in_dev->cnf, dev->nd_net->ipv4.devconf_dflt,
169                         sizeof(in_dev->cnf));
170         in_dev->cnf.sysctl = NULL;
171         in_dev->dev = dev;
172         if ((in_dev->arp_parms = neigh_parms_alloc(dev, &arp_tbl)) == NULL)
173                 goto out_kfree;
174         /* Reference in_dev->dev */
175         dev_hold(dev);
176         /* Account for reference dev->ip_ptr (below) */
177         in_dev_hold(in_dev);
178
179         devinet_sysctl_register(in_dev);
180         ip_mc_init_dev(in_dev);
181         if (dev->flags & IFF_UP)
182                 ip_mc_up(in_dev);
183
184         /* we can receive as soon as ip_ptr is set -- do this last */
185         rcu_assign_pointer(dev->ip_ptr, in_dev);
186 out:
187         return in_dev;
188 out_kfree:
189         kfree(in_dev);
190         in_dev = NULL;
191         goto out;
192 }
193
194 static void in_dev_rcu_put(struct rcu_head *head)
195 {
196         struct in_device *idev = container_of(head, struct in_device, rcu_head);
197         in_dev_put(idev);
198 }
199
200 static void inetdev_destroy(struct in_device *in_dev)
201 {
202         struct in_ifaddr *ifa;
203         struct net_device *dev;
204
205         ASSERT_RTNL();
206
207         dev = in_dev->dev;
208
209         in_dev->dead = 1;
210
211         ip_mc_destroy_dev(in_dev);
212
213         while ((ifa = in_dev->ifa_list) != NULL) {
214                 inet_del_ifa(in_dev, &in_dev->ifa_list, 0);
215                 inet_free_ifa(ifa);
216         }
217
218         dev->ip_ptr = NULL;
219
220         devinet_sysctl_unregister(in_dev);
221         neigh_parms_release(&arp_tbl, in_dev->arp_parms);
222         arp_ifdown(dev);
223
224         call_rcu(&in_dev->rcu_head, in_dev_rcu_put);
225 }
226
227 int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b)
228 {
229         rcu_read_lock();
230         for_primary_ifa(in_dev) {
231                 if (inet_ifa_match(a, ifa)) {
232                         if (!b || inet_ifa_match(b, ifa)) {
233                                 rcu_read_unlock();
234                                 return 1;
235                         }
236                 }
237         } endfor_ifa(in_dev);
238         rcu_read_unlock();
239         return 0;
240 }
241
242 static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
243                          int destroy, struct nlmsghdr *nlh, u32 pid)
244 {
245         struct in_ifaddr *promote = NULL;
246         struct in_ifaddr *ifa, *ifa1 = *ifap;
247         struct in_ifaddr *last_prim = in_dev->ifa_list;
248         struct in_ifaddr *prev_prom = NULL;
249         int do_promote = IN_DEV_PROMOTE_SECONDARIES(in_dev);
250
251         ASSERT_RTNL();
252
253         /* 1. Deleting primary ifaddr forces deletion all secondaries
254          * unless alias promotion is set
255          **/
256
257         if (!(ifa1->ifa_flags & IFA_F_SECONDARY)) {
258                 struct in_ifaddr **ifap1 = &ifa1->ifa_next;
259
260                 while ((ifa = *ifap1) != NULL) {
261                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) &&
262                             ifa1->ifa_scope <= ifa->ifa_scope)
263                                 last_prim = ifa;
264
265                         if (!(ifa->ifa_flags & IFA_F_SECONDARY) ||
266                             ifa1->ifa_mask != ifa->ifa_mask ||
267                             !inet_ifa_match(ifa1->ifa_address, ifa)) {
268                                 ifap1 = &ifa->ifa_next;
269                                 prev_prom = ifa;
270                                 continue;
271                         }
272
273                         if (!do_promote) {
274                                 *ifap1 = ifa->ifa_next;
275
276                                 rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid);
277                                 blocking_notifier_call_chain(&inetaddr_chain,
278                                                 NETDEV_DOWN, ifa);
279                                 inet_free_ifa(ifa);
280                         } else {
281                                 promote = ifa;
282                                 break;
283                         }
284                 }
285         }
286
287         /* 2. Unlink it */
288
289         *ifap = ifa1->ifa_next;
290
291         /* 3. Announce address deletion */
292
293         /* Send message first, then call notifier.
294            At first sight, FIB update triggered by notifier
295            will refer to already deleted ifaddr, that could confuse
296            netlink listeners. It is not true: look, gated sees
297            that route deleted and if it still thinks that ifaddr
298            is valid, it will try to restore deleted routes... Grr.
299            So that, this order is correct.
300          */
301         rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid);
302         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1);
303
304         if (promote) {
305
306                 if (prev_prom) {
307                         prev_prom->ifa_next = promote->ifa_next;
308                         promote->ifa_next = last_prim->ifa_next;
309                         last_prim->ifa_next = promote;
310                 }
311
312                 promote->ifa_flags &= ~IFA_F_SECONDARY;
313                 rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid);
314                 blocking_notifier_call_chain(&inetaddr_chain,
315                                 NETDEV_UP, promote);
316                 for (ifa = promote->ifa_next; ifa; ifa = ifa->ifa_next) {
317                         if (ifa1->ifa_mask != ifa->ifa_mask ||
318                             !inet_ifa_match(ifa1->ifa_address, ifa))
319                                         continue;
320                         fib_add_ifaddr(ifa);
321                 }
322
323         }
324         if (destroy)
325                 inet_free_ifa(ifa1);
326 }
327
328 static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap,
329                          int destroy)
330 {
331         __inet_del_ifa(in_dev, ifap, destroy, NULL, 0);
332 }
333
334 static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh,
335                              u32 pid)
336 {
337         struct in_device *in_dev = ifa->ifa_dev;
338         struct in_ifaddr *ifa1, **ifap, **last_primary;
339
340         ASSERT_RTNL();
341
342         if (!ifa->ifa_local) {
343                 inet_free_ifa(ifa);
344                 return 0;
345         }
346
347         ifa->ifa_flags &= ~IFA_F_SECONDARY;
348         last_primary = &in_dev->ifa_list;
349
350         for (ifap = &in_dev->ifa_list; (ifa1 = *ifap) != NULL;
351              ifap = &ifa1->ifa_next) {
352                 if (!(ifa1->ifa_flags & IFA_F_SECONDARY) &&
353                     ifa->ifa_scope <= ifa1->ifa_scope)
354                         last_primary = &ifa1->ifa_next;
355                 if (ifa1->ifa_mask == ifa->ifa_mask &&
356                     inet_ifa_match(ifa1->ifa_address, ifa)) {
357                         if (ifa1->ifa_local == ifa->ifa_local) {
358                                 inet_free_ifa(ifa);
359                                 return -EEXIST;
360                         }
361                         if (ifa1->ifa_scope != ifa->ifa_scope) {
362                                 inet_free_ifa(ifa);
363                                 return -EINVAL;
364                         }
365                         ifa->ifa_flags |= IFA_F_SECONDARY;
366                 }
367         }
368
369         if (!(ifa->ifa_flags & IFA_F_SECONDARY)) {
370                 net_srandom(ifa->ifa_local);
371                 ifap = last_primary;
372         }
373
374         ifa->ifa_next = *ifap;
375         *ifap = ifa;
376
377         /* Send message first, then call notifier.
378            Notifier will trigger FIB update, so that
379            listeners of netlink will know about new ifaddr */
380         rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid);
381         blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa);
382
383         return 0;
384 }
385
386 static int inet_insert_ifa(struct in_ifaddr *ifa)
387 {
388         return __inet_insert_ifa(ifa, NULL, 0);
389 }
390
391 static int inet_set_ifa(struct net_device *dev, struct in_ifaddr *ifa)
392 {
393         struct in_device *in_dev = __in_dev_get_rtnl(dev);
394
395         ASSERT_RTNL();
396
397         if (!in_dev) {
398                 inet_free_ifa(ifa);
399                 return -ENOBUFS;
400         }
401         ipv4_devconf_setall(in_dev);
402         if (ifa->ifa_dev != in_dev) {
403                 BUG_TRAP(!ifa->ifa_dev);
404                 in_dev_hold(in_dev);
405                 ifa->ifa_dev = in_dev;
406         }
407         if (ipv4_is_loopback(ifa->ifa_local))
408                 ifa->ifa_scope = RT_SCOPE_HOST;
409         return inet_insert_ifa(ifa);
410 }
411
412 struct in_device *inetdev_by_index(struct net *net, int ifindex)
413 {
414         struct net_device *dev;
415         struct in_device *in_dev = NULL;
416         read_lock(&dev_base_lock);
417         dev = __dev_get_by_index(net, ifindex);
418         if (dev)
419                 in_dev = in_dev_get(dev);
420         read_unlock(&dev_base_lock);
421         return in_dev;
422 }
423
424 /* Called only from RTNL semaphored context. No locks. */
425
426 struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix,
427                                     __be32 mask)
428 {
429         ASSERT_RTNL();
430
431         for_primary_ifa(in_dev) {
432                 if (ifa->ifa_mask == mask && inet_ifa_match(prefix, ifa))
433                         return ifa;
434         } endfor_ifa(in_dev);
435         return NULL;
436 }
437
438 static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
439 {
440         struct net *net = skb->sk->sk_net;
441         struct nlattr *tb[IFA_MAX+1];
442         struct in_device *in_dev;
443         struct ifaddrmsg *ifm;
444         struct in_ifaddr *ifa, **ifap;
445         int err = -EINVAL;
446
447         ASSERT_RTNL();
448
449         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
450         if (err < 0)
451                 goto errout;
452
453         ifm = nlmsg_data(nlh);
454         in_dev = inetdev_by_index(net, ifm->ifa_index);
455         if (in_dev == NULL) {
456                 err = -ENODEV;
457                 goto errout;
458         }
459
460         __in_dev_put(in_dev);
461
462         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
463              ifap = &ifa->ifa_next) {
464                 if (tb[IFA_LOCAL] &&
465                     ifa->ifa_local != nla_get_be32(tb[IFA_LOCAL]))
466                         continue;
467
468                 if (tb[IFA_LABEL] && nla_strcmp(tb[IFA_LABEL], ifa->ifa_label))
469                         continue;
470
471                 if (tb[IFA_ADDRESS] &&
472                     (ifm->ifa_prefixlen != ifa->ifa_prefixlen ||
473                     !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa)))
474                         continue;
475
476                 __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid);
477                 return 0;
478         }
479
480         err = -EADDRNOTAVAIL;
481 errout:
482         return err;
483 }
484
485 static struct in_ifaddr *rtm_to_ifaddr(struct net *net, struct nlmsghdr *nlh)
486 {
487         struct nlattr *tb[IFA_MAX+1];
488         struct in_ifaddr *ifa;
489         struct ifaddrmsg *ifm;
490         struct net_device *dev;
491         struct in_device *in_dev;
492         int err;
493
494         err = nlmsg_parse(nlh, sizeof(*ifm), tb, IFA_MAX, ifa_ipv4_policy);
495         if (err < 0)
496                 goto errout;
497
498         ifm = nlmsg_data(nlh);
499         err = -EINVAL;
500         if (ifm->ifa_prefixlen > 32 || tb[IFA_LOCAL] == NULL)
501                 goto errout;
502
503         dev = __dev_get_by_index(net, ifm->ifa_index);
504         err = -ENODEV;
505         if (dev == NULL)
506                 goto errout;
507
508         in_dev = __in_dev_get_rtnl(dev);
509         err = -ENOBUFS;
510         if (in_dev == NULL)
511                 goto errout;
512
513         ifa = inet_alloc_ifa();
514         if (ifa == NULL)
515                 /*
516                  * A potential indev allocation can be left alive, it stays
517                  * assigned to its device and is destroy with it.
518                  */
519                 goto errout;
520
521         ipv4_devconf_setall(in_dev);
522         in_dev_hold(in_dev);
523
524         if (tb[IFA_ADDRESS] == NULL)
525                 tb[IFA_ADDRESS] = tb[IFA_LOCAL];
526
527         ifa->ifa_prefixlen = ifm->ifa_prefixlen;
528         ifa->ifa_mask = inet_make_mask(ifm->ifa_prefixlen);
529         ifa->ifa_flags = ifm->ifa_flags;
530         ifa->ifa_scope = ifm->ifa_scope;
531         ifa->ifa_dev = in_dev;
532
533         ifa->ifa_local = nla_get_be32(tb[IFA_LOCAL]);
534         ifa->ifa_address = nla_get_be32(tb[IFA_ADDRESS]);
535
536         if (tb[IFA_BROADCAST])
537                 ifa->ifa_broadcast = nla_get_be32(tb[IFA_BROADCAST]);
538
539         if (tb[IFA_ANYCAST])
540                 ifa->ifa_anycast = nla_get_be32(tb[IFA_ANYCAST]);
541
542         if (tb[IFA_LABEL])
543                 nla_strlcpy(ifa->ifa_label, tb[IFA_LABEL], IFNAMSIZ);
544         else
545                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
546
547         return ifa;
548
549 errout:
550         return ERR_PTR(err);
551 }
552
553 static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg)
554 {
555         struct net *net = skb->sk->sk_net;
556         struct in_ifaddr *ifa;
557
558         ASSERT_RTNL();
559
560         ifa = rtm_to_ifaddr(net, nlh);
561         if (IS_ERR(ifa))
562                 return PTR_ERR(ifa);
563
564         return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid);
565 }
566
567 /*
568  *      Determine a default network mask, based on the IP address.
569  */
570
571 static __inline__ int inet_abc_len(__be32 addr)
572 {
573         int rc = -1;    /* Something else, probably a multicast. */
574
575         if (ipv4_is_zeronet(addr))
576                 rc = 0;
577         else {
578                 __u32 haddr = ntohl(addr);
579
580                 if (IN_CLASSA(haddr))
581                         rc = 8;
582                 else if (IN_CLASSB(haddr))
583                         rc = 16;
584                 else if (IN_CLASSC(haddr))
585                         rc = 24;
586         }
587
588         return rc;
589 }
590
591
592 int devinet_ioctl(struct net *net, unsigned int cmd, void __user *arg)
593 {
594         struct ifreq ifr;
595         struct sockaddr_in sin_orig;
596         struct sockaddr_in *sin = (struct sockaddr_in *)&ifr.ifr_addr;
597         struct in_device *in_dev;
598         struct in_ifaddr **ifap = NULL;
599         struct in_ifaddr *ifa = NULL;
600         struct net_device *dev;
601         char *colon;
602         int ret = -EFAULT;
603         int tryaddrmatch = 0;
604
605         /*
606          *      Fetch the caller's info block into kernel space
607          */
608
609         if (copy_from_user(&ifr, arg, sizeof(struct ifreq)))
610                 goto out;
611         ifr.ifr_name[IFNAMSIZ - 1] = 0;
612
613         /* save original address for comparison */
614         memcpy(&sin_orig, sin, sizeof(*sin));
615
616         colon = strchr(ifr.ifr_name, ':');
617         if (colon)
618                 *colon = 0;
619
620 #ifdef CONFIG_KMOD
621         dev_load(net, ifr.ifr_name);
622 #endif
623
624         switch (cmd) {
625         case SIOCGIFADDR:       /* Get interface address */
626         case SIOCGIFBRDADDR:    /* Get the broadcast address */
627         case SIOCGIFDSTADDR:    /* Get the destination address */
628         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
629                 /* Note that these ioctls will not sleep,
630                    so that we do not impose a lock.
631                    One day we will be forced to put shlock here (I mean SMP)
632                  */
633                 tryaddrmatch = (sin_orig.sin_family == AF_INET);
634                 memset(sin, 0, sizeof(*sin));
635                 sin->sin_family = AF_INET;
636                 break;
637
638         case SIOCSIFFLAGS:
639                 ret = -EACCES;
640                 if (!capable(CAP_NET_ADMIN))
641                         goto out;
642                 break;
643         case SIOCSIFADDR:       /* Set interface address (and family) */
644         case SIOCSIFBRDADDR:    /* Set the broadcast address */
645         case SIOCSIFDSTADDR:    /* Set the destination address */
646         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
647                 ret = -EACCES;
648                 if (!capable(CAP_NET_ADMIN))
649                         goto out;
650                 ret = -EINVAL;
651                 if (sin->sin_family != AF_INET)
652                         goto out;
653                 break;
654         default:
655                 ret = -EINVAL;
656                 goto out;
657         }
658
659         rtnl_lock();
660
661         ret = -ENODEV;
662         if ((dev = __dev_get_by_name(net, ifr.ifr_name)) == NULL)
663                 goto done;
664
665         if (colon)
666                 *colon = ':';
667
668         if ((in_dev = __in_dev_get_rtnl(dev)) != NULL) {
669                 if (tryaddrmatch) {
670                         /* Matthias Andree */
671                         /* compare label and address (4.4BSD style) */
672                         /* note: we only do this for a limited set of ioctls
673                            and only if the original address family was AF_INET.
674                            This is checked above. */
675                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
676                              ifap = &ifa->ifa_next) {
677                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label) &&
678                                     sin_orig.sin_addr.s_addr ==
679                                                         ifa->ifa_address) {
680                                         break; /* found */
681                                 }
682                         }
683                 }
684                 /* we didn't get a match, maybe the application is
685                    4.3BSD-style and passed in junk so we fall back to
686                    comparing just the label */
687                 if (!ifa) {
688                         for (ifap = &in_dev->ifa_list; (ifa = *ifap) != NULL;
689                              ifap = &ifa->ifa_next)
690                                 if (!strcmp(ifr.ifr_name, ifa->ifa_label))
691                                         break;
692                 }
693         }
694
695         ret = -EADDRNOTAVAIL;
696         if (!ifa && cmd != SIOCSIFADDR && cmd != SIOCSIFFLAGS)
697                 goto done;
698
699         switch (cmd) {
700         case SIOCGIFADDR:       /* Get interface address */
701                 sin->sin_addr.s_addr = ifa->ifa_local;
702                 goto rarok;
703
704         case SIOCGIFBRDADDR:    /* Get the broadcast address */
705                 sin->sin_addr.s_addr = ifa->ifa_broadcast;
706                 goto rarok;
707
708         case SIOCGIFDSTADDR:    /* Get the destination address */
709                 sin->sin_addr.s_addr = ifa->ifa_address;
710                 goto rarok;
711
712         case SIOCGIFNETMASK:    /* Get the netmask for the interface */
713                 sin->sin_addr.s_addr = ifa->ifa_mask;
714                 goto rarok;
715
716         case SIOCSIFFLAGS:
717                 if (colon) {
718                         ret = -EADDRNOTAVAIL;
719                         if (!ifa)
720                                 break;
721                         ret = 0;
722                         if (!(ifr.ifr_flags & IFF_UP))
723                                 inet_del_ifa(in_dev, ifap, 1);
724                         break;
725                 }
726                 ret = dev_change_flags(dev, ifr.ifr_flags);
727                 break;
728
729         case SIOCSIFADDR:       /* Set interface address (and family) */
730                 ret = -EINVAL;
731                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
732                         break;
733
734                 if (!ifa) {
735                         ret = -ENOBUFS;
736                         if ((ifa = inet_alloc_ifa()) == NULL)
737                                 break;
738                         if (colon)
739                                 memcpy(ifa->ifa_label, ifr.ifr_name, IFNAMSIZ);
740                         else
741                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
742                 } else {
743                         ret = 0;
744                         if (ifa->ifa_local == sin->sin_addr.s_addr)
745                                 break;
746                         inet_del_ifa(in_dev, ifap, 0);
747                         ifa->ifa_broadcast = 0;
748                         ifa->ifa_anycast = 0;
749                         ifa->ifa_scope = 0;
750                 }
751
752                 ifa->ifa_address = ifa->ifa_local = sin->sin_addr.s_addr;
753
754                 if (!(dev->flags & IFF_POINTOPOINT)) {
755                         ifa->ifa_prefixlen = inet_abc_len(ifa->ifa_address);
756                         ifa->ifa_mask = inet_make_mask(ifa->ifa_prefixlen);
757                         if ((dev->flags & IFF_BROADCAST) &&
758                             ifa->ifa_prefixlen < 31)
759                                 ifa->ifa_broadcast = ifa->ifa_address |
760                                                      ~ifa->ifa_mask;
761                 } else {
762                         ifa->ifa_prefixlen = 32;
763                         ifa->ifa_mask = inet_make_mask(32);
764                 }
765                 ret = inet_set_ifa(dev, ifa);
766                 break;
767
768         case SIOCSIFBRDADDR:    /* Set the broadcast address */
769                 ret = 0;
770                 if (ifa->ifa_broadcast != sin->sin_addr.s_addr) {
771                         inet_del_ifa(in_dev, ifap, 0);
772                         ifa->ifa_broadcast = sin->sin_addr.s_addr;
773                         inet_insert_ifa(ifa);
774                 }
775                 break;
776
777         case SIOCSIFDSTADDR:    /* Set the destination address */
778                 ret = 0;
779                 if (ifa->ifa_address == sin->sin_addr.s_addr)
780                         break;
781                 ret = -EINVAL;
782                 if (inet_abc_len(sin->sin_addr.s_addr) < 0)
783                         break;
784                 ret = 0;
785                 inet_del_ifa(in_dev, ifap, 0);
786                 ifa->ifa_address = sin->sin_addr.s_addr;
787                 inet_insert_ifa(ifa);
788                 break;
789
790         case SIOCSIFNETMASK:    /* Set the netmask for the interface */
791
792                 /*
793                  *      The mask we set must be legal.
794                  */
795                 ret = -EINVAL;
796                 if (bad_mask(sin->sin_addr.s_addr, 0))
797                         break;
798                 ret = 0;
799                 if (ifa->ifa_mask != sin->sin_addr.s_addr) {
800                         __be32 old_mask = ifa->ifa_mask;
801                         inet_del_ifa(in_dev, ifap, 0);
802                         ifa->ifa_mask = sin->sin_addr.s_addr;
803                         ifa->ifa_prefixlen = inet_mask_len(ifa->ifa_mask);
804
805                         /* See if current broadcast address matches
806                          * with current netmask, then recalculate
807                          * the broadcast address. Otherwise it's a
808                          * funny address, so don't touch it since
809                          * the user seems to know what (s)he's doing...
810                          */
811                         if ((dev->flags & IFF_BROADCAST) &&
812                             (ifa->ifa_prefixlen < 31) &&
813                             (ifa->ifa_broadcast ==
814                              (ifa->ifa_local|~old_mask))) {
815                                 ifa->ifa_broadcast = (ifa->ifa_local |
816                                                       ~sin->sin_addr.s_addr);
817                         }
818                         inet_insert_ifa(ifa);
819                 }
820                 break;
821         }
822 done:
823         rtnl_unlock();
824 out:
825         return ret;
826 rarok:
827         rtnl_unlock();
828         ret = copy_to_user(arg, &ifr, sizeof(struct ifreq)) ? -EFAULT : 0;
829         goto out;
830 }
831
832 static int inet_gifconf(struct net_device *dev, char __user *buf, int len)
833 {
834         struct in_device *in_dev = __in_dev_get_rtnl(dev);
835         struct in_ifaddr *ifa;
836         struct ifreq ifr;
837         int done = 0;
838
839         if (!in_dev || (ifa = in_dev->ifa_list) == NULL)
840                 goto out;
841
842         for (; ifa; ifa = ifa->ifa_next) {
843                 if (!buf) {
844                         done += sizeof(ifr);
845                         continue;
846                 }
847                 if (len < (int) sizeof(ifr))
848                         break;
849                 memset(&ifr, 0, sizeof(struct ifreq));
850                 if (ifa->ifa_label)
851                         strcpy(ifr.ifr_name, ifa->ifa_label);
852                 else
853                         strcpy(ifr.ifr_name, dev->name);
854
855                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_family = AF_INET;
856                 (*(struct sockaddr_in *)&ifr.ifr_addr).sin_addr.s_addr =
857                                                                 ifa->ifa_local;
858
859                 if (copy_to_user(buf, &ifr, sizeof(struct ifreq))) {
860                         done = -EFAULT;
861                         break;
862                 }
863                 buf  += sizeof(struct ifreq);
864                 len  -= sizeof(struct ifreq);
865                 done += sizeof(struct ifreq);
866         }
867 out:
868         return done;
869 }
870
871 __be32 inet_select_addr(const struct net_device *dev, __be32 dst, int scope)
872 {
873         __be32 addr = 0;
874         struct in_device *in_dev;
875
876         rcu_read_lock();
877         in_dev = __in_dev_get_rcu(dev);
878         if (!in_dev)
879                 goto no_in_dev;
880
881         for_primary_ifa(in_dev) {
882                 if (ifa->ifa_scope > scope)
883                         continue;
884                 if (!dst || inet_ifa_match(dst, ifa)) {
885                         addr = ifa->ifa_local;
886                         break;
887                 }
888                 if (!addr)
889                         addr = ifa->ifa_local;
890         } endfor_ifa(in_dev);
891 no_in_dev:
892         rcu_read_unlock();
893
894         if (addr)
895                 goto out;
896
897         /* Not loopback addresses on loopback should be preferred
898            in this case. It is importnat that lo is the first interface
899            in dev_base list.
900          */
901         read_lock(&dev_base_lock);
902         rcu_read_lock();
903         for_each_netdev(&init_net, dev) {
904                 if ((in_dev = __in_dev_get_rcu(dev)) == NULL)
905                         continue;
906
907                 for_primary_ifa(in_dev) {
908                         if (ifa->ifa_scope != RT_SCOPE_LINK &&
909                             ifa->ifa_scope <= scope) {
910                                 addr = ifa->ifa_local;
911                                 goto out_unlock_both;
912                         }
913                 } endfor_ifa(in_dev);
914         }
915 out_unlock_both:
916         read_unlock(&dev_base_lock);
917         rcu_read_unlock();
918 out:
919         return addr;
920 }
921
922 static __be32 confirm_addr_indev(struct in_device *in_dev, __be32 dst,
923                               __be32 local, int scope)
924 {
925         int same = 0;
926         __be32 addr = 0;
927
928         for_ifa(in_dev) {
929                 if (!addr &&
930                     (local == ifa->ifa_local || !local) &&
931                     ifa->ifa_scope <= scope) {
932                         addr = ifa->ifa_local;
933                         if (same)
934                                 break;
935                 }
936                 if (!same) {
937                         same = (!local || inet_ifa_match(local, ifa)) &&
938                                 (!dst || inet_ifa_match(dst, ifa));
939                         if (same && addr) {
940                                 if (local || !dst)
941                                         break;
942                                 /* Is the selected addr into dst subnet? */
943                                 if (inet_ifa_match(addr, ifa))
944                                         break;
945                                 /* No, then can we use new local src? */
946                                 if (ifa->ifa_scope <= scope) {
947                                         addr = ifa->ifa_local;
948                                         break;
949                                 }
950                                 /* search for large dst subnet for addr */
951                                 same = 0;
952                         }
953                 }
954         } endfor_ifa(in_dev);
955
956         return same? addr : 0;
957 }
958
959 /*
960  * Confirm that local IP address exists using wildcards:
961  * - in_dev: only on this interface, 0=any interface
962  * - dst: only in the same subnet as dst, 0=any dst
963  * - local: address, 0=autoselect the local address
964  * - scope: maximum allowed scope value for the local address
965  */
966 __be32 inet_confirm_addr(struct in_device *in_dev,
967                          __be32 dst, __be32 local, int scope)
968 {
969         __be32 addr = 0;
970         struct net_device *dev;
971         struct net *net;
972
973         if (scope != RT_SCOPE_LINK)
974                 return confirm_addr_indev(in_dev, dst, local, scope);
975
976         net = in_dev->dev->nd_net;
977         read_lock(&dev_base_lock);
978         rcu_read_lock();
979         for_each_netdev(net, dev) {
980                 if ((in_dev = __in_dev_get_rcu(dev))) {
981                         addr = confirm_addr_indev(in_dev, dst, local, scope);
982                         if (addr)
983                                 break;
984                 }
985         }
986         rcu_read_unlock();
987         read_unlock(&dev_base_lock);
988
989         return addr;
990 }
991
992 /*
993  *      Device notifier
994  */
995
996 int register_inetaddr_notifier(struct notifier_block *nb)
997 {
998         return blocking_notifier_chain_register(&inetaddr_chain, nb);
999 }
1000
1001 int unregister_inetaddr_notifier(struct notifier_block *nb)
1002 {
1003         return blocking_notifier_chain_unregister(&inetaddr_chain, nb);
1004 }
1005
1006 /* Rename ifa_labels for a device name change. Make some effort to preserve existing
1007  * alias numbering and to create unique labels if possible.
1008 */
1009 static void inetdev_changename(struct net_device *dev, struct in_device *in_dev)
1010 {
1011         struct in_ifaddr *ifa;
1012         int named = 0;
1013
1014         for (ifa = in_dev->ifa_list; ifa; ifa = ifa->ifa_next) {
1015                 char old[IFNAMSIZ], *dot;
1016
1017                 memcpy(old, ifa->ifa_label, IFNAMSIZ);
1018                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1019                 if (named++ == 0)
1020                         continue;
1021                 dot = strchr(old, ':');
1022                 if (dot == NULL) {
1023                         sprintf(old, ":%d", named);
1024                         dot = old;
1025                 }
1026                 if (strlen(dot) + strlen(dev->name) < IFNAMSIZ) {
1027                         strcat(ifa->ifa_label, dot);
1028                 } else {
1029                         strcpy(ifa->ifa_label + (IFNAMSIZ - strlen(dot) - 1), dot);
1030                 }
1031         }
1032 }
1033
1034 /* Called only under RTNL semaphore */
1035
1036 static int inetdev_event(struct notifier_block *this, unsigned long event,
1037                          void *ptr)
1038 {
1039         struct net_device *dev = ptr;
1040         struct in_device *in_dev = __in_dev_get_rtnl(dev);
1041
1042         ASSERT_RTNL();
1043
1044         if (!in_dev) {
1045                 if (event == NETDEV_REGISTER) {
1046                         in_dev = inetdev_init(dev);
1047                         if (!in_dev)
1048                                 return notifier_from_errno(-ENOMEM);
1049                         if (dev->flags & IFF_LOOPBACK) {
1050                                 IN_DEV_CONF_SET(in_dev, NOXFRM, 1);
1051                                 IN_DEV_CONF_SET(in_dev, NOPOLICY, 1);
1052                         }
1053                 }
1054                 goto out;
1055         }
1056
1057         switch (event) {
1058         case NETDEV_REGISTER:
1059                 printk(KERN_DEBUG "inetdev_event: bug\n");
1060                 dev->ip_ptr = NULL;
1061                 break;
1062         case NETDEV_UP:
1063                 if (dev->mtu < 68)
1064                         break;
1065                 if (dev->flags & IFF_LOOPBACK) {
1066                         struct in_ifaddr *ifa;
1067                         if ((ifa = inet_alloc_ifa()) != NULL) {
1068                                 ifa->ifa_local =
1069                                   ifa->ifa_address = htonl(INADDR_LOOPBACK);
1070                                 ifa->ifa_prefixlen = 8;
1071                                 ifa->ifa_mask = inet_make_mask(8);
1072                                 in_dev_hold(in_dev);
1073                                 ifa->ifa_dev = in_dev;
1074                                 ifa->ifa_scope = RT_SCOPE_HOST;
1075                                 memcpy(ifa->ifa_label, dev->name, IFNAMSIZ);
1076                                 inet_insert_ifa(ifa);
1077                         }
1078                 }
1079                 ip_mc_up(in_dev);
1080                 break;
1081         case NETDEV_DOWN:
1082                 ip_mc_down(in_dev);
1083                 break;
1084         case NETDEV_CHANGEMTU:
1085                 if (dev->mtu >= 68)
1086                         break;
1087                 /* MTU falled under 68, disable IP */
1088         case NETDEV_UNREGISTER:
1089                 inetdev_destroy(in_dev);
1090                 break;
1091         case NETDEV_CHANGENAME:
1092                 /* Do not notify about label change, this event is
1093                  * not interesting to applications using netlink.
1094                  */
1095                 inetdev_changename(dev, in_dev);
1096
1097                 devinet_sysctl_unregister(in_dev);
1098                 devinet_sysctl_register(in_dev);
1099                 break;
1100         }
1101 out:
1102         return NOTIFY_DONE;
1103 }
1104
1105 static struct notifier_block ip_netdev_notifier = {
1106         .notifier_call =inetdev_event,
1107 };
1108
1109 static inline size_t inet_nlmsg_size(void)
1110 {
1111         return NLMSG_ALIGN(sizeof(struct ifaddrmsg))
1112                + nla_total_size(4) /* IFA_ADDRESS */
1113                + nla_total_size(4) /* IFA_LOCAL */
1114                + nla_total_size(4) /* IFA_BROADCAST */
1115                + nla_total_size(4) /* IFA_ANYCAST */
1116                + nla_total_size(IFNAMSIZ); /* IFA_LABEL */
1117 }
1118
1119 static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa,
1120                             u32 pid, u32 seq, int event, unsigned int flags)
1121 {
1122         struct ifaddrmsg *ifm;
1123         struct nlmsghdr  *nlh;
1124
1125         nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags);
1126         if (nlh == NULL)
1127                 return -EMSGSIZE;
1128
1129         ifm = nlmsg_data(nlh);
1130         ifm->ifa_family = AF_INET;
1131         ifm->ifa_prefixlen = ifa->ifa_prefixlen;
1132         ifm->ifa_flags = ifa->ifa_flags|IFA_F_PERMANENT;
1133         ifm->ifa_scope = ifa->ifa_scope;
1134         ifm->ifa_index = ifa->ifa_dev->dev->ifindex;
1135
1136         if (ifa->ifa_address)
1137                 NLA_PUT_BE32(skb, IFA_ADDRESS, ifa->ifa_address);
1138
1139         if (ifa->ifa_local)
1140                 NLA_PUT_BE32(skb, IFA_LOCAL, ifa->ifa_local);
1141
1142         if (ifa->ifa_broadcast)
1143                 NLA_PUT_BE32(skb, IFA_BROADCAST, ifa->ifa_broadcast);
1144
1145         if (ifa->ifa_anycast)
1146                 NLA_PUT_BE32(skb, IFA_ANYCAST, ifa->ifa_anycast);
1147
1148         if (ifa->ifa_label[0])
1149                 NLA_PUT_STRING(skb, IFA_LABEL, ifa->ifa_label);
1150
1151         return nlmsg_end(skb, nlh);
1152
1153 nla_put_failure:
1154         nlmsg_cancel(skb, nlh);
1155         return -EMSGSIZE;
1156 }
1157
1158 static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb)
1159 {
1160         struct net *net = skb->sk->sk_net;
1161         int idx, ip_idx;
1162         struct net_device *dev;
1163         struct in_device *in_dev;
1164         struct in_ifaddr *ifa;
1165         int s_ip_idx, s_idx = cb->args[0];
1166
1167         s_ip_idx = ip_idx = cb->args[1];
1168         idx = 0;
1169         for_each_netdev(net, dev) {
1170                 if (idx < s_idx)
1171                         goto cont;
1172                 if (idx > s_idx)
1173                         s_ip_idx = 0;
1174                 if ((in_dev = __in_dev_get_rtnl(dev)) == NULL)
1175                         goto cont;
1176
1177                 for (ifa = in_dev->ifa_list, ip_idx = 0; ifa;
1178                      ifa = ifa->ifa_next, ip_idx++) {
1179                         if (ip_idx < s_ip_idx)
1180                                 continue;
1181                         if (inet_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid,
1182                                              cb->nlh->nlmsg_seq,
1183                                              RTM_NEWADDR, NLM_F_MULTI) <= 0)
1184                                 goto done;
1185                 }
1186 cont:
1187                 idx++;
1188         }
1189
1190 done:
1191         cb->args[0] = idx;
1192         cb->args[1] = ip_idx;
1193
1194         return skb->len;
1195 }
1196
1197 static void rtmsg_ifa(int event, struct in_ifaddr* ifa, struct nlmsghdr *nlh,
1198                       u32 pid)
1199 {
1200         struct sk_buff *skb;
1201         u32 seq = nlh ? nlh->nlmsg_seq : 0;
1202         int err = -ENOBUFS;
1203         struct net *net;
1204
1205         net = ifa->ifa_dev->dev->nd_net;
1206         skb = nlmsg_new(inet_nlmsg_size(), GFP_KERNEL);
1207         if (skb == NULL)
1208                 goto errout;
1209
1210         err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0);
1211         if (err < 0) {
1212                 /* -EMSGSIZE implies BUG in inet_nlmsg_size() */
1213                 WARN_ON(err == -EMSGSIZE);
1214                 kfree_skb(skb);
1215                 goto errout;
1216         }
1217         err = rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL);
1218 errout:
1219         if (err < 0)
1220                 rtnl_set_sk_err(net, RTNLGRP_IPV4_IFADDR, err);
1221 }
1222
1223 #ifdef CONFIG_SYSCTL
1224
1225 static void devinet_copy_dflt_conf(struct net *net, int i)
1226 {
1227         struct net_device *dev;
1228
1229         read_lock(&dev_base_lock);
1230         for_each_netdev(net, dev) {
1231                 struct in_device *in_dev;
1232                 rcu_read_lock();
1233                 in_dev = __in_dev_get_rcu(dev);
1234                 if (in_dev && !test_bit(i, in_dev->cnf.state))
1235                         in_dev->cnf.data[i] = net->ipv4.devconf_dflt->data[i];
1236                 rcu_read_unlock();
1237         }
1238         read_unlock(&dev_base_lock);
1239 }
1240
1241 static void inet_forward_change(struct net *net)
1242 {
1243         struct net_device *dev;
1244         int on = IPV4_DEVCONF_ALL(net, FORWARDING);
1245
1246         IPV4_DEVCONF_ALL(net, ACCEPT_REDIRECTS) = !on;
1247         IPV4_DEVCONF_DFLT(net, FORWARDING) = on;
1248
1249         read_lock(&dev_base_lock);
1250         for_each_netdev(net, dev) {
1251                 struct in_device *in_dev;
1252                 rcu_read_lock();
1253                 in_dev = __in_dev_get_rcu(dev);
1254                 if (in_dev)
1255                         IN_DEV_CONF_SET(in_dev, FORWARDING, on);
1256                 rcu_read_unlock();
1257         }
1258         read_unlock(&dev_base_lock);
1259
1260         rt_cache_flush(0);
1261 }
1262
1263 static int devinet_conf_proc(ctl_table *ctl, int write,
1264                              struct file* filp, void __user *buffer,
1265                              size_t *lenp, loff_t *ppos)
1266 {
1267         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1268
1269         if (write) {
1270                 struct ipv4_devconf *cnf = ctl->extra1;
1271                 struct net *net = ctl->extra2;
1272                 int i = (int *)ctl->data - cnf->data;
1273
1274                 set_bit(i, cnf->state);
1275
1276                 if (cnf == net->ipv4.devconf_dflt)
1277                         devinet_copy_dflt_conf(net, i);
1278         }
1279
1280         return ret;
1281 }
1282
1283 static int devinet_conf_sysctl(ctl_table *table, int __user *name, int nlen,
1284                                void __user *oldval, size_t __user *oldlenp,
1285                                void __user *newval, size_t newlen)
1286 {
1287         struct ipv4_devconf *cnf;
1288         struct net *net;
1289         int *valp = table->data;
1290         int new;
1291         int i;
1292
1293         if (!newval || !newlen)
1294                 return 0;
1295
1296         if (newlen != sizeof(int))
1297                 return -EINVAL;
1298
1299         if (get_user(new, (int __user *)newval))
1300                 return -EFAULT;
1301
1302         if (new == *valp)
1303                 return 0;
1304
1305         if (oldval && oldlenp) {
1306                 size_t len;
1307
1308                 if (get_user(len, oldlenp))
1309                         return -EFAULT;
1310
1311                 if (len) {
1312                         if (len > table->maxlen)
1313                                 len = table->maxlen;
1314                         if (copy_to_user(oldval, valp, len))
1315                                 return -EFAULT;
1316                         if (put_user(len, oldlenp))
1317                                 return -EFAULT;
1318                 }
1319         }
1320
1321         *valp = new;
1322
1323         cnf = table->extra1;
1324         net = table->extra2;
1325         i = (int *)table->data - cnf->data;
1326
1327         set_bit(i, cnf->state);
1328
1329         if (cnf == net->ipv4.devconf_dflt)
1330                 devinet_copy_dflt_conf(net, i);
1331
1332         return 1;
1333 }
1334
1335 static int devinet_sysctl_forward(ctl_table *ctl, int write,
1336                                   struct file* filp, void __user *buffer,
1337                                   size_t *lenp, loff_t *ppos)
1338 {
1339         int *valp = ctl->data;
1340         int val = *valp;
1341         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1342
1343         if (write && *valp != val) {
1344                 struct net *net = ctl->extra2;
1345
1346                 if (valp == &IPV4_DEVCONF_ALL(net, FORWARDING))
1347                         inet_forward_change(net);
1348                 else if (valp != &IPV4_DEVCONF_DFLT(net, FORWARDING))
1349                         rt_cache_flush(0);
1350         }
1351
1352         return ret;
1353 }
1354
1355 int ipv4_doint_and_flush(ctl_table *ctl, int write,
1356                          struct file* filp, void __user *buffer,
1357                          size_t *lenp, loff_t *ppos)
1358 {
1359         int *valp = ctl->data;
1360         int val = *valp;
1361         int ret = proc_dointvec(ctl, write, filp, buffer, lenp, ppos);
1362
1363         if (write && *valp != val)
1364                 rt_cache_flush(0);
1365
1366         return ret;
1367 }
1368
1369 int ipv4_doint_and_flush_strategy(ctl_table *table, int __user *name, int nlen,
1370                                   void __user *oldval, size_t __user *oldlenp,
1371                                   void __user *newval, size_t newlen)
1372 {
1373         int ret = devinet_conf_sysctl(table, name, nlen, oldval, oldlenp,
1374                                       newval, newlen);
1375
1376         if (ret == 1)
1377                 rt_cache_flush(0);
1378
1379         return ret;
1380 }
1381
1382
/*
 * Builders for the devconf sysctl template entries.
 *
 * DEVINET_SYSCTL_ENTRY produces one ctl_table initializer whose .data
 * and .extra1 point into the global ipv4_devconf; the remaining macros
 * are shorthands that fix the mode and/or the handler pair.
 */
#define DEVINET_SYSCTL_ENTRY(attr, name, mval, proc, sysctl) \
	{ \
		.procname	= name, \
		.ctl_name	= NET_IPV4_CONF_ ## attr, \
		.mode		= mval, \
		.maxlen		= sizeof(int), \
		.data		= ipv4_devconf.data + \
				  NET_IPV4_CONF_ ## attr - 1, \
		.extra1		= &ipv4_devconf, \
		.proc_handler	= proc, \
		.strategy	= sysctl, \
	}

/* Read-write integer with the generic devconf handlers. */
#define DEVINET_SYSCTL_RW_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Read-only (0444) integer with the generic devconf handlers. */
#define DEVINET_SYSCTL_RO_ENTRY(attr, name) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0444, devinet_conf_proc, \
			     devinet_conf_sysctl)

/* Read-write integer with caller-supplied handlers. */
#define DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, proc, sysctl) \
	DEVINET_SYSCTL_ENTRY(attr, name, 0644, proc, sysctl)

/* Read-write integer whose handlers flush the routing cache on change. */
#define DEVINET_SYSCTL_FLUSHING_ENTRY(attr, name) \
	DEVINET_SYSCTL_COMPLEX_ENTRY(attr, name, ipv4_doint_and_flush, \
				     ipv4_doint_and_flush_strategy)
1410
1411 static struct devinet_sysctl_table {
1412         struct ctl_table_header *sysctl_header;
1413         struct ctl_table devinet_vars[__NET_IPV4_CONF_MAX];
1414         char *dev_name;
1415 } devinet_sysctl = {
1416         .devinet_vars = {
1417                 DEVINET_SYSCTL_COMPLEX_ENTRY(FORWARDING, "forwarding",
1418                                              devinet_sysctl_forward,
1419                                              devinet_conf_sysctl),
1420                 DEVINET_SYSCTL_RO_ENTRY(MC_FORWARDING, "mc_forwarding"),
1421
1422                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_REDIRECTS, "accept_redirects"),
1423                 DEVINET_SYSCTL_RW_ENTRY(SECURE_REDIRECTS, "secure_redirects"),
1424                 DEVINET_SYSCTL_RW_ENTRY(SHARED_MEDIA, "shared_media"),
1425                 DEVINET_SYSCTL_RW_ENTRY(RP_FILTER, "rp_filter"),
1426                 DEVINET_SYSCTL_RW_ENTRY(SEND_REDIRECTS, "send_redirects"),
1427                 DEVINET_SYSCTL_RW_ENTRY(ACCEPT_SOURCE_ROUTE,
1428                                         "accept_source_route"),
1429                 DEVINET_SYSCTL_RW_ENTRY(PROXY_ARP, "proxy_arp"),
1430                 DEVINET_SYSCTL_RW_ENTRY(MEDIUM_ID, "medium_id"),
1431                 DEVINET_SYSCTL_RW_ENTRY(BOOTP_RELAY, "bootp_relay"),
1432                 DEVINET_SYSCTL_RW_ENTRY(LOG_MARTIANS, "log_martians"),
1433                 DEVINET_SYSCTL_RW_ENTRY(TAG, "tag"),
1434                 DEVINET_SYSCTL_RW_ENTRY(ARPFILTER, "arp_filter"),
1435                 DEVINET_SYSCTL_RW_ENTRY(ARP_ANNOUNCE, "arp_announce"),
1436                 DEVINET_SYSCTL_RW_ENTRY(ARP_IGNORE, "arp_ignore"),
1437                 DEVINET_SYSCTL_RW_ENTRY(ARP_ACCEPT, "arp_accept"),
1438
1439                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOXFRM, "disable_xfrm"),
1440                 DEVINET_SYSCTL_FLUSHING_ENTRY(NOPOLICY, "disable_policy"),
1441                 DEVINET_SYSCTL_FLUSHING_ENTRY(FORCE_IGMP_VERSION,
1442                                               "force_igmp_version"),
1443                 DEVINET_SYSCTL_FLUSHING_ENTRY(PROMOTE_SECONDARIES,
1444                                               "promote_secondaries"),
1445         },
1446 };
1447
1448 static int __devinet_sysctl_register(struct net *net, char *dev_name,
1449                 int ctl_name, struct ipv4_devconf *p)
1450 {
1451         int i;
1452         struct devinet_sysctl_table *t;
1453
1454 #define DEVINET_CTL_PATH_DEV    3
1455
1456         struct ctl_path devinet_ctl_path[] = {
1457                 { .procname = "net", .ctl_name = CTL_NET, },
1458                 { .procname = "ipv4", .ctl_name = NET_IPV4, },
1459                 { .procname = "conf", .ctl_name = NET_IPV4_CONF, },
1460                 { /* to be set */ },
1461                 { },
1462         };
1463
1464         t = kmemdup(&devinet_sysctl, sizeof(*t), GFP_KERNEL);
1465         if (!t)
1466                 goto out;
1467
1468         for (i = 0; i < ARRAY_SIZE(t->devinet_vars) - 1; i++) {
1469                 t->devinet_vars[i].data += (char *)p - (char *)&ipv4_devconf;
1470                 t->devinet_vars[i].extra1 = p;
1471                 t->devinet_vars[i].extra2 = net;
1472         }
1473
1474         /*
1475          * Make a copy of dev_name, because '.procname' is regarded as const
1476          * by sysctl and we wouldn't want anyone to change it under our feet
1477          * (see SIOCSIFNAME).
1478          */
1479         t->dev_name = kstrdup(dev_name, GFP_KERNEL);
1480         if (!t->dev_name)
1481                 goto free;
1482
1483         devinet_ctl_path[DEVINET_CTL_PATH_DEV].procname = t->dev_name;
1484         devinet_ctl_path[DEVINET_CTL_PATH_DEV].ctl_name = ctl_name;
1485
1486         t->sysctl_header = register_net_sysctl_table(net, devinet_ctl_path,
1487                         t->devinet_vars);
1488         if (!t->sysctl_header)
1489                 goto free_procname;
1490
1491         p->sysctl = t;
1492         return 0;
1493
1494 free_procname:
1495         kfree(t->dev_name);
1496 free:
1497         kfree(t);
1498 out:
1499         return -ENOBUFS;
1500 }
1501
1502 static void __devinet_sysctl_unregister(struct ipv4_devconf *cnf)
1503 {
1504         struct devinet_sysctl_table *t = cnf->sysctl;
1505
1506         if (t == NULL)
1507                 return;
1508
1509         cnf->sysctl = NULL;
1510         unregister_sysctl_table(t->sysctl_header);
1511         kfree(t->dev_name);
1512         kfree(t);
1513 }
1514
1515 static void devinet_sysctl_register(struct in_device *idev)
1516 {
1517         neigh_sysctl_register(idev->dev, idev->arp_parms, NET_IPV4,
1518                         NET_IPV4_NEIGH, "ipv4", NULL, NULL);
1519         __devinet_sysctl_register(idev->dev->nd_net, idev->dev->name,
1520                         idev->dev->ifindex, &idev->cnf);
1521 }
1522
1523 static void devinet_sysctl_unregister(struct in_device *idev)
1524 {
1525         __devinet_sysctl_unregister(&idev->cnf);
1526         neigh_sysctl_unregister(idev->arp_parms);
1527 }
1528
1529 static struct ctl_table ctl_forward_entry[] = {
1530         {
1531                 .ctl_name       = NET_IPV4_FORWARD,
1532                 .procname       = "ip_forward",
1533                 .data           = &ipv4_devconf.data[
1534                                         NET_IPV4_CONF_FORWARDING - 1],
1535                 .maxlen         = sizeof(int),
1536                 .mode           = 0644,
1537                 .proc_handler   = devinet_sysctl_forward,
1538                 .strategy       = devinet_conf_sysctl,
1539                 .extra1         = &ipv4_devconf,
1540                 .extra2         = &init_net,
1541         },
1542         { },
1543 };
1544
1545 static __net_initdata struct ctl_path net_ipv4_path[] = {
1546         { .procname = "net", .ctl_name = CTL_NET, },
1547         { .procname = "ipv4", .ctl_name = NET_IPV4, },
1548         { },
1549 };
1550 #endif
1551
1552 static __net_init int devinet_init_net(struct net *net)
1553 {
1554         int err;
1555         struct ipv4_devconf *all, *dflt;
1556 #ifdef CONFIG_SYSCTL
1557         struct ctl_table *tbl = ctl_forward_entry;
1558         struct ctl_table_header *forw_hdr;
1559 #endif
1560
1561         err = -ENOMEM;
1562         all = &ipv4_devconf;
1563         dflt = &ipv4_devconf_dflt;
1564
1565         if (net != &init_net) {
1566                 all = kmemdup(all, sizeof(ipv4_devconf), GFP_KERNEL);
1567                 if (all == NULL)
1568                         goto err_alloc_all;
1569
1570                 dflt = kmemdup(dflt, sizeof(ipv4_devconf_dflt), GFP_KERNEL);
1571                 if (dflt == NULL)
1572                         goto err_alloc_dflt;
1573
1574 #ifdef CONFIG_SYSCTL
1575                 tbl = kmemdup(tbl, sizeof(ctl_forward_entry), GFP_KERNEL);
1576                 if (tbl == NULL)
1577                         goto err_alloc_ctl;
1578
1579                 tbl[0].data = &all->data[NET_IPV4_CONF_FORWARDING - 1];
1580                 tbl[0].extra1 = all;
1581                 tbl[0].extra2 = net;
1582 #endif
1583         }
1584
1585 #ifdef CONFIG_SYSCTL
1586         err = __devinet_sysctl_register(net, "all",
1587                         NET_PROTO_CONF_ALL, all);
1588         if (err < 0)
1589                 goto err_reg_all;
1590
1591         err = __devinet_sysctl_register(net, "default",
1592                         NET_PROTO_CONF_DEFAULT, dflt);
1593         if (err < 0)
1594                 goto err_reg_dflt;
1595
1596         err = -ENOMEM;
1597         forw_hdr = register_net_sysctl_table(net, net_ipv4_path, tbl);
1598         if (forw_hdr == NULL)
1599                 goto err_reg_ctl;
1600         net->ipv4.forw_hdr = forw_hdr;
1601 #endif
1602
1603         net->ipv4.devconf_all = all;
1604         net->ipv4.devconf_dflt = dflt;
1605         return 0;
1606
1607 #ifdef CONFIG_SYSCTL
1608 err_reg_ctl:
1609         __devinet_sysctl_unregister(dflt);
1610 err_reg_dflt:
1611         __devinet_sysctl_unregister(all);
1612 err_reg_all:
1613         if (tbl != ctl_forward_entry)
1614                 kfree(tbl);
1615 err_alloc_ctl:
1616 #endif
1617         if (dflt != &ipv4_devconf_dflt)
1618                 kfree(dflt);
1619 err_alloc_dflt:
1620         if (all != &ipv4_devconf)
1621                 kfree(all);
1622 err_alloc_all:
1623         return err;
1624 }
1625
1626 static __net_exit void devinet_exit_net(struct net *net)
1627 {
1628 #ifdef CONFIG_SYSCTL
1629         struct ctl_table *tbl;
1630
1631         tbl = net->ipv4.forw_hdr->ctl_table_arg;
1632         unregister_net_sysctl_table(net->ipv4.forw_hdr);
1633         __devinet_sysctl_unregister(net->ipv4.devconf_dflt);
1634         __devinet_sysctl_unregister(net->ipv4.devconf_all);
1635         kfree(tbl);
1636 #endif
1637         kfree(net->ipv4.devconf_dflt);
1638         kfree(net->ipv4.devconf_all);
1639 }
1640
1641 static __net_initdata struct pernet_operations devinet_ops = {
1642         .init = devinet_init_net,
1643         .exit = devinet_exit_net,
1644 };
1645
1646 void __init devinet_init(void)
1647 {
1648         register_pernet_subsys(&devinet_ops);
1649
1650         register_gifconf(PF_INET, inet_gifconf);
1651         register_netdevice_notifier(&ip_netdev_notifier);
1652
1653         rtnl_register(PF_INET, RTM_NEWADDR, inet_rtm_newaddr, NULL);
1654         rtnl_register(PF_INET, RTM_DELADDR, inet_rtm_deladdr, NULL);
1655         rtnl_register(PF_INET, RTM_GETADDR, NULL, inet_dump_ifaddr);
1656 }
1657
/* Symbols made available to other kernel code. */
EXPORT_SYMBOL(in_dev_finish_destroy);
EXPORT_SYMBOL(inetdev_by_index);
EXPORT_SYMBOL(inet_select_addr);
EXPORT_SYMBOL(register_inetaddr_notifier);
EXPORT_SYMBOL(unregister_inetaddr_notifier);