aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
Diffstat (limited to 'net/ipv4/inet_hashtables.c')
-rw-r--r--net/ipv4/inet_hashtables.c96
1 files changed, 82 insertions, 14 deletions
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c
index 3a5f12f011cb..0a8aec3f37cc 100644
--- a/net/ipv4/inet_hashtables.c
+++ b/net/ipv4/inet_hashtables.c
@@ -24,6 +24,9 @@
24#include <net/addrconf.h> 24#include <net/addrconf.h>
25#include <net/inet_connection_sock.h> 25#include <net/inet_connection_sock.h>
26#include <net/inet_hashtables.h> 26#include <net/inet_hashtables.h>
27#if IS_ENABLED(CONFIG_IPV6)
28#include <net/inet6_hashtables.h>
29#endif
27#include <net/secure_seq.h> 30#include <net/secure_seq.h>
28#include <net/ip.h> 31#include <net/ip.h>
29#include <net/tcp.h> 32#include <net/tcp.h>
@@ -504,7 +507,7 @@ not_unique:
504 return -EADDRNOTAVAIL; 507 return -EADDRNOTAVAIL;
505} 508}
506 509
507static u32 inet_sk_port_offset(const struct sock *sk) 510static u64 inet_sk_port_offset(const struct sock *sk)
508{ 511{
509 const struct inet_sock *inet = inet_sk(sk); 512 const struct inet_sock *inet = inet_sk(sk);
510 513
@@ -513,10 +516,52 @@ static u32 inet_sk_port_offset(const struct sock *sk)
513 inet->inet_dport); 516 inet->inet_dport);
514} 517}
515 518
516/* insert a socket into ehash, and eventually remove another one 519/* Searches for an existing socket in the ehash bucket list.
517 * (The another one can be a SYN_RECV or TIMEWAIT 520 * Returns true if found, false otherwise.
518 */ 521 */
519bool inet_ehash_insert(struct sock *sk, struct sock *osk) 522static bool inet_ehash_lookup_by_sk(struct sock *sk,
523 struct hlist_nulls_head *list)
524{
525 const __portpair ports = INET_COMBINED_PORTS(sk->sk_dport, sk->sk_num);
526 const int sdif = sk->sk_bound_dev_if;
527 const int dif = sk->sk_bound_dev_if;
528 const struct hlist_nulls_node *node;
529 struct net *net = sock_net(sk);
530 struct sock *esk;
531
532 INET_ADDR_COOKIE(acookie, sk->sk_daddr, sk->sk_rcv_saddr);
533
534 sk_nulls_for_each_rcu(esk, node, list) {
535 if (esk->sk_hash != sk->sk_hash)
536 continue;
537 if (sk->sk_family == AF_INET) {
538 if (unlikely(INET_MATCH(esk, net, acookie,
539 sk->sk_daddr,
540 sk->sk_rcv_saddr,
541 ports, dif, sdif))) {
542 return true;
543 }
544 }
545#if IS_ENABLED(CONFIG_IPV6)
546 else if (sk->sk_family == AF_INET6) {
547 if (unlikely(INET6_MATCH(esk, net,
548 &sk->sk_v6_daddr,
549 &sk->sk_v6_rcv_saddr,
550 ports, dif, sdif))) {
551 return true;
552 }
553 }
554#endif
555 }
556 return false;
557}
558
559/* Insert a socket into ehash, and eventually remove another one
560 * (The other one can be a SYN_RECV or TIMEWAIT)
561 * If osk is NULL, found_dup_sk is non-NULL and a matching socket is already
562 * in the bucket, sk is not inserted and *found_dup_sk is set to true.
563 */
564bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk)
520{ 565{
521 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; 566 struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo;
522 struct hlist_nulls_head *list; 567 struct hlist_nulls_head *list;
@@ -535,16 +580,23 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk)
535 if (osk) { 580 if (osk) {
536 WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); 581 WARN_ON_ONCE(sk->sk_hash != osk->sk_hash);
537 ret = sk_nulls_del_node_init_rcu(osk); 582 ret = sk_nulls_del_node_init_rcu(osk);
583 } else if (found_dup_sk) {
584 *found_dup_sk = inet_ehash_lookup_by_sk(sk, list);
585 if (*found_dup_sk)
586 ret = false;
538 } 587 }
588
539 if (ret) 589 if (ret)
540 __sk_nulls_add_node_rcu(sk, list); 590 __sk_nulls_add_node_rcu(sk, list);
591
541 spin_unlock(lock); 592 spin_unlock(lock);
593
542 return ret; 594 return ret;
543} 595}
544 596
545bool inet_ehash_nolisten(struct sock *sk, struct sock *osk) 597bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk)
546{ 598{
547 bool ok = inet_ehash_insert(sk, osk); 599 bool ok = inet_ehash_insert(sk, osk, found_dup_sk);
548 600
549 if (ok) { 601 if (ok) {
550 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); 602 sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1);
@@ -588,7 +640,7 @@ int __inet_hash(struct sock *sk, struct sock *osk)
588 int err = 0; 640 int err = 0;
589 641
590 if (sk->sk_state != TCP_LISTEN) { 642 if (sk->sk_state != TCP_LISTEN) {
591 inet_ehash_nolisten(sk, osk); 643 inet_ehash_nolisten(sk, osk, NULL);
592 return 0; 644 return 0;
593 } 645 }
594 WARN_ON(!sk_unhashed(sk)); 646 WARN_ON(!sk_unhashed(sk));
@@ -662,8 +714,19 @@ unlock:
662} 714}
663EXPORT_SYMBOL_GPL(inet_unhash); 715EXPORT_SYMBOL_GPL(inet_unhash);
664 716
717/* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm
718 * Note that we use 32bit integers (vs RFC 'short integers')
719 * because 2^16 is not a multiple of num_ephemeral and this
720 * property might be used by a clever attacker.
721 * The RFC claims that using TABLE_LENGTH=10 buckets gives an improvement;
722 * we use 256 instead to really give more isolation and
723 * privacy. This only consumes 1 KB of kernel memory.
724 */
725#define INET_TABLE_PERTURB_SHIFT 8
726static u32 table_perturb[1 << INET_TABLE_PERTURB_SHIFT];
727
665int __inet_hash_connect(struct inet_timewait_death_row *death_row, 728int __inet_hash_connect(struct inet_timewait_death_row *death_row,
666 struct sock *sk, u32 port_offset, 729 struct sock *sk, u64 port_offset,
667 int (*check_established)(struct inet_timewait_death_row *, 730 int (*check_established)(struct inet_timewait_death_row *,
668 struct sock *, __u16, struct inet_timewait_sock **)) 731 struct sock *, __u16, struct inet_timewait_sock **))
669{ 732{
@@ -675,7 +738,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
675 struct inet_bind_bucket *tb; 738 struct inet_bind_bucket *tb;
676 u32 remaining, offset; 739 u32 remaining, offset;
677 int ret, i, low, high; 740 int ret, i, low, high;
678 static u32 hint; 741 u32 index;
679 742
680 if (port) { 743 if (port) {
681 head = &hinfo->bhash[inet_bhashfn(net, port, 744 head = &hinfo->bhash[inet_bhashfn(net, port,
@@ -683,7 +746,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
683 tb = inet_csk(sk)->icsk_bind_hash; 746 tb = inet_csk(sk)->icsk_bind_hash;
684 spin_lock_bh(&head->lock); 747 spin_lock_bh(&head->lock);
685 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { 748 if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) {
686 inet_ehash_nolisten(sk, NULL); 749 inet_ehash_nolisten(sk, NULL, NULL);
687 spin_unlock_bh(&head->lock); 750 spin_unlock_bh(&head->lock);
688 return 0; 751 return 0;
689 } 752 }
@@ -700,7 +763,12 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row,
700 if (likely(remaining > 1)) 763 if (likely(remaining > 1))
701 remaining &= ~1U; 764 remaining &= ~1U;
702 765
703 offset = (hint + port_offset) % remaining; 766 net_get_random_once(table_perturb, sizeof(table_perturb));
767 index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT);
768
769 offset = READ_ONCE(table_perturb[index]) + port_offset;
770 offset %= remaining;
771
704 /* In first pass we try ports of @low parity. 772 /* In first pass we try ports of @low parity.
705 * inet_csk_get_port() does the opposite choice. 773 * inet_csk_get_port() does the opposite choice.
706 */ 774 */
@@ -753,13 +821,13 @@ next_port:
753 return -EADDRNOTAVAIL; 821 return -EADDRNOTAVAIL;
754 822
755ok: 823ok:
756 hint += i + 2; 824 WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2);
757 825
758 /* Head lock still held and bh's disabled */ 826 /* Head lock still held and bh's disabled */
759 inet_bind_hash(sk, tb, port); 827 inet_bind_hash(sk, tb, port);
760 if (sk_unhashed(sk)) { 828 if (sk_unhashed(sk)) {
761 inet_sk(sk)->inet_sport = htons(port); 829 inet_sk(sk)->inet_sport = htons(port);
762 inet_ehash_nolisten(sk, (struct sock *)tw); 830 inet_ehash_nolisten(sk, (struct sock *)tw, NULL);
763 } 831 }
764 if (tw) 832 if (tw)
765 inet_twsk_bind_unhash(tw, hinfo); 833 inet_twsk_bind_unhash(tw, hinfo);
@@ -776,7 +844,7 @@ ok:
776int inet_hash_connect(struct inet_timewait_death_row *death_row, 844int inet_hash_connect(struct inet_timewait_death_row *death_row,
777 struct sock *sk) 845 struct sock *sk)
778{ 846{
779 u32 port_offset = 0; 847 u64 port_offset = 0;
780 848
781 if (!inet_sk(sk)->inet_num) 849 if (!inet_sk(sk)->inet_num)
782 port_offset = inet_sk_port_offset(sk); 850 port_offset = inet_sk_port_offset(sk);