diff options
Diffstat (limited to 'net/ipv4/inet_hashtables.c')
-rw-r--r-- | net/ipv4/inet_hashtables.c | 96 |
1 files changed, 82 insertions, 14 deletions
diff --git a/net/ipv4/inet_hashtables.c b/net/ipv4/inet_hashtables.c index 3a5f12f011cb..0a8aec3f37cc 100644 --- a/net/ipv4/inet_hashtables.c +++ b/net/ipv4/inet_hashtables.c | |||
@@ -24,6 +24,9 @@ | |||
24 | #include <net/addrconf.h> | 24 | #include <net/addrconf.h> |
25 | #include <net/inet_connection_sock.h> | 25 | #include <net/inet_connection_sock.h> |
26 | #include <net/inet_hashtables.h> | 26 | #include <net/inet_hashtables.h> |
27 | #if IS_ENABLED(CONFIG_IPV6) | ||
28 | #include <net/inet6_hashtables.h> | ||
29 | #endif | ||
27 | #include <net/secure_seq.h> | 30 | #include <net/secure_seq.h> |
28 | #include <net/ip.h> | 31 | #include <net/ip.h> |
29 | #include <net/tcp.h> | 32 | #include <net/tcp.h> |
@@ -504,7 +507,7 @@ not_unique: | |||
504 | return -EADDRNOTAVAIL; | 507 | return -EADDRNOTAVAIL; |
505 | } | 508 | } |
506 | 509 | ||
507 | static u32 inet_sk_port_offset(const struct sock *sk) | 510 | static u64 inet_sk_port_offset(const struct sock *sk) |
508 | { | 511 | { |
509 | const struct inet_sock *inet = inet_sk(sk); | 512 | const struct inet_sock *inet = inet_sk(sk); |
510 | 513 | ||
@@ -513,10 +516,52 @@ static u32 inet_sk_port_offset(const struct sock *sk) | |||
513 | inet->inet_dport); | 516 | inet->inet_dport); |
514 | } | 517 | } |
515 | 518 | ||
516 | /* insert a socket into ehash, and eventually remove another one | 519 | /* Searches for an existing socket in the ehash bucket list. |
517 | * (The another one can be a SYN_RECV or TIMEWAIT | 520 | * Returns true if found, false otherwise. |
518 | */ | 521 | */ |
519 | bool inet_ehash_insert(struct sock *sk, struct sock *osk) | 522 | static bool inet_ehash_lookup_by_sk(struct sock *sk, |
523 | struct hlist_nulls_head *list) | ||
524 | { | ||
525 | const __portpair ports = INET_COMBINED_PORTS(sk->sk_dport, sk->sk_num); | ||
526 | const int sdif = sk->sk_bound_dev_if; | ||
527 | const int dif = sk->sk_bound_dev_if; | ||
528 | const struct hlist_nulls_node *node; | ||
529 | struct net *net = sock_net(sk); | ||
530 | struct sock *esk; | ||
531 | |||
532 | INET_ADDR_COOKIE(acookie, sk->sk_daddr, sk->sk_rcv_saddr); | ||
533 | |||
534 | sk_nulls_for_each_rcu(esk, node, list) { | ||
535 | if (esk->sk_hash != sk->sk_hash) | ||
536 | continue; | ||
537 | if (sk->sk_family == AF_INET) { | ||
538 | if (unlikely(INET_MATCH(esk, net, acookie, | ||
539 | sk->sk_daddr, | ||
540 | sk->sk_rcv_saddr, | ||
541 | ports, dif, sdif))) { | ||
542 | return true; | ||
543 | } | ||
544 | } | ||
545 | #if IS_ENABLED(CONFIG_IPV6) | ||
546 | else if (sk->sk_family == AF_INET6) { | ||
547 | if (unlikely(INET6_MATCH(esk, net, | ||
548 | &sk->sk_v6_daddr, | ||
549 | &sk->sk_v6_rcv_saddr, | ||
550 | ports, dif, sdif))) { | ||
551 | return true; | ||
552 | } | ||
553 | } | ||
554 | #endif | ||
555 | } | ||
556 | return false; | ||
557 | } | ||
558 | |||
559 | /* Insert a socket into ehash, and eventually remove another one | ||
560 | * (The another one can be a SYN_RECV or TIMEWAIT) | ||
561 | * If a matching socket already exists, socket sk is not inserted, | |
562 | * and the found_dup_sk parameter is set to true. | |
563 | */ | ||
564 | bool inet_ehash_insert(struct sock *sk, struct sock *osk, bool *found_dup_sk) | ||
520 | { | 565 | { |
521 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; | 566 | struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; |
522 | struct hlist_nulls_head *list; | 567 | struct hlist_nulls_head *list; |
@@ -535,16 +580,23 @@ bool inet_ehash_insert(struct sock *sk, struct sock *osk) | |||
535 | if (osk) { | 580 | if (osk) { |
536 | WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); | 581 | WARN_ON_ONCE(sk->sk_hash != osk->sk_hash); |
537 | ret = sk_nulls_del_node_init_rcu(osk); | 582 | ret = sk_nulls_del_node_init_rcu(osk); |
583 | } else if (found_dup_sk) { | ||
584 | *found_dup_sk = inet_ehash_lookup_by_sk(sk, list); | ||
585 | if (*found_dup_sk) | ||
586 | ret = false; | ||
538 | } | 587 | } |
588 | |||
539 | if (ret) | 589 | if (ret) |
540 | __sk_nulls_add_node_rcu(sk, list); | 590 | __sk_nulls_add_node_rcu(sk, list); |
591 | |||
541 | spin_unlock(lock); | 592 | spin_unlock(lock); |
593 | |||
542 | return ret; | 594 | return ret; |
543 | } | 595 | } |
544 | 596 | ||
545 | bool inet_ehash_nolisten(struct sock *sk, struct sock *osk) | 597 | bool inet_ehash_nolisten(struct sock *sk, struct sock *osk, bool *found_dup_sk) |
546 | { | 598 | { |
547 | bool ok = inet_ehash_insert(sk, osk); | 599 | bool ok = inet_ehash_insert(sk, osk, found_dup_sk); |
548 | 600 | ||
549 | if (ok) { | 601 | if (ok) { |
550 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); | 602 | sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); |
@@ -588,7 +640,7 @@ int __inet_hash(struct sock *sk, struct sock *osk) | |||
588 | int err = 0; | 640 | int err = 0; |
589 | 641 | ||
590 | if (sk->sk_state != TCP_LISTEN) { | 642 | if (sk->sk_state != TCP_LISTEN) { |
591 | inet_ehash_nolisten(sk, osk); | 643 | inet_ehash_nolisten(sk, osk, NULL); |
592 | return 0; | 644 | return 0; |
593 | } | 645 | } |
594 | WARN_ON(!sk_unhashed(sk)); | 646 | WARN_ON(!sk_unhashed(sk)); |
@@ -662,8 +714,19 @@ unlock: | |||
662 | } | 714 | } |
663 | EXPORT_SYMBOL_GPL(inet_unhash); | 715 | EXPORT_SYMBOL_GPL(inet_unhash); |
664 | 716 | ||
717 | /* RFC 6056 3.3.4. Algorithm 4: Double-Hash Port Selection Algorithm | ||
718 | * Note that we use 32bit integers (vs RFC 'short integers') | |
719 | * because 2^16 is not a multiple of num_ephemeral and this | |
720 | * property might be used by a clever attacker. | |
721 | * The RFC claims using TABLE_LENGTH=10 buckets gives an improvement; | |
722 | * we use 256 instead to really give more isolation and | |
723 | * privacy. This only consumes 1 KB of kernel memory. | |
724 | */ | ||
725 | #define INET_TABLE_PERTURB_SHIFT 8 | ||
726 | static u32 table_perturb[1 << INET_TABLE_PERTURB_SHIFT]; | ||
727 | |||
665 | int __inet_hash_connect(struct inet_timewait_death_row *death_row, | 728 | int __inet_hash_connect(struct inet_timewait_death_row *death_row, |
666 | struct sock *sk, u32 port_offset, | 729 | struct sock *sk, u64 port_offset, |
667 | int (*check_established)(struct inet_timewait_death_row *, | 730 | int (*check_established)(struct inet_timewait_death_row *, |
668 | struct sock *, __u16, struct inet_timewait_sock **)) | 731 | struct sock *, __u16, struct inet_timewait_sock **)) |
669 | { | 732 | { |
@@ -675,7 +738,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, | |||
675 | struct inet_bind_bucket *tb; | 738 | struct inet_bind_bucket *tb; |
676 | u32 remaining, offset; | 739 | u32 remaining, offset; |
677 | int ret, i, low, high; | 740 | int ret, i, low, high; |
678 | static u32 hint; | 741 | u32 index; |
679 | 742 | ||
680 | if (port) { | 743 | if (port) { |
681 | head = &hinfo->bhash[inet_bhashfn(net, port, | 744 | head = &hinfo->bhash[inet_bhashfn(net, port, |
@@ -683,7 +746,7 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, | |||
683 | tb = inet_csk(sk)->icsk_bind_hash; | 746 | tb = inet_csk(sk)->icsk_bind_hash; |
684 | spin_lock_bh(&head->lock); | 747 | spin_lock_bh(&head->lock); |
685 | if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { | 748 | if (sk_head(&tb->owners) == sk && !sk->sk_bind_node.next) { |
686 | inet_ehash_nolisten(sk, NULL); | 749 | inet_ehash_nolisten(sk, NULL, NULL); |
687 | spin_unlock_bh(&head->lock); | 750 | spin_unlock_bh(&head->lock); |
688 | return 0; | 751 | return 0; |
689 | } | 752 | } |
@@ -700,7 +763,12 @@ int __inet_hash_connect(struct inet_timewait_death_row *death_row, | |||
700 | if (likely(remaining > 1)) | 763 | if (likely(remaining > 1)) |
701 | remaining &= ~1U; | 764 | remaining &= ~1U; |
702 | 765 | ||
703 | offset = (hint + port_offset) % remaining; | 766 | net_get_random_once(table_perturb, sizeof(table_perturb)); |
767 | index = hash_32(port_offset, INET_TABLE_PERTURB_SHIFT); | ||
768 | |||
769 | offset = READ_ONCE(table_perturb[index]) + port_offset; | ||
770 | offset %= remaining; | ||
771 | |||
704 | /* In first pass we try ports of @low parity. | 772 | /* In first pass we try ports of @low parity. |
705 | * inet_csk_get_port() does the opposite choice. | 773 | * inet_csk_get_port() does the opposite choice. |
706 | */ | 774 | */ |
@@ -753,13 +821,13 @@ next_port: | |||
753 | return -EADDRNOTAVAIL; | 821 | return -EADDRNOTAVAIL; |
754 | 822 | ||
755 | ok: | 823 | ok: |
756 | hint += i + 2; | 824 | WRITE_ONCE(table_perturb[index], READ_ONCE(table_perturb[index]) + i + 2); |
757 | 825 | ||
758 | /* Head lock still held and bh's disabled */ | 826 | /* Head lock still held and bh's disabled */ |
759 | inet_bind_hash(sk, tb, port); | 827 | inet_bind_hash(sk, tb, port); |
760 | if (sk_unhashed(sk)) { | 828 | if (sk_unhashed(sk)) { |
761 | inet_sk(sk)->inet_sport = htons(port); | 829 | inet_sk(sk)->inet_sport = htons(port); |
762 | inet_ehash_nolisten(sk, (struct sock *)tw); | 830 | inet_ehash_nolisten(sk, (struct sock *)tw, NULL); |
763 | } | 831 | } |
764 | if (tw) | 832 | if (tw) |
765 | inet_twsk_bind_unhash(tw, hinfo); | 833 | inet_twsk_bind_unhash(tw, hinfo); |
@@ -776,7 +844,7 @@ ok: | |||
776 | int inet_hash_connect(struct inet_timewait_death_row *death_row, | 844 | int inet_hash_connect(struct inet_timewait_death_row *death_row, |
777 | struct sock *sk) | 845 | struct sock *sk) |
778 | { | 846 | { |
779 | u32 port_offset = 0; | 847 | u64 port_offset = 0; |
780 | 848 | ||
781 | if (!inet_sk(sk)->inet_num) | 849 | if (!inet_sk(sk)->inet_num) |
782 | port_offset = inet_sk_port_offset(sk); | 850 | port_offset = inet_sk_port_offset(sk); |