 use crate::domain_knowledge::{NodeId, NodeInfo};
-use num::BigUint;
-use std::{ops::BitXor, str::FromStr, time::Instant};
-use tracing::{info, trace};
+use std::time::Instant;
+use tracing::info;
 
 /// The routing table at the heart of the Kademlia DHT. It keeps the near neighbors of our own node.
-#[derive(Debug)]
+#[derive(Debug, Hash, PartialEq, Eq, Clone)]
 pub struct RoutingTable {
+    bucket_size: usize,
     /// Our own node id.
-    id: BigUint,
+    id: NodeId,
 
-    /// each bucket contains
-    pub(crate) buckets: Vec<Bucket>,
-}
-
-#[derive(Debug)]
-pub struct Bucket {
-    /// inclusive
-    lower_bound: BigUint,
-    /// exclusive
-    upper_bound: BigUint,
-
-    // TODO: technically a bucket is at most 8 nodes, use a fixed size vector
-    nodes: Vec<Node>,
-}
-
-impl Bucket {
-    pub fn full(&self) -> bool {
-        assert!(self.nodes.len() <= 8);
-        self.nodes.len() >= 8
-    }
+    pub(crate) buckets: Box<[Option<NodeEntry>]>,
 }
 
 #[derive(Debug, Clone, PartialEq, Eq, Hash, Copy)]
-// TODO: this name is shit, think of a better one
-pub struct Node {
+pub struct NodeEntry {
     pub(crate) contact: NodeInfo,
     pub(crate) last_checked: Instant,
 }
 
 impl RoutingTable {
     pub fn new(id: NodeId) -> Self {
-        let default_bucket = Bucket {
-            lower_bound: BigUint::from(0u8),
-            // 2^160
-            upper_bound: BigUint::from_str("1461501637330902918203684832716283019655932542976").unwrap(),
-            nodes: Vec::new(),
-        };
-
         RoutingTable {
-            id: BigUint::from_bytes_be(id.as_bytes()),
-            buckets: vec![default_bucket],
+            bucket_size: 8,
+            id,
+            buckets: Box::new([Option::None; 160 * 8]),
         }
     }
 
     pub fn node_count(&self) -> usize {
-        self.buckets.iter().map(|b| b.nodes.len()).sum()
+        // TODO: optimize this
+        self.buckets.iter().filter(|n| n.is_some()).count()
     }
 
     /// Add a new node to the routing table; if the bucket is full, the node will be ignored.
     pub fn add_new_node(&mut self, contact: NodeInfo) {
-        // TODO: handle duplicate nodes
-
         // There is a special case: when we already know this node, we just update the
         // last_checked timestamp.
-        if let Some(node) = self
+        let exact_match = self
             .buckets
             .iter_mut()
-            .map(|b| b.nodes.iter_mut())
             .flatten()
-            .find(|node| node.contact.id() == contact.id())
-        {
-            node.last_checked = Instant::now();
+            .find(|node| node.contact.id() == contact.id());
+        if let Some(n) = exact_match {
+            n.last_checked = Instant::now();
             return;
         }
 
-        let our_id = &self.id;
-        let distance = our_id.bitxor(BigUint::from_bytes_be(contact.id().as_bytes()));
+        let bucket = self.bucket_for_mut(&contact.id());
+        let slot = bucket.iter_mut().find(|n| n.is_none());
 
-        // first, find the bucket that this node belongs in
-        let target_bucket = self
-            .buckets
-            .iter_mut()
-            .find(|bucket| bucket.lower_bound <= distance && distance < bucket.upper_bound)
-            .unwrap();
-
-        let (full, within_our_bucket) = (
-            target_bucket.full(),
-            &target_bucket.lower_bound <= our_id && our_id < &target_bucket.upper_bound,
-        );
-        match (full, within_our_bucket) {
-            // if the bucket is full and our id is within our bucket, we need to split it
-            (true, true) => {
-                // split the bucket, the new bucket is the upper half of the old bucket
-                let mut new_bucket = Bucket {
-                    lower_bound: &target_bucket.upper_bound / 2u8,
-                    upper_bound: target_bucket.upper_bound.clone(),
-                    nodes: Vec::new(),
-                };
-
-                // transfer all the nodes that should go into the new bucket into the right place
-                // do I prefer the draining_filter API? yes but that's sadly nightly only
-                let mut i = 0;
-                while i < target_bucket.nodes.len() {
-                    let target_bucket_node_id = BigUint::from_bytes_be(target_bucket.nodes[i].contact.id().as_bytes());
-                    if &target_bucket_node_id <= &new_bucket.lower_bound {
-                        let node = target_bucket.nodes.remove(i);
-                        new_bucket.nodes.push(node);
-                    } else {
-                        i += 1;
-                    }
-                }
-
-                target_bucket.upper_bound = &target_bucket.upper_bound / 2u8;
-                self.buckets.push(new_bucket);
-                trace!("bucket split");
-            }
-            // if the bucket id range is not within our id and the bucket is full, we don't need to do
-            // anything
-            (true, false) => {
-                trace!("node not added, bucket full and not within our id");
-            }
-            // if the buckets are not full, then happy days, we just add the new node
-            (false, _) => {
-                target_bucket.nodes.push(Node {
+        // TODO: I recall there is a more sophisticated rule for deciding whether to ignore the
+        // insertion or not (the usual Kademlia policy is sketched after this diff).
+        match slot {
+            Some(inner) => {
+                inner.replace(NodeEntry {
                     contact,
                     last_checked: Instant::now(),
                 });
-                trace!("node added");
+                info!("{contact:?} added to routing table");
+                return;
+            }
+            None => {
+                info!("table full, {contact:?} not added");
+                return; // we're full
             }
         }
-        info!("node processed, node count: {}", self.node_count());
     }
 
+    // TODO: return an iterator instead?
     pub fn find_closest(&self, target: NodeId) -> Vec<NodeInfo> {
-        let mut closest_nodes: Vec<_> = self
-            .buckets
-            .iter()
-            .map(|bucket| {
-                bucket.nodes.iter().map(|node| {
-                    let node_id = node.contact.id();
-                    let node_id = node_id.as_bytes();
-                    let target = target.as_bytes();
-
-                    let mut distance = [0u8; 20];
-
-                    // zip for array is sadly unstable
-                    let mut i = 0;
-                    while i < 20 {
-                        distance[i] = node_id[i] ^ target[i];
-                        i += 1;
-                    }
-
-                    (BigUint::from_bytes_be(&distance), &node.contact)
-                })
-            })
-            .flatten()
-            .collect();
+        let (bucket_i, bucket_i_end) = self.indices(&target);
+        let bucket = &self.buckets[bucket_i..bucket_i_end];
 
-        closest_nodes.sort_unstable_by_key(|x| x.0.clone());
-        closest_nodes
-            .iter()
-            .filter(|(_, node)| node.id() != target)
-            .take(8)
-            .map(|x| x.1)
-            .cloned()
-            .collect()
+        let mut valid_entries: Vec<_> = bucket.iter().flatten().collect();
+        valid_entries.sort_unstable_by_key(|e| e.contact.id());
+
+        valid_entries.into_iter().map(|n| n.contact).collect()
     }
 
-    pub fn find(&self, target: NodeId) -> Option<Node> {
+    pub fn find(&self, target: NodeId) -> Option<NodeInfo> {
         self.buckets
             .iter()
-            .map(|bucket| bucket.nodes.iter())
             .flatten()
             .find(|node| node.contact.id() == target)
-            .cloned()
+            .map(|n| n.contact)
+            .clone()
+    }
+
+    /// Returns the `index` where `self.buckets[index]` is the first entry in the corresponding
+    /// k-bucket, and `index + (self.bucket_size - 1)` is the last entry (inclusive), so
+    /// `index..(index + self.bucket_size)` is the valid range.
+    fn index(&self, target: &NodeId) -> usize {
+        // Each k-bucket at index i stores nodes whose distance from us is in [2^i, 2^(i + 1)).
+        // The position of the first non-zero bit in the distance tells us exactly which of
+        // those ranges the distance falls into (a worked example follows the diff).
+
+        let dist = self.id.dist(&target);
+        // All zeros means we are looking for ourselves, so we fall back to the 0th bucket.
+        let first_nonzero = dist.iter().position(|radix| *radix != 0).unwrap_or(159);
+        (159 - first_nonzero) * self.bucket_size
+    }
+
+    /// The [begin, end) range of the k-bucket that `target` falls into. The range can be used to
+    /// index `self.buckets` directly; the stride arithmetic is already done for you.
+    fn indices(&self, target: &NodeId) -> (usize, usize) {
+        let begin = self.index(target);
+        let end = begin + self.bucket_size;
+        (begin, end)
+    }
+
+    #[allow(unused)]
+    fn bucket_for(&self, target: &NodeId) -> &[Option<NodeEntry>] {
+        let (begin, end) = self.indices(target);
+        &self.buckets[begin..end]
+    }
+
+    fn bucket_for_mut(&mut self, target: &NodeId) -> &mut [Option<NodeEntry>] {
+        let (begin, end) = self.indices(target);
+        &mut self.buckets[begin..end]
     }
 }
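
To make the flat-bucket arithmetic in `index` concrete, here is a small standalone sketch of the same computation written against plain 20-byte ids, so it does not depend on the crate's `NodeId` type. The helper name `bucket_index` is hypothetical, and it assumes `NodeId::dist` exposes the 160 bits of the XOR distance most-significant bit first, which is how `(159 - first_nonzero) * self.bucket_size` reads.

// Standalone sketch: map the XOR distance between two 160-bit ids to the flat offset of
// its k-bucket, assuming `bucket_size` slots per bucket (8 in the table above).
fn bucket_index(ours: &[u8; 20], theirs: &[u8; 20], bucket_size: usize) -> usize {
    for (byte_i, (a, b)) in ours.iter().zip(theirs.iter()).enumerate() {
        let diff = a ^ b;
        if diff != 0 {
            // Position of the most significant differing bit, counted from the MSB of the id.
            let bit_from_msb = byte_i * 8 + diff.leading_zeros() as usize;
            // Bucket i holds distances in [2^i, 2^(i + 1)); the highest set bit of the
            // distance sits at position 159 - i when counted from the MSB side.
            return (159 - bit_from_msb) * bucket_size;
        }
    }
    // Identical ids (distance zero): fall back to the 0th bucket, like `index` does.
    0
}

fn main() {
    let ours = [0u8; 20];
    let mut theirs = [0u8; 20];
    theirs[19] = 0b0000_0100; // distance = 4, which lies in [2^2, 2^3), i.e. bucket 2
    assert_eq!(bucket_index(&ours, &theirs, 8), 2 * 8);
}

With a bucket size of 8, a distance of 4 lands in bucket 2, so its slots start at flat offset 16, the same answer the `index` method computes.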
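The TODO in `add_new_node` alludes to Kademlia's usual rule for a full bucket: ping the least-recently-seen contact, evict it only if it fails to respond, and otherwise keep it and drop the newcomer. Below is a rough sketch of that policy against the `Option<NodeEntry>` slots used here; the `insert_or_evict` helper and the `ping` closure are placeholders standing in for whatever liveness check the rest of the crate provides, not existing APIs.

// Hypothetical full-bucket policy following the Kademlia paper: prefer long-lived
// contacts, and only evict the least-recently-seen entry if it no longer responds.
fn insert_or_evict(
    bucket: &mut [Option<NodeEntry>],
    candidate: NodeInfo,
    ping: impl Fn(&NodeInfo) -> bool, // stand-in for a real liveness check
) {
    if let Some(empty) = bucket.iter_mut().find(|slot| slot.is_none()) {
        // Free slot: same as the current code path.
        empty.replace(NodeEntry { contact: candidate, last_checked: Instant::now() });
        return;
    }

    // Bucket is full: pick the least-recently-seen entry.
    let oldest = bucket
        .iter_mut()
        .flatten()
        .min_by_key(|entry| entry.last_checked)
        .expect("a full bucket has at least one entry");

    if ping(&oldest.contact) {
        // Still alive: keep the old contact and discard the newcomer.
        oldest.last_checked = Instant::now();
    } else {
        // Gone: replace it with the newcomer.
        *oldest = NodeEntry { contact: candidate, last_checked: Instant::now() };
    }
}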