From 884bcad017e9929aefdd6dd33bbd6f79910b76d9 Mon Sep 17 00:00:00 2001 From: Andrew Morris Date: Sat, 28 Oct 2023 10:05:16 +1100 Subject: [PATCH] Refactor StorageVal (remove StoragePoint) --- storage/src/storage_ops.rs | 9 +- storage/src/storage_val.rs | 243 ++++++++++++++++++++++--------------- storage/src/tests.rs | 110 ++++++++--------- 3 files changed, 197 insertions(+), 165 deletions(-) diff --git a/storage/src/storage_ops.rs b/storage/src/storage_ops.rs index df97d2f..536b5a4 100644 --- a/storage/src/storage_ops.rs +++ b/storage/src/storage_ops.rs @@ -47,7 +47,7 @@ where None => return Ok(None), }; - let value = self.read(key)?.map(|entry| entry.to_val()); + let value = self.read(key)?.map(|entry| entry.move_to_val()); Ok(value) } @@ -74,10 +74,7 @@ where fn store_with_replacements(&mut self, value: &StorageVal) -> Result { let mut cache = HashMap::::new(); - if let Some(key) = value - .point - .maybe_replace_store(self, &value.refs, &mut cache)? - { + if let Some(key) = value.maybe_replace_store(self, &mut cache)? { return Ok(key); } @@ -89,7 +86,7 @@ where self.write(key, Some(&value.to_entry()))?; self.ref_delta(key, -1)?; // Cancel out the assumed single reference - for subkey in value.refs.iter() { + for subkey in value.refs_iter() { self.ref_delta(*subkey, 1)?; } diff --git a/storage/src/storage_val.rs b/storage/src/storage_val.rs index 4f7976c..93bbc93 100644 --- a/storage/src/storage_val.rs +++ b/storage/src/storage_val.rs @@ -11,51 +11,74 @@ use crate::storage_ptr::StorageEntryPtr; #[cfg(test)] use crate::{Storage, StorageBackend}; -#[derive(Serialize, Deserialize, Debug)] -pub struct StorageVal { - pub point: StoragePoint, +#[derive(Default, Debug, Serialize, Deserialize, Clone)] +pub enum StorageVal { + #[default] + Void, + Number(u64), + Ptr(StorageEntryPtr), + Ref(u64), + Compound( + #[serde(serialize_with = "serialize_rc", deserialize_with = "deserialize_rc")] + Rc, + ), +} - #[serde(serialize_with = "serialize_rc", deserialize_with = "deserialize_rc")] - pub refs: Rc>, +#[derive(Debug, Serialize, Deserialize)] +pub enum StorageCompoundVal { + Array(StorageArray), +} + +#[derive(Debug, Serialize, Deserialize)] +pub struct StorageArray { + pub items: Vec, + + // Skipping serialization because they're stored in the entry. When converting from an entry, we + // copy (todo: move?) the refs from there. + #[serde(skip)] + pub refs: Vec, } impl StorageVal { pub fn to_entry(&self) -> StorageEntry { StorageEntry { ref_count: 1, - refs: self.refs.clone(), - data: bincode::serialize(&self.point).unwrap(), + refs: match self { + StorageVal::Void | StorageVal::Number(_) | StorageVal::Ptr(_) | StorageVal::Ref(_) => { + vec![] + } + StorageVal::Compound(compound) => match &**compound { + StorageCompoundVal::Array(arr) => arr.refs.clone(), + }, + }, + data: bincode::serialize(self).unwrap(), + } + } + + pub fn refs(&self) -> Option<&Vec> { + match self { + StorageVal::Void | StorageVal::Number(_) | StorageVal::Ptr(_) | StorageVal::Ref(_) => None, + StorageVal::Compound(compound) => match &**compound { + StorageCompoundVal::Array(arr) => Some(&arr.refs), + }, } } + pub fn refs_iter(&self) -> impl Iterator { + self.refs().into_iter().flatten() + } + #[cfg(test)] pub(crate) fn numbers( &self, storage: &mut Storage, ) -> Result, SB::Error<()>> { - storage - .sb - .transaction(|sb| self.point.numbers(sb, &self.refs)) + storage.sb.transaction(|sb| self.numbers_impl(sb)) } -} -#[derive(Default, Serialize, Deserialize, PartialEq, Eq, Debug, Clone)] -pub enum StoragePoint { - #[default] - Void, - Number(u64), - Array( - #[serde(serialize_with = "serialize_rc", deserialize_with = "deserialize_rc")] - Rc>, - ), - Ref(u64), -} - -impl StoragePoint { pub fn maybe_replace_store>( &self, tx: &mut SO, - refs: &Rc>, cache: &mut HashMap, ) -> Result, E> { if let Some(id) = self.cache_id() { @@ -65,94 +88,103 @@ impl StoragePoint { } Ok(match &self { - StoragePoint::Void => None, - StoragePoint::Number(_) => None, - StoragePoint::Array(arr) => 'b: { - let mut replacements: Vec<(usize, StorageEntryPtr)> = Vec::new(); - - for i in 0..arr.len() { - if let Some(key) = arr[i].maybe_replace_store(tx, refs, cache)? { - replacements.push((i, key)); + StorageVal::Void | StorageVal::Number(_) | StorageVal::Ptr(_) | StorageVal::Ref(_) => None, + StorageVal::Compound(compound) => match &**compound { + StorageCompoundVal::Array(arr) => 'b: { + let mut replacements: Vec<(usize, StorageEntryPtr)> = Vec::new(); + + for i in 0..arr.items.len() { + if let Some(key) = arr.items[i].maybe_replace_store(tx, cache)? { + replacements.push((i, key)); + } } - } - if replacements.is_empty() { - break 'b Some(cache_and_store( - tx, - &StorageVal { - point: StoragePoint::Array(arr.clone()), - refs: refs.clone(), - }, - cache, - RcKey::from(arr.clone()), - )?); - } + let cache_id = RcKey::from(compound.clone()); - let mut new_arr = Vec::::new(); - let mut new_refs = (**refs).clone(); + if replacements.is_empty() { + break 'b Some(cache_and_store(tx, self, cache, cache_id)?); + } + + let mut new_arr = Vec::::new(); + let mut new_refs = arr.refs.clone(); - let mut replacements_iter = replacements.iter(); - let mut next_replacement = replacements_iter.next(); + let mut replacements_iter = replacements.iter(); + let mut next_replacement = replacements_iter.next(); - for (i, point) in arr.iter().enumerate() { - if let Some((j, entry_ptr)) = next_replacement { - if *j == i { - new_arr.push(StoragePoint::Ref(new_refs.len() as u64)); - new_refs.push(*entry_ptr); - next_replacement = replacements_iter.next(); - continue; + for (i, item) in arr.items.iter().enumerate() { + if let Some((j, entry_ptr)) = next_replacement { + if *j == i { + new_arr.push(StorageVal::Ref(new_refs.len() as u64)); + new_refs.push(*entry_ptr); + next_replacement = replacements_iter.next(); + continue; + } } + + new_arr.push(item.clone()); } - new_arr.push(point.clone()); + Some(cache_and_store( + tx, + &StorageVal::Compound(Rc::new(StorageCompoundVal::Array(StorageArray { + items: new_arr, + refs: new_refs, + }))), + cache, + cache_id, + )?) } - - Some(cache_and_store( - tx, - &StorageVal { - point: StoragePoint::Array(Rc::new(new_arr)), - refs: Rc::new(new_refs), - }, - cache, - RcKey::from(arr.clone()), - )?) - } - StoragePoint::Ref(_) => None, + }, }) } fn cache_id(&self) -> Option { - match &self { - StoragePoint::Void => None, - StoragePoint::Number(_) => None, - StoragePoint::Array(arr) => Some(RcKey::from(arr.clone())), - StoragePoint::Ref(_) => None, + match self { + StorageVal::Void => None, + StorageVal::Number(_) => None, + StorageVal::Ptr(_) => None, + StorageVal::Ref(_) => None, + StorageVal::Compound(compound) => Some(RcKey::from(compound.clone())), } } #[cfg(test)] - fn numbers>( - &self, - tx: &mut SO, - refs: &Rc>, - ) -> Result, E> { + fn numbers_impl>(&self, tx: &mut SO) -> Result, E> { match &self { - StoragePoint::Void => Ok(Vec::new()), - StoragePoint::Number(n) => Ok(vec![*n]), - StoragePoint::Array(arr) => { - let mut numbers = Vec::new(); + StorageVal::Void => Ok(Vec::new()), + StorageVal::Number(n) => Ok(vec![*n]), + StorageVal::Ptr(ptr) => { + let entry = tx.read(*ptr)?.unwrap(); + entry.move_to_val().numbers_impl(tx) + } + // StorageVal::Array(arr) => { + // let mut numbers = Vec::new(); - for point in arr.iter() { - numbers.extend(point.numbers(tx, refs)?); - } + // for point in arr.iter() { + // numbers.extend(point.numbers(tx, refs)?); + // } - Ok(numbers) - } - StoragePoint::Ref(i) => { - let key = refs[*i as usize]; - let val = tx.read(key)?.unwrap().to_val(); - val.point.numbers(tx, &val.refs) + // Ok(numbers) + // } + StorageVal::Ref(_) => { + panic!("Can't lookup ref (shouldn't hit this case)") } + StorageVal::Compound(compound) => match &**compound { + StorageCompoundVal::Array(arr) => { + let mut numbers = Vec::new(); + + for item in &arr.items { + if let StorageVal::Ref(i) = item { + let item = &StorageVal::Ptr(arr.refs[*i as usize]); + numbers.extend(item.numbers_impl(tx)?); + } else { + numbers.extend(item.numbers_impl(tx)?); + } + } + + Ok(numbers) + } + }, } } } @@ -160,19 +192,30 @@ impl StoragePoint { #[derive(Serialize, Deserialize)] pub struct StorageEntry { pub(crate) ref_count: u64, - - #[serde(serialize_with = "serialize_rc", deserialize_with = "deserialize_rc")] - pub(crate) refs: Rc>, + pub(crate) refs: Vec, data: Vec, } impl StorageEntry { - pub fn to_val(&self) -> StorageVal { - StorageVal { - point: bincode::deserialize(&self.data).unwrap(), - refs: self.refs.clone(), - } + pub fn move_to_val(self) -> StorageVal { + let Self { + ref_count: _, + refs, + data, + } = self; + + let mut val = bincode::deserialize::(&data).unwrap(); + + if let StorageVal::Compound(compound) = &mut val { + match Rc::get_mut(compound).expect("Should be single ref") { + StorageCompoundVal::Array(arr) => { + arr.refs = refs; + } + } + }; + + val } } diff --git a/storage/src/tests.rs b/storage/src/tests.rs index 27f1fa9..6c37365 100644 --- a/storage/src/tests.rs +++ b/storage/src/tests.rs @@ -7,7 +7,7 @@ mod tests_ { sled_backend::SledBackend, storage::Storage, storage_ptr::storage_head_ptr, - storage_val::{StoragePoint, StorageVal}, + storage_val::{StorageArray, StorageCompoundVal, StorageVal}, StorageBackend, }; @@ -23,13 +23,7 @@ mod tests_ { fn number() { fn impl_(storage: &mut Storage) { storage - .set_head( - storage_head_ptr(b"test"), - Some(&StorageVal { - point: StoragePoint::Number(123), - refs: Rc::new(vec![]), - }), - ) + .set_head(storage_head_ptr(b"test"), Some(&StorageVal::Number(123))) .unwrap(); let val = storage @@ -37,7 +31,11 @@ mod tests_ { .unwrap() .unwrap(); - assert_eq!(val.point, StoragePoint::Number(123)); + if let StorageVal::Number(val) = val { + assert_eq!(val, 123); + } else { + panic!("Expected number"); + } } run(impl_, impl_); @@ -46,27 +44,18 @@ mod tests_ { #[test] fn array_0_1() { fn impl_(storage: &mut Storage) { - let key0 = storage - .store_tmp(&StorageVal { - point: StoragePoint::Number(0), - refs: Rc::new(vec![]), - }) - .unwrap(); - - let key1 = storage - .store_tmp(&StorageVal { - point: StoragePoint::Number(1), - refs: Rc::new(vec![]), - }) - .unwrap(); + let key0 = storage.store_tmp(&StorageVal::Number(0)).unwrap(); + let key1 = storage.store_tmp(&StorageVal::Number(1)).unwrap(); storage .set_head( storage_head_ptr(b"test"), - Some(&StorageVal { - point: StoragePoint::Array(Rc::new(vec![StoragePoint::Ref(0), StoragePoint::Ref(1)])), - refs: Rc::new(vec![key0, key1]), - }), + Some(&StorageVal::Compound(Rc::new(StorageCompoundVal::Array( + StorageArray { + items: vec![StorageVal::Ref(0), StorageVal::Ref(1)], + refs: vec![key0, key1], + }, + )))), ) .unwrap(); @@ -95,19 +84,21 @@ mod tests_ { storage .set_head( storage_head_ptr(b"test"), - Some(&StorageVal { - point: StoragePoint::Array(Rc::new(vec![ - StoragePoint::Array(Rc::new(vec![ - StoragePoint::Number(1), - StoragePoint::Number(2), - ])), - StoragePoint::Array(Rc::new(vec![ - StoragePoint::Number(3), - StoragePoint::Number(4), - ])), - ])), - refs: Rc::new(vec![]), - }), + Some(&StorageVal::Compound(Rc::new(StorageCompoundVal::Array( + StorageArray { + items: vec![ + StorageVal::Compound(Rc::new(StorageCompoundVal::Array(StorageArray { + items: vec![StorageVal::Number(1), StorageVal::Number(2)], + refs: vec![], + }))), + StorageVal::Compound(Rc::new(StorageCompoundVal::Array(StorageArray { + items: vec![StorageVal::Number(3), StorageVal::Number(4)], + refs: vec![], + }))), + ], + refs: vec![], + }, + )))), ) .unwrap(); @@ -127,22 +118,22 @@ mod tests_ { #[test] fn small_redundant_tree() { fn impl_(storage: &mut Storage) { - let mut arr = StoragePoint::Array(Rc::new(vec![StoragePoint::Number(123)])); + let mut arr = StorageVal::Compound(Rc::new(StorageCompoundVal::Array(StorageArray { + items: vec![StorageVal::Number(123)], + refs: vec![], + }))); let depth = 3; for _ in 0..depth { - arr = StoragePoint::Array(Rc::new(vec![arr.clone(), arr])); + arr = StorageVal::Compound(Rc::new(StorageCompoundVal::Array(StorageArray { + items: vec![arr.clone(), arr], + refs: vec![], + }))); } storage - .set_head( - storage_head_ptr(b"test"), - Some(&StorageVal { - point: arr, - refs: Rc::new(vec![]), - }), - ) + .set_head(storage_head_ptr(b"test"), Some(&arr)) .unwrap(); let value = storage @@ -167,24 +158,24 @@ mod tests_ { #[test] fn large_redundant_tree() { fn impl_(storage: &mut Storage) { - let mut arr = StoragePoint::Array(Rc::new(vec![StoragePoint::Number(123)])); + let mut arr = StorageVal::Compound(Rc::new(StorageCompoundVal::Array(StorageArray { + items: vec![StorageVal::Number(123)], + refs: vec![], + }))); // 2^100 = 1267650600228229401496703205376 // This tests that we can handle a tree with 2^100 nodes. // We do this by reusing the same array as both children of the parent array. // It's particularly important that this also happens during storage. for _ in 0..100 { - arr = StoragePoint::Array(Rc::new(vec![arr.clone(), arr])); + arr = StorageVal::Compound(Rc::new(StorageCompoundVal::Array(StorageArray { + items: vec![arr.clone(), arr], + refs: vec![], + }))); } storage - .set_head( - storage_head_ptr(b"test"), - Some(&StorageVal { - point: arr, - refs: Rc::new(vec![]), - }), - ) + .set_head(storage_head_ptr(b"test"), Some(&arr)) .unwrap(); let value = storage @@ -192,8 +183,9 @@ mod tests_ { .unwrap() .unwrap(); - if let StoragePoint::Array(arr) = value.point { - assert_eq!(arr.len(), 2); + if let StorageVal::Compound(compound) = value { + let StorageCompoundVal::Array(arr) = &*compound; + assert_eq!(arr.items.len(), 2); } else { panic!("Expected array"); }