//! Featherweight string-by-reference implementation.
//!
//! You might want this crate if:
//! - you have a number of string values, the set of which is determined dynamically but does
//!   not grow (much) over time, such as from an initial configuration step, and
//! - you want to be able to compare these string values repeatedly at very low cost, and
//! - your usage pattern leans towards many comparisons but relatively few parses.
//!
//! You probably don't want this crate if:
//! - the set of string values you care about changes significantly over time, or
//! - your usage pattern leans towards constructing strings and only rarely comparing them.
//!
//! Conceptually, a [`StoredString`] is a lightweight reference to a string inside a
//! global string storage. The intended use is for rapid equality checks between strings drawn from
//! a small set, where the overhead of byte-by-byte string comparison is overkill.
//!
//! More concretely, each [`StoredString`] contains a pointer to a string stored in
//! a global hash table. Since the hash table guarantees uniqueness, a string comparison can be
//! reduced to simply a pointer comparison. Constructing a `StoredString` involves a mutex lock and
//! hash table lookup, but copying one is nearly free as it is simply a pointer copy. No reference
//! tracking is performed, so any strings added to the global hash table **will remain allocated
//! until program exit**.
//!
//! To avoid potential complications stemming from strings with different semantics sharing the same unique
//! pointer value, `StoredString` is abstracted across a [`NamespaceTag`](trait.NamespaceTag.html)
//! implementation. This introduces no runtime overhead, but allows for catching many errors at
//! compile-time, and still shares pointer values internally. As a concrete example, this is
//! perfectly acceptable:
//!
//! ```rust
//! # use stringstore::{StoredString,NamespaceTag};
//! # struct Tag;
# impl NamespaceTag for Tag { const PREFIX: &'static str = "tag"; } //! // we can compare StoredStrings directly //! assert_eq!(StoredString::::new("string"), StoredString::::new("string")); //! ``` //! //! But this is not: //! ```compile_fail //! # use stringstore::StoredString; //! let ss1 = StoredString::::new("string"); //! let ss2 = StoredString::::new("string"); //! assert_eq!(ss1.as_str().as_ptr(), ss2.as_str().as_ptr()); // this holds //! assert_eq!(ss1, ss2); // this is a compilation error //! ``` //! //! The use of type aliases is highly encouraged. use std::ffi::{OsStr, OsString}; static STR_STORE: std::sync::LazyLock>> = std::sync::LazyLock::new(Default::default); pub trait NamespaceTag: 'static { /// Prefix to use when displaying strings with this tag in debug contexts. const PREFIX: &'static str; } /// See crate documentation for general description. pub struct StoredString { stored: &'static str, _ghost: std::marker::PhantomData, } impl Clone for StoredString { fn clone(&self) -> Self { *self } } impl Copy for StoredString {} impl std::ops::Deref for StoredString { type Target = str; fn deref(&self) -> &Self::Target { self.stored } } impl AsRef for StoredString { fn as_ref(&self) -> &str { self.stored } } impl std::fmt::Display for StoredString { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { ::fmt(self.stored, f) } } impl std::fmt::Debug for StoredString { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_fmt(format_args!("{}:{}", Tag::PREFIX, self.stored)) } } impl std::hash::Hash for StoredString { fn hash(&self, state: &mut H) { // pointers are unique per content by construction (self.stored as *const str).hash(state); } } impl PartialEq for StoredString { fn eq(&self, other: &Self) -> bool { self.stored.as_ptr() == other.stored.as_ptr() } } impl Eq for StoredString { fn assert_receiver_is_total_eq(&self) {} } impl PartialOrd for StoredString { fn partial_cmp(&self, other: &Self) -> Option { 
Some(::cmp(self, other)) } } impl Ord for StoredString { fn cmp(&self, other: &Self) -> std::cmp::Ordering { self.stored.as_ptr().cmp(&other.stored.as_ptr()) } } impl StoredString { pub fn new(from: &str) -> Self { let mut mg = STR_STORE.lock().expect("couldn't lock STR_STORE?"); match mg.get(from) { Some(name) => Self { stored: name, _ghost: Default::default(), }, None => { let s = Box::leak(from.to_owned().into_boxed_str()); mg.insert(s); Self { stored: mg.get(from).unwrap(), _ghost: Default::default(), } } } } pub fn as_str(&self) -> &'static str { self.stored } /// Coerce a StoredString between namespaces. This exists solely to change the type and /// performs no actual work under the hood. pub fn coerce(&self) -> StoredString { StoredString { stored: self.stored, _ghost: Default::default(), } } } #[cfg(feature = "serde")] struct StoredStringVisitor(std::marker::PhantomData); #[cfg(feature = "serde")] impl<'de, Tag: 'static> serde::de::Visitor<'de> for StoredStringVisitor { type Value = StoredString; fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { write!(formatter, "stored string") } fn visit_borrowed_str(self, v: &'de str) -> Result { Ok(StoredString::new(v)) } fn visit_string(self, v: String) -> Result { Ok(v.into()) } } #[cfg(feature = "serde")] impl<'de, Tag> serde::de::Deserialize<'de> for StoredString { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, { deserializer.deserialize_string(StoredStringVisitor::(std::marker::PhantomData)) } } #[cfg(feature = "serde")] impl serde::ser::Serialize for StoredString { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { serializer.serialize_str(self.as_str()) } } impl From for StoredString { fn from(value: String) -> Self { Self::new(value.as_str()) } } impl<'l, Tag: 'static> From<&'l String> for StoredString { fn from(value: &'l String) -> Self { Self::new(value.as_str()) } } impl<'l, Tag: 'static> From<&'l str> for StoredString { fn 
from(value: &'l str) -> Self { Self::new(value) } } #[cfg(test)] mod test_stored_string { struct TestTag; impl super::NamespaceTag for TestTag { const PREFIX: &'static str = "test"; } impl super::NamespaceTag for () { const PREFIX: &'static str = "unit"; } type SS = super::StoredString<()>; type SST = super::StoredString; #[test] fn build_test() { assert_eq!(SS::new("ss"), SS::new("ss")); assert_eq!(SST::new("ss"), SS::new("ss").coerce()); let ss = SST::new("ss"); let ss2 = ss.clone(); let ss3 = ss; assert_eq!(ss, ss2); assert_eq!(ss, ss3); } #[cfg(feature = "serde")] #[test] fn deserialization() { use serde::Deserialize; // mocked deserializer that provides exactly a single string struct MockDeser<'de>(&'de mut usize); impl<'de> serde::Deserializer<'de> for &'de mut MockDeser<'de> { type Error = serde::de::value::Error; fn deserialize_any>( self, _: V, ) -> Result { panic!() } fn deserialize_str>( self, visitor: V, ) -> Result { self.deserialize_string(visitor) } fn deserialize_string>( self, visitor: V, ) -> Result { *self.0 += 1; if *self.0 == 1 { visitor.visit_string(String::from("string")) } else { panic!( "trying to deserialize a second string; state is now {}", self.0 ) } } serde::forward_to_deserialize_any!( bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char bytes byte_buf option unit unit_struct newtype_struct seq tuple tuple_struct map struct enum identifier ignored_any ); } let mut state = 0; let mut deser = MockDeser(&mut state); assert_eq!(SST::deserialize(&mut deser), Ok(SST::new("string"))); } } static OS_STR_STORE: std::sync::LazyLock< std::sync::Mutex>, > = std::sync::LazyLock::new(Default::default); /// See crate documentation for general description. 
pub struct StoredOsString { stored: &'static OsStr, _ghost: std::marker::PhantomData, } impl Clone for StoredOsString { fn clone(&self) -> Self { *self } } impl Copy for StoredOsString {} impl std::ops::Deref for StoredOsString { type Target = OsStr; fn deref(&self) -> &Self::Target { self.stored } } impl AsRef for StoredOsString { fn as_ref(&self) -> &OsStr { self.stored } } impl std::fmt::Debug for StoredOsString { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { f.write_fmt(format_args!("{}:{:?}", Tag::PREFIX, self.stored)) } } impl std::hash::Hash for StoredOsString { fn hash(&self, state: &mut H) { // pointers are unique per content by construction (self.stored as *const OsStr).hash(state); } } impl PartialEq for StoredOsString { fn eq(&self, other: &Self) -> bool { // pointers are unique per content by construction self.stored.as_encoded_bytes().as_ptr() == other.stored.as_encoded_bytes().as_ptr() } } impl Eq for StoredOsString { fn assert_receiver_is_total_eq(&self) {} } impl PartialOrd for StoredOsString { fn partial_cmp(&self, other: &Self) -> Option { Some(::cmp(self, other)) } } impl Ord for StoredOsString { fn cmp(&self, other: &Self) -> std::cmp::Ordering { self.stored .as_encoded_bytes() .as_ptr() .cmp(&other.stored.as_encoded_bytes().as_ptr()) } } impl StoredOsString { pub fn new(from: &OsStr) -> Self { let mut mg = OS_STR_STORE.lock().expect("couldn't lock STR_STORE?"); match mg.get(from) { Some(name) => Self { stored: name, _ghost: Default::default(), }, None => { let s = Box::leak(from.to_owned().into_boxed_os_str()); mg.insert(s); Self { stored: mg.get(from).unwrap(), _ghost: Default::default(), } } } } pub fn as_str(&self) -> &'static OsStr { self.stored } /// Coerce a StoredOsString between namespaces. This exists solely to change the type and /// performs no actual work under the hood. 
pub fn coerce(&self) -> StoredOsString { StoredOsString { stored: self.stored, _ghost: Default::default(), } } } #[cfg(feature = "serde")] struct StoredOsStringVisitor(std::marker::PhantomData); #[cfg(feature = "serde")] impl<'de, Tag: 'static> serde::de::Visitor<'de> for StoredOsStringVisitor { type Value = StoredOsString; fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result { write!(formatter, "stored OsString") } fn visit_bytes(self, v: &[u8]) -> Result { Ok(StoredOsString::from(v)) } } #[cfg(feature = "serde")] impl<'de, Tag> serde::de::Deserialize<'de> for StoredOsString { fn deserialize(deserializer: D) -> Result where D: serde::Deserializer<'de>, { deserializer.deserialize_bytes(StoredOsStringVisitor::(std::marker::PhantomData)) } } #[cfg(feature = "serde")] impl serde::ser::Serialize for StoredOsString { fn serialize(&self, serializer: S) -> Result where S: serde::Serializer, { serializer.serialize_bytes(self.as_encoded_bytes()) } } impl From for StoredOsString { fn from(value: OsString) -> Self { Self::new(value.as_os_str()) } } impl<'l, Tag: 'static> From<&'l OsString> for StoredOsString { fn from(value: &'l OsString) -> Self { Self::new(value.as_os_str()) } } impl<'l, Tag: 'static> From<&'l OsStr> for StoredOsString { fn from(value: &'l OsStr) -> Self { Self::new(value) } } #[cfg(target_os = "linux")] impl<'l, Tag: 'static> From<&'l [u8]> for StoredOsString { fn from(value: &'l [u8]) -> Self { use std::os::unix::ffi::OsStrExt; OsStr::from_bytes(value).into() } }