123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458 |
- //! Featherweight string-by-reference implementation.
- //!
- //! You might want this crate if:
- //! - you have a number of string values, the set of which is determined dynamically but does
- //! not grow (much) over time, such as from an initial configuration step, and
- //! - you want to be able to compare these string values repeatedly at very low cost, and
- //! - your usage pattern leans towards many comparisons but relatively few parses.
- //!
- //! You probably don't want this crate if:
- //! - the set of string values you care about changes significantly over time, or
- //! - your usage pattern leans towards constructing strings and only rarely comparing them.
- //!
- //! Conceptually, a [`StoredString`] is a lightweight reference to a string inside a
- //! global string storage. The intended use is for rapid equality checks between strings drawn from
- //! a small set, where the overhead of byte-by-byte string comparison is overkill.
- //!
- //! More concretely, each [`StoredString`] contains a pointer to a string stored in
- //! a global hash table. Since the hash table guarantees uniqueness, a string comparison can be
- //! reduced to simply a pointer comparison. Constructing a `StoredString` involves a mutex lock and
- //! hash table lookup, but copying one is nearly free as it is simply a pointer copy. No reference
- //! tracking is performed, so any strings added to the global hash table **will remain allocated
- //! until program exit**.
- //!
- //! To avoid potential complications stemming from strings with different semantics sharing the same unique
- //! pointer value, `StoredString` is abstracted across a [`NamespaceTag`](trait.NamespaceTag.html)
- //! implementation. This introduces no runtime overhead, but allows for catching many errors at
- //! compile-time, and still shares pointer values internally. As a concrete example, this
- //! compiles and passes:
- //!
- //! ```rust
- //! # use stringstore::{StoredString,NamespaceTag};
- //! # struct Tag;
- //! # impl NamespaceTag for Tag { const PREFIX: &'static str = "tag"; }
- //! // we can compare StoredStrings directly
- //! assert_eq!(StoredString::<Tag>::new("string"), StoredString::<Tag>::new("string"));
- //! ```
- //!
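- //! But this is not, because the two values live in different namespaces:
- //!
- //! ```compile_fail
- //! # use stringstore::{StoredString,NamespaceTag};
- //! # struct Tag1;
- //! # impl NamespaceTag for Tag1 { const PREFIX: &'static str = "tag1"; }
- //! # struct Tag2;
- //! # impl NamespaceTag for Tag2 { const PREFIX: &'static str = "tag2"; }
- //! let ss1 = StoredString::<Tag1>::new("string");
- //! let ss2 = StoredString::<Tag2>::new("string");
- //! assert_eq!(ss1.as_str().as_ptr(), ss2.as_str().as_ptr()); // this holds
- //! assert_eq!(ss1, ss2); // this is a compilation error
- //! ```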
- //!
- //! The use of type aliases is highly encouraged.
- use std::ffi::{OsStr, OsString};
/// Process-wide table of interned `str` values backing every [`StoredString`].
///
/// The set starts out empty, is created lazily on first access, and is guarded by a mutex.
static STR_STORE: std::sync::LazyLock<std::sync::Mutex<std::collections::HashSet<&'static str>>> =
    std::sync::LazyLock::new(|| std::sync::Mutex::new(std::collections::HashSet::new()));
/// Marker trait for types used as the namespace parameter of a stored string.
///
/// Implementors carry no data; the only requirement is a short textual prefix.
pub trait NamespaceTag: 'static {
    /// Prefix to use when displaying strings with this tag in debug contexts.
    const PREFIX: &'static str;
}
/// A lightweight, copyable handle to a string held in the global string store.
///
/// `Tag` is a compile-time namespace marker only; no value of type `Tag` is ever stored.
/// See crate documentation for general description.
pub struct StoredString<Tag>
where
    Tag: 'static,
{
    /// The stored string this handle points at.
    stored: &'static str,
    /// Ties the handle to its namespace without holding a `Tag` value.
    _ghost: std::marker::PhantomData<Tag>,
}
- impl<Tag: 'static> Clone for StoredString<Tag> {
- fn clone(&self) -> Self {
- *self
- }
- }
- impl<Tag: 'static> Copy for StoredString<Tag> {}
- impl<Tag: 'static> std::ops::Deref for StoredString<Tag> {
- type Target = str;
- fn deref(&self) -> &Self::Target {
- self.stored
- }
- }
- impl<Tag: 'static> AsRef<str> for StoredString<Tag> {
- fn as_ref(&self) -> &str {
- self.stored
- }
- }
- impl<Tag: 'static> std::fmt::Display for StoredString<Tag> {
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- <str as std::fmt::Display>::fmt(self.stored, f)
- }
- }
- impl<Tag: NamespaceTag> std::fmt::Debug for StoredString<Tag> {
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- f.write_fmt(format_args!("{}:{}", Tag::PREFIX, self.stored))
- }
- }
- impl<Tag: 'static> std::hash::Hash for StoredString<Tag> {
- fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
- // pointers are unique per content by construction
- (self.stored as *const str).hash(state);
- }
- }
- impl<Tag: 'static> PartialEq for StoredString<Tag> {
- fn eq(&self, other: &Self) -> bool {
- self.stored.as_ptr() == other.stored.as_ptr()
- }
- }
- impl<Tag: 'static> Eq for StoredString<Tag> {
- fn assert_receiver_is_total_eq(&self) {}
- }
- impl<Tag: 'static> PartialOrd for StoredString<Tag> {
- fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
- Some(<Self as Ord>::cmp(self, other))
- }
- }
- impl<Tag: 'static> Ord for StoredString<Tag> {
- fn cmp(&self, other: &Self) -> std::cmp::Ordering {
- self.stored.as_ptr().cmp(&other.stored.as_ptr())
- }
- }
- impl<Tag> StoredString<Tag> {
- pub fn new(from: &str) -> Self {
- let mut mg = STR_STORE.lock().expect("couldn't lock STR_STORE?");
- match mg.get(from) {
- Some(name) => Self {
- stored: name,
- _ghost: Default::default(),
- },
- None => {
- let s = Box::leak(from.to_owned().into_boxed_str());
- mg.insert(s);
- Self {
- stored: mg.get(from).unwrap(),
- _ghost: Default::default(),
- }
- }
- }
- }
- pub fn as_str(&self) -> &'static str {
- self.stored
- }
- /// Coerce a StoredString between namespaces. This exists solely to change the type and
- /// performs no actual work under the hood.
- pub fn coerce<Tag2>(&self) -> StoredString<Tag2> {
- StoredString {
- stored: self.stored,
- _ghost: Default::default(),
- }
- }
- }
/// Serde visitor that turns any string input into a [`StoredString`].
#[cfg(feature = "serde")]
struct StoredStringVisitor<Tag>(std::marker::PhantomData<Tag>);
#[cfg(feature = "serde")]
impl<'de, Tag: 'static> serde::de::Visitor<'de> for StoredStringVisitor<Tag> {
    type Value = StoredString<Tag>;

    fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(formatter, "stored string")
    }

    /// Handles transient, borrowed and owned strings alike.
    ///
    /// serde's default `visit_borrowed_str` and `visit_string` both forward to `visit_str`,
    /// whereas the default `visit_str` reports a type error. Implementing only this method
    /// therefore accepts every string flavour a deserializer may hand over. The input is
    /// only ever read, so taking ownership would gain nothing.
    fn visit_str<E: serde::de::Error>(self, v: &str) -> Result<Self::Value, E> {
        Ok(StoredString::new(v))
    }
}
/// Deserializes from a string, storing it on the way in.
#[cfg(feature = "serde")]
impl<'de, Tag> serde::de::Deserialize<'de> for StoredString<Tag> {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let visitor = StoredStringVisitor::<Tag>(std::marker::PhantomData);
        deserializer.deserialize_string(visitor)
    }
}
/// Serializes as a plain string; the namespace tag is not part of the output.
#[cfg(feature = "serde")]
impl<Tag> serde::ser::Serialize for StoredString<Tag> {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        let text: &str = self.as_str();
        serializer.serialize_str(text)
    }
}
- impl<Tag: 'static> From<String> for StoredString<Tag> {
- fn from(value: String) -> Self {
- Self::new(value.as_str())
- }
- }
- impl<'l, Tag: 'static> From<&'l String> for StoredString<Tag> {
- fn from(value: &'l String) -> Self {
- Self::new(value.as_str())
- }
- }
- impl<'l, Tag: 'static> From<&'l str> for StoredString<Tag> {
- fn from(value: &'l str) -> Self {
- Self::new(value)
- }
- }
#[cfg(test)]
mod test_stored_string {
    use super::{NamespaceTag, StoredString};

    /// Dedicated namespace for the assertions below.
    struct TestTag;
    impl NamespaceTag for TestTag {
        const PREFIX: &'static str = "test";
    }
    // The unit type serves as a second, distinct namespace.
    impl NamespaceTag for () {
        const PREFIX: &'static str = "unit";
    }
    type SS = StoredString<()>;
    type SST = StoredString<TestTag>;

    /// Equal inputs give equal handles across construction, coercion, clone and copy.
    #[test]
    fn build_test() {
        let first = SS::new("ss");
        let second = SS::new("ss");
        assert_eq!(first, second);

        let coerced: SST = SS::new("ss").coerce();
        assert_eq!(SST::new("ss"), coerced);

        let ss = SST::new("ss");
        let ss2 = ss.clone();
        let ss3 = ss;
        assert_eq!(ss, ss2);
        assert_eq!(ss, ss3);
    }

    /// Deserializing through serde yields the same handle as constructing directly.
    #[cfg(feature = "serde")]
    #[test]
    fn deserialization() {
        use serde::Deserialize;

        // mocked deserializer that provides exactly a single string
        struct MockDeser<'de>(&'de mut usize);
        impl<'de> serde::Deserializer<'de> for &'de mut MockDeser<'de> {
            type Error = serde::de::value::Error;

            fn deserialize_any<V: serde::de::Visitor<'de>>(
                self,
                _: V,
            ) -> Result<V::Value, Self::Error> {
                panic!()
            }

            fn deserialize_str<V: serde::de::Visitor<'de>>(
                self,
                visitor: V,
            ) -> Result<V::Value, Self::Error> {
                self.deserialize_string(visitor)
            }

            fn deserialize_string<V: serde::de::Visitor<'de>>(
                self,
                visitor: V,
            ) -> Result<V::Value, Self::Error> {
                // count the request first so the panic message reports the new state
                *self.0 += 1;
                if *self.0 != 1 {
                    panic!(
                        "trying to deserialize a second string; state is now {}",
                        self.0
                    )
                }
                visitor.visit_string(String::from("string"))
            }

            serde::forward_to_deserialize_any!(
                bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char bytes
                byte_buf option unit unit_struct newtype_struct seq tuple
                tuple_struct map struct enum identifier ignored_any
            );
        }

        let mut calls = 0;
        let mut deser = MockDeser(&mut calls);
        let expected = SST::new("string");
        assert_eq!(SST::deserialize(&mut deser), Ok(expected));
    }
}
/// Process-wide table of interned `OsStr` values backing every [`StoredOsString`].
///
/// The set starts out empty, is created lazily on first access, and is guarded by a mutex.
static OS_STR_STORE: std::sync::LazyLock<
    std::sync::Mutex<std::collections::HashSet<&'static OsStr>>,
> = std::sync::LazyLock::new(|| std::sync::Mutex::new(std::collections::HashSet::new()));
/// A lightweight, copyable handle to an `OsStr` held in the global OS-string store.
///
/// `Tag` is a compile-time namespace marker only; no value of type `Tag` is ever stored.
/// See crate documentation for general description.
pub struct StoredOsString<Tag>
where
    Tag: 'static,
{
    /// The stored OS string this handle points at.
    stored: &'static OsStr,
    /// Ties the handle to its namespace without holding a `Tag` value.
    _ghost: std::marker::PhantomData<Tag>,
}
- impl<Tag: 'static> Clone for StoredOsString<Tag> {
- fn clone(&self) -> Self {
- *self
- }
- }
- impl<Tag: 'static> Copy for StoredOsString<Tag> {}
- impl<Tag: 'static> std::ops::Deref for StoredOsString<Tag> {
- type Target = OsStr;
- fn deref(&self) -> &Self::Target {
- self.stored
- }
- }
- impl<Tag: 'static> AsRef<OsStr> for StoredOsString<Tag> {
- fn as_ref(&self) -> &OsStr {
- self.stored
- }
- }
- impl<Tag: NamespaceTag> std::fmt::Debug for StoredOsString<Tag> {
- fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
- f.write_fmt(format_args!("{}:{:?}", Tag::PREFIX, self.stored))
- }
- }
- impl<Tag: 'static> std::hash::Hash for StoredOsString<Tag> {
- fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
- // pointers are unique per content by construction
- (self.stored as *const OsStr).hash(state);
- }
- }
- impl<Tag: 'static> PartialEq for StoredOsString<Tag> {
- fn eq(&self, other: &Self) -> bool {
- // pointers are unique per content by construction
- self.stored.as_encoded_bytes().as_ptr() == other.stored.as_encoded_bytes().as_ptr()
- }
- }
- impl<Tag: 'static> Eq for StoredOsString<Tag> {
- fn assert_receiver_is_total_eq(&self) {}
- }
- impl<Tag: 'static> PartialOrd for StoredOsString<Tag> {
- fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
- Some(<Self as Ord>::cmp(self, other))
- }
- }
- impl<Tag: 'static> Ord for StoredOsString<Tag> {
- fn cmp(&self, other: &Self) -> std::cmp::Ordering {
- self.stored
- .as_encoded_bytes()
- .as_ptr()
- .cmp(&other.stored.as_encoded_bytes().as_ptr())
- }
- }
- impl<Tag> StoredOsString<Tag> {
- pub fn new(from: &OsStr) -> Self {
- let mut mg = OS_STR_STORE.lock().expect("couldn't lock STR_STORE?");
- match mg.get(from) {
- Some(name) => Self {
- stored: name,
- _ghost: Default::default(),
- },
- None => {
- let s = Box::leak(from.to_owned().into_boxed_os_str());
- mg.insert(s);
- Self {
- stored: mg.get(from).unwrap(),
- _ghost: Default::default(),
- }
- }
- }
- }
- pub fn as_str(&self) -> &'static OsStr {
- self.stored
- }
- /// Coerce a StoredOsString between namespaces. This exists solely to change the type and
- /// performs no actual work under the hood.
- pub fn coerce<Tag2>(&self) -> StoredOsString<Tag2> {
- StoredOsString {
- stored: self.stored,
- _ghost: Default::default(),
- }
- }
- }
/// Serde visitor that turns byte input into a [`StoredOsString`].
#[cfg(feature = "serde")]
struct StoredOsStringVisitor<Tag>(std::marker::PhantomData<Tag>);
#[cfg(feature = "serde")]
impl<'de, Tag: 'static> serde::de::Visitor<'de> for StoredOsStringVisitor<Tag> {
    type Value = StoredOsString<Tag>;

    fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
        write!(formatter, "stored OsString")
    }

    /// Builds the value from raw bytes.
    ///
    /// On unix targets any byte sequence is a valid `OsStr`. Elsewhere there is no safe
    /// conversion from arbitrary bytes, so only valid UTF-8 is accepted and anything else
    /// is reported as an invalid value.
    fn visit_bytes<E: serde::de::Error>(self, v: &[u8]) -> Result<Self::Value, E> {
        #[cfg(unix)]
        {
            use std::os::unix::ffi::OsStrExt;
            Ok(StoredOsString::new(OsStr::from_bytes(v)))
        }
        #[cfg(not(unix))]
        {
            match std::str::from_utf8(v) {
                Ok(text) => Ok(StoredOsString::new(OsStr::new(text))),
                Err(_) => Err(E::invalid_value(serde::de::Unexpected::Bytes(v), &self)),
            }
        }
    }

    /// Accepts bytes encoded as a sequence of integers, as produced by formats that have
    /// no native byte-string representation.
    fn visit_seq<A: serde::de::SeqAccess<'de>>(self, mut seq: A) -> Result<Self::Value, A::Error> {
        // Cap the pre-allocation: the size hint comes from the input and is not trusted.
        let mut bytes = Vec::with_capacity(seq.size_hint().unwrap_or(0).min(4096));
        while let Some(byte) = seq.next_element::<u8>()? {
            bytes.push(byte);
        }
        self.visit_bytes(&bytes)
    }
}
/// Deserializes from a byte string, storing it on the way in.
#[cfg(feature = "serde")]
impl<'de, Tag> serde::de::Deserialize<'de> for StoredOsString<Tag> {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        let visitor = StoredOsStringVisitor::<Tag>(std::marker::PhantomData);
        deserializer.deserialize_bytes(visitor)
    }
}
/// Serializes the `OsStr`'s encoded bytes; the namespace tag is not part of the output.
#[cfg(feature = "serde")]
impl<Tag> serde::ser::Serialize for StoredOsString<Tag> {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: serde::Serializer,
    {
        // NOTE(review): `as_encoded_bytes` is platform-specific per the std docs — confirm
        // the output is only ever read back on the same kind of target.
        let raw: &[u8] = self.as_encoded_bytes();
        serializer.serialize_bytes(raw)
    }
}
- impl<Tag: 'static> From<OsString> for StoredOsString<Tag> {
- fn from(value: OsString) -> Self {
- Self::new(value.as_os_str())
- }
- }
- impl<'l, Tag: 'static> From<&'l OsString> for StoredOsString<Tag> {
- fn from(value: &'l OsString) -> Self {
- Self::new(value.as_os_str())
- }
- }
- impl<'l, Tag: 'static> From<&'l OsStr> for StoredOsString<Tag> {
- fn from(value: &'l OsStr) -> Self {
- Self::new(value)
- }
- }
- #[cfg(target_os = "linux")]
- impl<'l, Tag: 'static> From<&'l [u8]> for StoredOsString<Tag> {
- fn from(value: &'l [u8]) -> Self {
- use std::os::unix::ffi::OsStrExt;
- OsStr::from_bytes(value).into()
- }
- }
|