lib.rs 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458
  1. //! Featherweight string-by-reference implementation.
  2. //!
  3. //! You might want this crate if:
  4. //! - you have a number of string values, the set of which is determined dynamically but does
  5. //! not grow (much) over time, such as from an initial configuration step, and
  6. //! - you want to be able to compare these string values repeatedly at very low cost, and
  7. //! - your usage pattern leans towards many comparisons but relatively few parses.
  8. //!
  9. //! You probably don't want this crate if:
  10. //! - the set of string values you care about changes significantly over time, or
  11. //! - your usage pattern leans towards constructing strings and only rarely comparing them.
  12. //!
  13. //! Conceptually, a [`StoredString`] is a lightweight reference to a string inside a
  14. //! global string storage. The intended use is for rapid equality checks between strings drawn from
  15. //! a small set, where the overhead of byte-by-byte string comparison is overkill.
  16. //!
  17. //! More concretely, each [`StoredString`] contains a pointer to a string stored in
  18. //! a global hash table. Since the hash table guarantees uniqueness, a string comparison can be
  19. //! reduced to simply a pointer comparison. Constructing a `StoredString` involves a mutex lock and
  20. //! hash table lookup, but copying one is nearly free as it is simply a pointer copy. No reference
  21. //! tracking is performed, so any strings added to the global hash table **will remain allocated
  22. //! until program exit**.
  23. //!
  24. //! To avoid potential complications stemming from strings with different semantics sharing the same unique
  25. //! pointer value, `StoredString` is abstracted across a [`NamespaceTag`](trait.NamespaceTag.html)
  26. //! implementation. This introduces no runtime overhead, but allows for catching many errors at
//! compile-time, and still shares pointer values internally. As a concrete example, this is
//! accepted:
  29. //!
  30. //! ```rust
  31. //! # use stringstore::{StoredString,NamespaceTag};
  32. //! # struct Tag;
  33. //! # impl NamespaceTag for Tag { const PREFIX: &'static str = "tag"; }
  34. //! // we can compare StoredStrings directly
  35. //! assert_eq!(StoredString::<Tag>::new("string"), StoredString::<Tag>::new("string"));
  36. //! ```
  37. //!
  38. //! But this is not:
  39. //! ```compile_fail
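//! # use stringstore::{StoredString, NamespaceTag};
//! # struct Tag1;
//! # impl NamespaceTag for Tag1 { const PREFIX: &'static str = "tag1"; }
//! # struct Tag2;
//! # impl NamespaceTag for Tag2 { const PREFIX: &'static str = "tag2"; }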
  41. //! let ss1 = StoredString::<Tag1>::new("string");
  42. //! let ss2 = StoredString::<Tag2>::new("string");
  43. //! assert_eq!(ss1.as_str().as_ptr(), ss2.as_str().as_ptr()); // this holds
  44. //! assert_eq!(ss1, ss2); // this is a compilation error
  45. //! ```
  46. //!
  47. //! The use of type aliases is highly encouraged.
  48. use std::ffi::{OsStr, OsString};
  49. static STR_STORE: std::sync::LazyLock<std::sync::Mutex<std::collections::HashSet<&'static str>>> =
  50. std::sync::LazyLock::new(Default::default);
/// Compile-time marker used as the `Tag` parameter of the stored string types.
///
/// The `Debug` implementations in this file require this trait and print
/// [`PREFIX`](NamespaceTag::PREFIX) followed by `:` in front of the string itself.
pub trait NamespaceTag: 'static {
    /// Prefix to use when displaying strings with this tag in debug contexts.
    const PREFIX: &'static str;
}
/// A `Copy` handle to a `str` held in the global string store.
///
/// See crate documentation for general description.
///
/// Equality, ordering and hashing are all based on the address of the stored string rather
/// than on its contents, so the ordering is *not* lexicographic.
pub struct StoredString<Tag: 'static> {
    /// The interned string this handle refers to.
    stored: &'static str,
    /// Zero-sized marker tying the handle to its namespace tag.
    _ghost: std::marker::PhantomData<Tag>,
}
  60. impl<Tag: 'static> Clone for StoredString<Tag> {
  61. fn clone(&self) -> Self {
  62. *self
  63. }
  64. }
  65. impl<Tag: 'static> Copy for StoredString<Tag> {}
  66. impl<Tag: 'static> std::ops::Deref for StoredString<Tag> {
  67. type Target = str;
  68. fn deref(&self) -> &Self::Target {
  69. self.stored
  70. }
  71. }
  72. impl<Tag: 'static> AsRef<str> for StoredString<Tag> {
  73. fn as_ref(&self) -> &str {
  74. self.stored
  75. }
  76. }
  77. impl<Tag: 'static> std::fmt::Display for StoredString<Tag> {
  78. fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  79. <str as std::fmt::Display>::fmt(self.stored, f)
  80. }
  81. }
  82. impl<Tag: NamespaceTag> std::fmt::Debug for StoredString<Tag> {
  83. fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  84. f.write_fmt(format_args!("{}:{}", Tag::PREFIX, self.stored))
  85. }
  86. }
  87. impl<Tag: 'static> std::hash::Hash for StoredString<Tag> {
  88. fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
  89. // pointers are unique per content by construction
  90. (self.stored as *const str).hash(state);
  91. }
  92. }
  93. impl<Tag: 'static> PartialEq for StoredString<Tag> {
  94. fn eq(&self, other: &Self) -> bool {
  95. self.stored.as_ptr() == other.stored.as_ptr()
  96. }
  97. }
  98. impl<Tag: 'static> Eq for StoredString<Tag> {
  99. fn assert_receiver_is_total_eq(&self) {}
  100. }
  101. impl<Tag: 'static> PartialOrd for StoredString<Tag> {
  102. fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
  103. Some(<Self as Ord>::cmp(self, other))
  104. }
  105. }
  106. impl<Tag: 'static> Ord for StoredString<Tag> {
  107. fn cmp(&self, other: &Self) -> std::cmp::Ordering {
  108. self.stored.as_ptr().cmp(&other.stored.as_ptr())
  109. }
  110. }
  111. impl<Tag> StoredString<Tag> {
  112. pub fn new(from: &str) -> Self {
  113. let mut mg = STR_STORE.lock().expect("couldn't lock STR_STORE?");
  114. match mg.get(from) {
  115. Some(name) => Self {
  116. stored: name,
  117. _ghost: Default::default(),
  118. },
  119. None => {
  120. let s = Box::leak(from.to_owned().into_boxed_str());
  121. mg.insert(s);
  122. Self {
  123. stored: mg.get(from).unwrap(),
  124. _ghost: Default::default(),
  125. }
  126. }
  127. }
  128. }
  129. pub fn as_str(&self) -> &'static str {
  130. self.stored
  131. }
  132. /// Coerce a StoredString between namespaces. This exists solely to change the type and
  133. /// performs no actual work under the hood.
  134. pub fn coerce<Tag2>(&self) -> StoredString<Tag2> {
  135. StoredString {
  136. stored: self.stored,
  137. _ghost: Default::default(),
  138. }
  139. }
  140. }
/// serde visitor that produces a [`StoredString`] in the `Tag` namespace.
#[cfg(feature = "serde")]
struct StoredStringVisitor<Tag>(std::marker::PhantomData<Tag>);
  143. #[cfg(feature = "serde")]
  144. impl<'de, Tag: 'static> serde::de::Visitor<'de> for StoredStringVisitor<Tag> {
  145. type Value = StoredString<Tag>;
  146. fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
  147. write!(formatter, "stored string")
  148. }
  149. fn visit_borrowed_str<E: serde::de::Error>(self, v: &'de str) -> Result<Self::Value, E> {
  150. Ok(StoredString::new(v))
  151. }
  152. fn visit_string<E: serde::de::Error>(self, v: String) -> Result<Self::Value, E> {
  153. Ok(v.into())
  154. }
  155. }
  156. #[cfg(feature = "serde")]
  157. impl<'de, Tag> serde::de::Deserialize<'de> for StoredString<Tag> {
  158. fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
  159. where
  160. D: serde::Deserializer<'de>,
  161. {
  162. deserializer.deserialize_string(StoredStringVisitor::<Tag>(std::marker::PhantomData))
  163. }
  164. }
  165. #[cfg(feature = "serde")]
  166. impl<Tag> serde::ser::Serialize for StoredString<Tag> {
  167. fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
  168. where
  169. S: serde::Serializer,
  170. {
  171. serializer.serialize_str(self.as_str())
  172. }
  173. }
  174. impl<Tag: 'static> From<String> for StoredString<Tag> {
  175. fn from(value: String) -> Self {
  176. Self::new(value.as_str())
  177. }
  178. }
  179. impl<'l, Tag: 'static> From<&'l String> for StoredString<Tag> {
  180. fn from(value: &'l String) -> Self {
  181. Self::new(value.as_str())
  182. }
  183. }
  184. impl<'l, Tag: 'static> From<&'l str> for StoredString<Tag> {
  185. fn from(value: &'l str) -> Self {
  186. Self::new(value)
  187. }
  188. }
/// Unit tests for `StoredString`: interning, cross-namespace coercion, copy semantics and
/// (with the `serde` feature) deserialization.
#[cfg(test)]
mod test_stored_string {
    struct TestTag;
    impl super::NamespaceTag for TestTag {
        const PREFIX: &'static str = "test";
    }
    // `()` serves as a second, distinct namespace for the coercion test below.
    impl super::NamespaceTag for () {
        const PREFIX: &'static str = "unit";
    }
    type SS = super::StoredString<()>;
    type SST = super::StoredString<TestTag>;
    /// Equal contents compare equal, also after coercing between namespaces, and both
    /// `clone()` and a plain copy yield handles equal to the original.
    #[test]
    fn build_test() {
        assert_eq!(SS::new("ss"), SS::new("ss"));
        assert_eq!(SST::new("ss"), SS::new("ss").coerce());
        let ss = SST::new("ss");
        let ss2 = ss.clone();
        // `ss` stays usable after this because the handle is `Copy`
        let ss3 = ss;
        assert_eq!(ss, ss2);
        assert_eq!(ss, ss3);
    }
    /// Deserializing from a mock that hands over one owned `String` yields the same handle
    /// as interning that string directly. Only the `visit_string` path is exercised here.
    #[cfg(feature = "serde")]
    #[test]
    fn deserialization() {
        use serde::Deserialize;
        // mocked deserializer that provides exactly a single string
        struct MockDeser<'de>(&'de mut usize);
        impl<'de> serde::Deserializer<'de> for &'de mut MockDeser<'de> {
            type Error = serde::de::value::Error;
            // any request other than str/string is a test failure
            fn deserialize_any<V: serde::de::Visitor<'de>>(
                self,
                _: V,
            ) -> Result<V::Value, Self::Error> {
                panic!()
            }
            fn deserialize_str<V: serde::de::Visitor<'de>>(
                self,
                visitor: V,
            ) -> Result<V::Value, Self::Error> {
                self.deserialize_string(visitor)
            }
            fn deserialize_string<V: serde::de::Visitor<'de>>(
                self,
                visitor: V,
            ) -> Result<V::Value, Self::Error> {
                // the counter records how many strings have been requested so far
                *self.0 += 1;
                if *self.0 == 1 {
                    visitor.visit_string(String::from("string"))
                } else {
                    panic!(
                        "trying to deserialize a second string; state is now {}",
                        self.0
                    )
                }
            }
            serde::forward_to_deserialize_any!(
                bool i8 i16 i32 i64 i128 u8 u16 u32 u64 u128 f32 f64 char bytes
                byte_buf option unit unit_struct newtype_struct seq tuple
                tuple_struct map struct enum identifier ignored_any
            );
        }
        let mut state = 0;
        let mut deser = MockDeser(&mut state);
        assert_eq!(SST::deserialize(&mut deser), Ok(SST::new("string")));
    }
}
  255. static OS_STR_STORE: std::sync::LazyLock<
  256. std::sync::Mutex<std::collections::HashSet<&'static OsStr>>,
  257. > = std::sync::LazyLock::new(Default::default);
/// A `Copy` handle to an `OsStr` held in the global OS-string store.
///
/// See crate documentation for general description.
///
/// Equality, ordering and hashing are all based on the address of the stored string rather
/// than on its contents, so the ordering is *not* lexicographic.
pub struct StoredOsString<Tag: 'static> {
    /// The interned OS string this handle refers to.
    stored: &'static OsStr,
    /// Zero-sized marker tying the handle to its namespace tag.
    _ghost: std::marker::PhantomData<Tag>,
}
  263. impl<Tag: 'static> Clone for StoredOsString<Tag> {
  264. fn clone(&self) -> Self {
  265. *self
  266. }
  267. }
  268. impl<Tag: 'static> Copy for StoredOsString<Tag> {}
  269. impl<Tag: 'static> std::ops::Deref for StoredOsString<Tag> {
  270. type Target = OsStr;
  271. fn deref(&self) -> &Self::Target {
  272. self.stored
  273. }
  274. }
  275. impl<Tag: 'static> AsRef<OsStr> for StoredOsString<Tag> {
  276. fn as_ref(&self) -> &OsStr {
  277. self.stored
  278. }
  279. }
  280. impl<Tag: NamespaceTag> std::fmt::Debug for StoredOsString<Tag> {
  281. fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
  282. f.write_fmt(format_args!("{}:{:?}", Tag::PREFIX, self.stored))
  283. }
  284. }
  285. impl<Tag: 'static> std::hash::Hash for StoredOsString<Tag> {
  286. fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
  287. // pointers are unique per content by construction
  288. (self.stored as *const OsStr).hash(state);
  289. }
  290. }
  291. impl<Tag: 'static> PartialEq for StoredOsString<Tag> {
  292. fn eq(&self, other: &Self) -> bool {
  293. // pointers are unique per content by construction
  294. self.stored.as_encoded_bytes().as_ptr() == other.stored.as_encoded_bytes().as_ptr()
  295. }
  296. }
  297. impl<Tag: 'static> Eq for StoredOsString<Tag> {
  298. fn assert_receiver_is_total_eq(&self) {}
  299. }
  300. impl<Tag: 'static> PartialOrd for StoredOsString<Tag> {
  301. fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
  302. Some(<Self as Ord>::cmp(self, other))
  303. }
  304. }
  305. impl<Tag: 'static> Ord for StoredOsString<Tag> {
  306. fn cmp(&self, other: &Self) -> std::cmp::Ordering {
  307. self.stored
  308. .as_encoded_bytes()
  309. .as_ptr()
  310. .cmp(&other.stored.as_encoded_bytes().as_ptr())
  311. }
  312. }
  313. impl<Tag> StoredOsString<Tag> {
  314. pub fn new(from: &OsStr) -> Self {
  315. let mut mg = OS_STR_STORE.lock().expect("couldn't lock STR_STORE?");
  316. match mg.get(from) {
  317. Some(name) => Self {
  318. stored: name,
  319. _ghost: Default::default(),
  320. },
  321. None => {
  322. let s = Box::leak(from.to_owned().into_boxed_os_str());
  323. mg.insert(s);
  324. Self {
  325. stored: mg.get(from).unwrap(),
  326. _ghost: Default::default(),
  327. }
  328. }
  329. }
  330. }
  331. pub fn as_str(&self) -> &'static OsStr {
  332. self.stored
  333. }
  334. /// Coerce a StoredOsString between namespaces. This exists solely to change the type and
  335. /// performs no actual work under the hood.
  336. pub fn coerce<Tag2>(&self) -> StoredOsString<Tag2> {
  337. StoredOsString {
  338. stored: self.stored,
  339. _ghost: Default::default(),
  340. }
  341. }
  342. }
/// serde visitor that produces a [`StoredOsString`] in the `Tag` namespace.
#[cfg(feature = "serde")]
struct StoredOsStringVisitor<Tag>(std::marker::PhantomData<Tag>);
  345. #[cfg(feature = "serde")]
  346. impl<'de, Tag: 'static> serde::de::Visitor<'de> for StoredOsStringVisitor<Tag> {
  347. type Value = StoredOsString<Tag>;
  348. fn expecting(&self, formatter: &mut std::fmt::Formatter) -> std::fmt::Result {
  349. write!(formatter, "stored OsString")
  350. }
  351. fn visit_bytes<E: serde::de::Error>(self, v: &[u8]) -> Result<Self::Value, E> {
  352. Ok(StoredOsString::from(v))
  353. }
  354. }
  355. #[cfg(feature = "serde")]
  356. impl<'de, Tag> serde::de::Deserialize<'de> for StoredOsString<Tag> {
  357. fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
  358. where
  359. D: serde::Deserializer<'de>,
  360. {
  361. deserializer.deserialize_bytes(StoredOsStringVisitor::<Tag>(std::marker::PhantomData))
  362. }
  363. }
  364. #[cfg(feature = "serde")]
  365. impl<Tag> serde::ser::Serialize for StoredOsString<Tag> {
  366. fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
  367. where
  368. S: serde::Serializer,
  369. {
  370. serializer.serialize_bytes(self.as_encoded_bytes())
  371. }
  372. }
  373. impl<Tag: 'static> From<OsString> for StoredOsString<Tag> {
  374. fn from(value: OsString) -> Self {
  375. Self::new(value.as_os_str())
  376. }
  377. }
  378. impl<'l, Tag: 'static> From<&'l OsString> for StoredOsString<Tag> {
  379. fn from(value: &'l OsString) -> Self {
  380. Self::new(value.as_os_str())
  381. }
  382. }
  383. impl<'l, Tag: 'static> From<&'l OsStr> for StoredOsString<Tag> {
  384. fn from(value: &'l OsStr) -> Self {
  385. Self::new(value)
  386. }
  387. }
  388. #[cfg(target_os = "linux")]
  389. impl<'l, Tag: 'static> From<&'l [u8]> for StoredOsString<Tag> {
  390. fn from(value: &'l [u8]) -> Self {
  391. use std::os::unix::ffi::OsStrExt;
  392. OsStr::from_bytes(value).into()
  393. }
  394. }