Преглед на файлове

Improve ledger parsing.

Kestrel преди 2 дни
родител
ревизия
cd77d87d21
променени са 8 файла, в които са добавени 154 реда и са изтрити 49 реда
  1. 4 5
      Cargo.toml
  2. 21 2
      src/data.rs
  3. 87 32
      src/data/ledger.rs
  4. 10 8
      src/data/spec.rs
  5. 14 0
      src/data/unit.rs
  6. 10 1
      src/main.rs
  7. 4 1
      testdata/ledger
  8. 4 0
      testdata/root.toml

+ 4 - 5
Cargo.toml

@@ -7,17 +7,16 @@ edition = "2024"
 # core dependencies
 log = { version = "0.4.0" }
 anyhow = { version = "1" }
-stringstore = { version = "0.1.2" }
-
-# data processing dependencies
-commodity = { version = "0.4.0" }
+stringstore = { version = "0.1.5", features = ["serde"], path = "../stringstore/" }
+rust_decimal = { version = "1.37" }
 
 # i/o dependencies
 serde = { version = "1.0", features = ["derive"] }
 toml = { version = "0.8", features = [] }
-chumsky = { version = "0.10", features = [] }
+chumsky = { version = "0.10", features = ["lexical-numbers"] }
 ariadne = { version = "0.5" }
 
 # cli dependencies
 pretty_env_logger = { version = "0.5.0" }
 clap = { version = "4.5", features = ["derive", "env"] }
+console = { version = "0.15" }

+ 21 - 2
src/data.rs

@@ -7,6 +7,7 @@ use ariadne::Cache;
 
 mod ledger;
 mod spec;
+mod unit;
 
 pub struct LocationTag;
 impl stringstore::NamespaceTag for LocationTag {
@@ -32,6 +33,7 @@ pub struct DataSource {
     range: std::ops::Range<usize>,
 }
 
+/// Cache of source file contents read from the filesystem, keyed by [`SourceFile`].
 #[derive(Default)]
 pub struct FilesystemData {
     file_data: HashMap<SourceFile, ariadne::Source<std::rc::Rc<str>>>,
@@ -71,6 +73,7 @@ impl ariadne::Cache<SourceFile> for FilesystemData {
 pub enum DataError {
     IOError(std::io::Error),
     Report(ariadne::Report<'static, (SourceFile, std::ops::Range<usize>)>),
+    Validation(String),
 }
 
 impl std::fmt::Display for DataError {
@@ -100,7 +103,23 @@ impl Root {
         let root_data = fsdata.fetch(&sf).unwrap();
 
         match toml::from_str::<spec::RootSpec>(root_data.text()) {
-            Ok(root_spec) => {
+            Ok(mut root_spec) => {
+                let initial_name = AccountName::from("initial");
+                if let Some(_) = root_spec.accounts.get(&initial_name) {
+                    return Err(DataError::Validation(String::from(
+                        "cannot define 'initial' account, as it is a built-in",
+                    )));
+                } else {
+                    root_spec.accounts.insert(
+                        initial_name,
+                        spec::AccountSpec {
+                            title: None,
+                            description: None,
+                            annotations: None,
+                            default_unit: None,
+                        },
+                    );
+                }
                 let mut r = Self {
                     path: path.into(),
                     root_spec,
@@ -152,7 +171,7 @@ impl Root {
             let s = SourceFile::new(path.as_os_str());
             let data = fsdata.fetch(&s).unwrap();
             self.ledger_data
-                .extend(ledger::parse_ledger(s, data.text())?);
+                .extend(ledger::parse_ledger(s, &self.root_spec, data.text())?);
         }
 
         Ok(())

+ 87 - 32
src/data/ledger.rs

@@ -1,6 +1,6 @@
-use super::{AccountName, UnitName};
+use super::{AccountName, UnitName, spec::RootSpec};
 
-use chumsky::prelude::*;
+use chumsky::{prelude::*, text::inline_whitespace};
 
 #[derive(Clone, Copy, Hash, PartialEq, PartialOrd, Debug, Ord, Eq)]
 pub enum Direction {
@@ -11,8 +11,7 @@ pub enum Direction {
 #[derive(Clone, Debug, PartialEq, PartialOrd, Ord, Eq)]
 pub struct Balance {
     pub account: AccountName,
-    pub amount: usize,
-    pub dir: Direction,
+    pub amount: rust_decimal::Decimal,
     pub unit: UnitName,
 }
 
@@ -30,52 +29,105 @@ impl LedgerEntry {
             .find(|b| b.account == account)
             .is_some()
     }
+
+    pub fn balance_for(&self, account: AccountName) -> Option<&Balance> {
+        self.balances.iter().find(|b| b.account == account)
+    }
+
+    pub fn split_balances(
+        &self,
+        account: AccountName,
+    ) -> Option<(&Balance, impl Iterator<Item = &Balance>)> {
+        let index = self.balances.iter().position(|b| b.account == account)?;
+        Some((
+            &self.balances[index],
+            self.balances[0..index]
+                .iter()
+                .chain(self.balances[index + 1..].iter()),
+        ))
+    }
 }
 
-fn ledger_parser<'a>()
--> impl Parser<'a, &'a str, Vec<LedgerEntry>, chumsky::extra::Err<chumsky::error::Rich<'a, char>>> {
+struct Spanned<T>(T, SimpleSpan);
+
+fn ledger_parser<'a>() -> impl Parser<
+    'a,
+    &'a str,
+    Vec<LedgerEntry>,
+    chumsky::extra::Full<
+        chumsky::error::Rich<'a, char>,
+        chumsky::extra::SimpleState<&'a RootSpec>,
+        (),
+    >,
+> {
     let int = chumsky::text::digits(10)
-        .collect()
-        .map(|v: String| v.parse::<usize>().unwrap());
+        .to_slice()
+        .map(|v: &str| v.parse::<usize>().unwrap());
 
     let datestamp = group((int, just('-').ignored(), int, just('-').ignored(), int))
         .map(|(y, _, m, _, d)| (y as u16, m as u8, d as u8));
 
-    let mark = |m| {
-        chumsky::text::inline_whitespace()
-            .ignore_then(just(m))
-            .then_ignore(chumsky::text::inline_whitespace())
-    };
+    let mark = |m| just(m).padded_by(inline_whitespace());
+
+    let decimal_digit = one_of("0123456789.,");
+    let decimal_digits = decimal_digit
+        .or(just(' ').repeated().ignore_then(decimal_digit))
+        .repeated();
+
+    let decimal = choice((just('-').ignored(), just('+').ignored(), empty()))
+        .then(decimal_digits)
+        .to_slice()
+        .try_map(|s: &str, span| {
+            rust_decimal::Decimal::from_str_exact(s.trim()).map_err(|e| {
+                Rich::custom(span, format!("Failed to parse '{s}' as a decimal number"))
+            })
+        });
 
     let balance = group((
         mark('-'),
-        chumsky::text::ident().map(|v| stringstore::StoredString::new(v)),
+        none_of(": \n\t").repeated().to_slice().map_with(|v, e| Spanned(stringstore::StoredString::new(v), e.span())),
         mark(':'),
+        decimal,
         choice((
-            mark('-').map(|_| Direction::Withdrawal),
-            mark('+').map(|_| Direction::Deposit),
-        )),
-        int,
-        just('.'),
-        int,
-        chumsky::text::inline_whitespace(),
-        chumsky::primitive::none_of("\n")
-            .repeated()
-            .collect::<String>(),
-        chumsky::text::newline(),
+            inline_whitespace()
+                .at_least(1)
+                .ignore_then(chumsky::text::ident())
+                .then_ignore(inline_whitespace())
+                .map_with(|u, e| Some(Spanned(UnitName::new(u), e.span()))),
+            inline_whitespace().map(|_| None),
+        ))
+        .then_ignore(chumsky::text::newline()),
     ))
-    .map(|(_, acc, _, dir, w, _, f, _, unit, _)| Balance {
-        account: acc,
-        dir,
-        amount: w,
-        unit: "UNIT".into(),
+    .try_map_with(|(_, Spanned(acc_name, acc_span), _, amount, unit_info, ), e| {
+        let span = e.span();
+        let spec: &mut chumsky::extra::SimpleState<&RootSpec> = e.state();
+
+        let Some(acc_spec) = spec.accounts.get(&acc_name) else {
+            return Err(chumsky::error::Rich::custom(acc_span, "no such account"));
+        };
+
+        let (unit, unit_span) = match unit_info {
+            Some(Spanned(unit, unit_span)) => (unit, unit_span),
+            None => acc_spec.default_unit.map(|u| (u, span)).ok_or_else(||
+                chumsky::error::Rich::custom(span, format!("No unit specified and no default unit specified for account '{acc_name}'")))?
+        };
+
+        if !spec.units.contains_key(&unit) {
+            return Err(chumsky::error::Rich::custom(unit_span, format!("no such unit '{unit}' found")))
+        }
+
+        Ok(Balance {
+            account: acc_name,
+            amount,
+            unit,
+        })
     });
 
     let entry = group((
         chumsky::text::whitespace(),
         datestamp,
         mark(':'),
-        chumsky::text::inline_whitespace(),
+        inline_whitespace(),
         chumsky::primitive::none_of("\n")
             .repeated()
             .collect::<String>(),
@@ -94,11 +146,14 @@ fn ledger_parser<'a>()
 
 pub fn parse_ledger(
     source: super::SourceFile,
+    spec: &super::spec::RootSpec,
     data: &str,
 ) -> Result<Vec<LedgerEntry>, super::DataError> {
     let parser = ledger_parser();
 
-    let (presult, errors) = parser.parse(data).into_output_errors();
+    let (presult, errors) = parser
+        .parse_with_state(data, &mut chumsky::extra::SimpleState(spec))
+        .into_output_errors();
 
     if let Some(e) = errors.first() {
         let span = e.span().start()..e.span().end();

+ 10 - 8
src/data/spec.rs

@@ -1,21 +1,23 @@
 use std::collections::HashMap;
 
+use super::{AccountName, UnitName};
+
 #[derive(Debug, serde::Deserialize)]
 #[serde(deny_unknown_fields)]
 pub struct AccountSpec {
-    title: Option<String>,
-    description: Option<String>,
+    pub title: Option<String>,
+    pub description: Option<String>,
 
-    annotations: Option<HashMap<String, String>>,
+    pub annotations: Option<HashMap<String, String>>,
 
-    default_unit: Option<String>,
+    pub default_unit: Option<UnitName>,
 }
 
 #[derive(Debug, serde::Deserialize)]
 #[serde(deny_unknown_fields)]
 pub struct UnitSpec {
-    name: String,
-    format: String,
+    pub name: UnitName,
+    pub format: String,
 }
 
 #[derive(Debug, serde::Deserialize)]
@@ -23,7 +25,7 @@ pub struct UnitSpec {
 pub struct RootSpec {
     pub ledger_path: std::path::PathBuf,
 
-    units: HashMap<String, UnitSpec>,
+    pub units: HashMap<UnitName, UnitSpec>,
 
-    accounts: HashMap<String, AccountSpec>,
+    pub accounts: HashMap<AccountName, AccountSpec>,
 }

+ 14 - 0
src/data/unit.rs

@@ -0,0 +1,14 @@
+use super::UnitName;
+
+pub struct UnitSpec {}
+
+pub struct UnitValue {
+    amount: rust_decimal::Decimal,
+    unit: UnitName,
+}
+
+impl UnitValue {
+    pub fn new(unit: UnitName, amount: rust_decimal::Decimal) -> Self {
+        Self { unit, amount }
+    }
+}

+ 10 - 1
src/main.rs

@@ -31,6 +31,7 @@ fn load_data(
             report.eprint(fsdata)?;
             Err(anyhow::anyhow!("Parse error"))
         }
+        Err(data::DataError::Validation(verr)) => Err(anyhow::anyhow!("Validation error: {verr}")),
     }
 }
 
@@ -52,7 +53,15 @@ impl Command {
                 let aname = data::AccountName::new(account.as_str());
                 if let Some(ld) = data.ledger_data_for(aname) {
                     for le in ld {
-                        log::info!("- le: {le:?}");
+                        let Some((acc_bal, other_bal)) = le.split_balances(aname) else {
+                            continue;
+                        };
+                        log::info!(
+                            "{:?}: {} {:?}",
+                            le.datestamp,
+                            acc_bal.amount,
+                            other_bal.map(|b| b.account).collect::<Vec<_>>()
+                        );
                     }
                 } else {
                     log::info!("account not found. data: {data:?}");

+ 4 - 1
testdata/ledger

@@ -1,8 +1,11 @@
 2001-01-05: initial balance
-  - initial: -400.00
+  - initial: -400.00 CAD
   - chequing: +400.00
 
 2001-01-07: transfer to savings
   - chequing: -300.00
   - savings: +300.00
 
+2001-02-07: test for unusual account name
+ - initial: -4.00 USD
+ - a.b/c: +4.00 USD

+ 4 - 0
testdata/root.toml

@@ -2,6 +2,7 @@ ledger_path = "./ledger"
 
 [units]
 CAD = { name = "Canadian Dollar", format = "CA$" }
+USD = { name = "United States Dollar", format = "US$" }
 
 [accounts.chequing]
 title = "Chequing"
@@ -14,3 +15,6 @@ default_unit = "CAD"
 
 [accounts.loan]
 default_unit = "CAD"
+
+[accounts."a.b/c"]
+