Browse Source

Add CSV import skeleton.

Kestrel 2 weeks ago
parent
commit
f4711eab4f
8 changed files with 247 additions and 16 deletions
  1. 2 0
      Cargo.toml
  2. 22 5
      src/data.rs
  3. 10 6
      src/data/ledger/print.rs
  4. 30 0
      src/data/spec.rs
  5. 153 0
      src/import.rs
  6. 16 1
      src/main.rs
  7. 12 4
      src/show.rs
  8. 2 0
      testdata/root.toml

+ 2 - 0
Cargo.toml

@@ -16,6 +16,8 @@ serde = { version = "1.0", features = ["derive"] }
 toml = { version = "0.8", features = [] }
 chumsky = { version = "0.10", features = ["lexical-numbers"] }
 ariadne = { version = "0.5" }
+csv = { version = "1.3" }
+strptime = { version = "1.1" }
 
 # cli dependencies
 pretty_env_logger = { version = "0.5.0" }

+ 22 - 5
src/data.rs

@@ -1,7 +1,7 @@
 use std::collections::HashMap;
 
 use ariadne::Cache;
-use chumsky::span::Span as CSpan;
+use chumsky::span::Span as _;
 
 pub use rust_decimal::Decimal;
 
@@ -98,7 +98,13 @@ impl std::error::Error for DataError {}
 #[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
 pub struct Span {
     range: (usize, usize),
-    context: SourceFile,
+    context: Option<SourceFile>,
+}
+
+/// Placeholder span for values that were not read from any source file.
+///
+/// Note: the `chumsky` and `ariadne` accessors on `Span` unwrap `context`, so they
+/// panic when called on a span built here.
+impl Default for Span {
+    fn default() -> Self {
+        Span {
+            range: (0, 0),
+            context: None,
+        }
+    }
+}
 
 impl chumsky::span::Span for Span {
@@ -107,7 +113,7 @@ impl chumsky::span::Span for Span {
 
     fn new(context: Self::Context, range: std::ops::Range<Self::Offset>) -> Self {
         Self {
-            context,
+            context: Some(context),
             range: (range.start, range.end),
         }
     }
@@ -121,7 +127,7 @@ impl chumsky::span::Span for Span {
     }
 
     fn context(&self) -> Self::Context {
-        self.context
+        self.context.unwrap()
     }
 
     fn to_end(&self) -> Self {
@@ -135,7 +141,7 @@ impl chumsky::span::Span for Span {
 impl ariadne::Span for Span {
     type SourceId = SourceFile;
     fn source(&self) -> &Self::SourceId {
-        &self.context
+        self.context.as_ref().unwrap()
     }
     fn start(&self) -> usize {
         self.range.0
@@ -185,6 +191,12 @@ impl<T: PartialEq> PartialEq for Spanned<T> {
 
 impl<T: Eq> Eq for Spanned<T> {}
 
+/// Wraps a bare value in a `Spanned` that carries the default (source-less) span.
+impl<T> From<T> for Spanned<T> {
+    fn from(inner: T) -> Self {
+        let span = Span::default();
+        Spanned(inner, span)
+    }
+}
+
 impl<T: PartialOrd> PartialOrd for Spanned<T> {
     fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
         self.0.partial_cmp(&other.0)
@@ -248,6 +260,7 @@ impl Root {
                         description: None,
                         annotations: None,
                         unit: None,
+                        import: None,
                     });
                 } else {
                     return Err(DataError::Validation(String::from(
@@ -366,6 +379,10 @@ impl Root {
         self.spec_root.units.get(&unit)
     }
 
+    /// Returns a shared reference to the `spec::SpecRoot` stored in this `Root`.
+    pub fn spec_root(&self) -> &spec::SpecRoot {
+        &self.spec_root
+    }
+
     pub fn balance(&self, aname: AccountName) -> Option<HashMap<UnitName, Decimal>> {
         let mut running = HashMap::<UnitName, Decimal>::new();
 

+ 10 - 6
src/data/ledger/print.rs

@@ -54,7 +54,7 @@ fn print_comment(c: &Spanned<String>) {
 }
 
 pub fn print_ledger<'l>(root: &Root, entries: impl Iterator<Item = &'l LedgerEntry>) {
-    let mut ordering = BTreeMap::<SourceFile, BTreeMap<Span, &LedgerEntry>>::new();
+    let mut ordering = BTreeMap::<Option<SourceFile>, BTreeMap<Span, &LedgerEntry>>::new();
 
     entries.for_each(|e| {
         ordering
@@ -68,11 +68,15 @@ pub fn print_ledger<'l>(root: &Root, entries: impl Iterator<Item = &'l LedgerEnt
         return;
     };
 
-    for (filename, entries) in ordering {
-        println!(
-            "==== file {} ====",
-            std::path::Path::new(filename.as_ref()).display()
-        );
+    for (source, entries) in ordering {
+        if let Some(filename) = source {
+            println!(
+                "==== file {} ====",
+                std::path::Path::new(filename.as_ref()).display()
+            );
+        } else {
+            println!("==== new data ====");
+        }
         for (_span, le) in entries {
             match le {
                 LedgerEntry::Transaction(tx) => print_transaction(root, tx, padding),

+ 30 - 0
src/data/spec.rs

@@ -2,6 +2,34 @@ use std::collections::HashMap;
 
 use super::{AccountName, UnitName};
 
+/// Role assigned to one column of an imported CSV file.
+///
+/// Deserialized from snake_case names (for example `"datestamp"`). The per-variant
+/// notes describe how the CSV importer in `src/import.rs` treats each role.
+#[derive(Debug, serde::Deserialize, PartialEq)]
+#[serde(deny_unknown_fields,rename_all="snake_case")]
+pub enum CSVColumnSpec {
+    /// The column is skipped.
+    Ignore,
+    /// Transaction date, parsed with the import spec's `date_format`.
+    Datestamp,
+    /// Text stored as the transaction title.
+    Title,
+    /// Amount used as the transaction's change as parsed; blank cells are skipped.
+    Deposit,
+    /// Amount used as the transaction's change with its sign forced negative; blank cells are skipped.
+    Withdraw,
+    /// Amount used as the transaction's change as parsed; blank cells are skipped.
+    Change,
+    /// Amount stored as the balance on the resulting change; blank cells are skipped.
+    Balance,
+    /// Unit name for the row, taking precedence over the account's configured unit.
+    Unit,
+}
+
+/// Per-account configuration describing how to read a CSV export.
+#[derive(Debug, serde::Deserialize)]
+#[serde(deny_unknown_fields)]
+pub struct CSVImportSpec {
+    // NOTE(review): presumably the number of leading rows to skip; the importer visible
+    // in this commit never reads this field -- confirm the intended semantics.
+    pub skip_start: Option<usize>,
+    // NOTE(review): presumably the number of trailing rows to skip; the importer visible
+    // in this commit never reads this field -- confirm the intended semantics.
+    pub skip_end: Option<usize>,
+    /// Column roles, in column order; the importer pairs them positionally with each row's fields.
+    pub cols: Vec<CSVColumnSpec>,
+    /// Format string handed to `strptime::Parser` for the datestamp column.
+    pub date_format: String,
+}
+
+/// Import configuration for an account, selected by the `format` key of the table
+/// it is deserialized from (e.g. `format = "CSV"`).
+#[derive(Debug, serde::Deserialize)]
+#[serde(deny_unknown_fields, tag = "format")]
+pub enum ImportFileFormat {
+    /// Comma-separated values, read according to the enclosed spec.
+    CSV(CSVImportSpec)
+}
+
 #[derive(Debug, serde::Deserialize)]
 #[serde(deny_unknown_fields)]
 pub struct AccountSpec {
@@ -11,6 +39,8 @@ pub struct AccountSpec {
     pub annotations: Option<HashMap<String, String>>,
 
     pub unit: Option<UnitName>,
+
+    pub import: Option<ImportFileFormat>,
 }
 
 #[derive(Debug, serde::Deserialize)]

+ 153 - 0
src/import.rs

@@ -0,0 +1,153 @@
+use crate::data::{AccountName, Datestamp, Decimal, spec::{AccountSpec, ImportFileFormat, CSVImportSpec, CSVColumnSpec}, ledger::{Change, Transaction}, Span, Spanned, UnitName};
+
+/// Failure modes of importing transactions from an external file.
+#[derive(Debug)]
+pub enum ImportError {
+    /// An I/O operation (such as opening the input file) failed.
+    IOError(std::io::Error),
+    /// The account's import configuration is missing or unusable; the string is the message.
+    ConfigError(String),
+    /// A value in the input could not be parsed (date or decimal); the string is the parser's message.
+    InputError(String),
+    /// The `csv` crate reported an error while reading records.
+    CSVError(csv::Error),
+}
+
+/// Lets `?` turn an I/O failure into [`ImportError::IOError`].
+impl From<std::io::Error> for ImportError {
+    fn from(err: std::io::Error) -> Self {
+        ImportError::IOError(err)
+    }
+}
+
+/// Lets `?` turn a CSV reader failure into [`ImportError::CSVError`].
+impl From<csv::Error> for ImportError {
+    fn from(err: csv::Error) -> Self {
+        ImportError::CSVError(err)
+    }
+}
+
+/// Lets `?` turn a date parsing failure into [`ImportError::InputError`], keeping only its message.
+impl From<strptime::ParseError> for ImportError {
+    fn from(err: strptime::ParseError) -> Self {
+        let message = err.to_string();
+        ImportError::InputError(message)
+    }
+}
+
+/// Lets `?` turn a decimal parsing failure into [`ImportError::InputError`], keeping only its message.
+impl From<rust_decimal::Error> for ImportError {
+    fn from(err: rust_decimal::Error) -> Self {
+        let message = err.to_string();
+        ImportError::InputError(message)
+    }
+}
+
+/// Parses a decimal amount out of a CSV cell.
+///
+/// Every character other than an ASCII digit, `.` or `-` is dropped before parsing, so
+/// currency symbols, unit suffixes and thousands separators are tolerated.
+///
+/// # Errors
+/// Returns [`ImportError::InputError`] when the remaining characters are not a valid decimal.
+fn try_parse_decimal(from: &str) -> Result<Decimal, ImportError> {
+    let mut cleaned = String::new();
+    for ch in from.chars() {
+        if ch.is_ascii_digit() || matches!(ch, '.' | '-') {
+            cleaned.push(ch);
+        }
+    }
+
+    Ok(Decimal::from_str_radix(&cleaned, 10)?)
+}
+
+/// Reads CSV records from `reader` and converts each one into a two-sided [`Transaction`].
+///
+/// Fields are paired positionally with `csv_spec.cols`; surplus fields or surplus column
+/// specs are ignored. Every transaction carries the parsed change on `target` and the
+/// opposite change on the `unbalanced` account, so the two sides sum to zero. A balance
+/// column, when present, is recorded only on the `target` side because it describes the
+/// imported account.
+///
+/// The unit of a row is its unit column if one is configured, otherwise `aspec.unit`.
+///
+/// # Errors
+/// * [`ImportError::ConfigError`] if the column spec has no datestamp column, has neither a
+///   change column nor both withdraw and deposit columns, or no unit can be determined.
+/// * [`ImportError::InputError`] if a date or amount cannot be parsed, or a row provides no
+///   datestamp or no amount.
+/// * [`ImportError::CSVError`] if the CSV reader fails.
+fn import_from_csv(csv_spec: &CSVImportSpec, aspec: &AccountSpec, target: AccountName, reader: impl std::io::Read) -> Result<Vec<Transaction>, ImportError> {
+    // validate CSV spec
+    let has_col = |col: &CSVColumnSpec| csv_spec.cols.contains(col);
+
+    if !has_col(&CSVColumnSpec::Datestamp) {
+        return Err(ImportError::ConfigError("CSV config does not have a datestamp column".into()));
+    }
+
+    if !has_col(&CSVColumnSpec::Change)
+        && !(has_col(&CSVColumnSpec::Withdraw) && has_col(&CSVColumnSpec::Deposit))
+    {
+        return Err(ImportError::ConfigError("CSV config needs either a change column or both withdraw and deposit columns!".into()));
+    }
+
+    // strptime is silly and wants a &'static format string; the leak is one small
+    // allocation per call.
+    let date_format = Box::leak(csv_spec.date_format.clone().into_boxed_str());
+    let date_parser = strptime::Parser::new(date_format);
+
+    let unbalanced = AccountName::new("unbalanced");
+
+    // NOTE(review): `csv_spec.skip_start` / `skip_end` are not applied here. The reader is
+    // built with the csv crate's defaults, which treat the first row as a header.
+    let mut csv_reader = csv::Reader::from_reader(reader);
+
+    let mut txns = vec![];
+
+    for record in csv_reader.records() {
+        let record = record?;
+
+        let mut txn_datestamp: Option<Datestamp> = None;
+        let mut txn_title: Option<String> = None;
+        let mut txn_change: Option<Decimal> = None;
+        let mut txn_balance: Option<Decimal> = None;
+        let mut txn_unit: Option<UnitName> = None;
+
+        for (field, spec) in record.iter().zip(csv_spec.cols.iter()) {
+            match spec {
+                CSVColumnSpec::Ignore => (),
+                CSVColumnSpec::Datestamp => {
+                    let date = date_parser.parse(field)?.date()?;
+                    // checked conversion: an out-of-range year must not silently wrap
+                    let year = u16::try_from(date.year()).map_err(|_| {
+                        ImportError::InputError(format!("year out of range in date {field:?}"))
+                    })?;
+                    txn_datestamp = Some(Datestamp { year, month: date.month(), day: date.day() });
+                }
+                CSVColumnSpec::Title => {
+                    txn_title = Some(field.into());
+                }
+                CSVColumnSpec::Unit => {
+                    txn_unit = Some(UnitName::new(field));
+                }
+                // a blank amount cell carries no value for this row
+                CSVColumnSpec::Deposit
+                | CSVColumnSpec::Withdraw
+                | CSVColumnSpec::Change
+                | CSVColumnSpec::Balance
+                    if field.trim().is_empty() => {}
+                CSVColumnSpec::Deposit | CSVColumnSpec::Change => {
+                    txn_change = Some(try_parse_decimal(field)?);
+                }
+                CSVColumnSpec::Withdraw => {
+                    // withdrawals reduce the account regardless of the sign in the file
+                    let mut dec = try_parse_decimal(field)?;
+                    dec.set_sign_negative(true);
+                    txn_change = Some(dec);
+                }
+                CSVColumnSpec::Balance => {
+                    txn_balance = Some(try_parse_decimal(field)?);
+                }
+            }
+        }
+
+        // Rows come from user-supplied files: report missing values instead of panicking.
+        let datestamp = txn_datestamp
+            .ok_or_else(|| ImportError::InputError("CSV row has no datestamp value".into()))?;
+        let amount = txn_change.ok_or_else(|| {
+            ImportError::InputError("CSV row has no change, deposit or withdraw value".into())
+        })?;
+        let unit = txn_unit.or(aspec.unit).ok_or_else(|| {
+            ImportError::ConfigError(format!(
+                "no unit for {target}: the account has no unit and the CSV row provides none"
+            ))
+        })?;
+
+        txns.push(Transaction {
+            datestamp,
+            title: txn_title,
+            annotations: vec![],
+            changes: vec![
+                Change {
+                    account: target.into(),
+                    amount: amount.into(),
+                    balance: txn_balance.map(Into::into),
+                    unit: unit.into(),
+                }.into(),
+
+                // Counter-entry: opposite sign so the transaction balances. The CSV balance
+                // belongs to `target`, so none is asserted on this side.
+                Change {
+                    account: unbalanced.into(),
+                    amount: (-amount).into(),
+                    balance: None,
+                    unit: unit.into(),
+                }.into(),
+            ]
+        });
+    }
+
+    Ok(txns)
+}
+
+/// Imports transactions for `target` from the file at `path`, using the import
+/// configuration attached to `aspec`.
+///
+/// # Errors
+/// Returns [`ImportError::IOError`] if the file cannot be opened,
+/// [`ImportError::ConfigError`] if the account has no import configuration, and otherwise
+/// whatever the format-specific importer reports.
+pub fn import_from(aspec: &AccountSpec, target: AccountName, path: &std::path::Path) -> Result<Vec<Transaction>, ImportError> {
+    let file = std::fs::File::open(path)?;
+
+    let Some(format) = aspec.import.as_ref() else {
+        return Err(ImportError::ConfigError(format!("no import configuration for {target}")));
+    };
+
+    // Matching on the format keeps this exhaustive when new formats are added.
+    match format {
+        ImportFileFormat::CSV(csv_spec) => import_from_csv(csv_spec, aspec, target, file),
+    }
+}

+ 16 - 1
src/main.rs

@@ -3,6 +3,7 @@ use itertools::Itertools;
 mod check;
 mod data;
 mod show;
+mod import;
 
 #[derive(clap::Parser)]
 struct Invocation {
@@ -23,6 +24,7 @@ enum Command {
     Summarize,
     Ledger { account: String },
     Reformat,
+    Import { account: String, from: std::path::PathBuf },
 }
 
 fn load_data(
@@ -92,7 +94,7 @@ impl Command {
 
                 let tt = show::TransactionTable::default();
                 if let Some(ld) = data.ledger_data_for(aname) {
-                    tt.show(aname, ld.iter().map(data::Spanned::as_ref));
+                    tt.show(&data, aname, ld.iter().map(data::Spanned::as_ref));
                 } else {
                     log::error!("account not found!");
                 }
@@ -102,6 +104,19 @@ impl Command {
 
                 data::ledger::print_ledger(&data, data.all_ledger_data().iter());
             }
+            Self::Import { account, from } => {
+                let data = load_data(&mut fsdata, inv)?;
+
+                let aname = account.into();
+                let Some(aspec) = data.account_spec(aname) else {
+                    todo!()
+                };
+
+                let imported = import::import_from(aspec, aname, from.as_path()).unwrap();
+
+                let tt = show::TransactionTable::default();
+                tt.show(&data, aname, imported.iter());
+            }
         }
         Ok(())
     }

+ 12 - 4
src/show.rs

@@ -3,8 +3,10 @@ use std::fmt::Display;
 use console::Style;
 
 use crate::data::{
+    Root,
     AccountName, Decimal,
     ledger::{Change, Transaction},
+    spec::AccountSpec,
 };
 
 #[derive(Clone, Copy, Default)]
@@ -79,7 +81,8 @@ fn show_table<'d>(cols: Vec<Column>, rows: impl Iterator<Item = Row>) {
 pub struct TransactionTable {}
 
 impl TransactionTable {
-    pub fn show<'d>(self, account: AccountName, txns: impl Iterator<Item = &'d Transaction>) {
+    pub fn show<'d>(self, root: &Root, account: AccountName, txns: impl Iterator<Item = &'d Transaction>) {
+
         show_table(
             vec![
                 // datestamp
@@ -105,18 +108,23 @@ impl TransactionTable {
                     ..Default::default()
                 },
             ],
+            vec![
+                Row::Data(vec!["Date".into(), "Memo".into(), "Amount".into(), "Balance".into()]),
+                Row::Line,
+            ].into_iter().chain(
             txns.filter_map(|txn| txn.change_for(account).map(|chg| (txn, chg)))
                 .map(|(txn, chg)| {
+                    let precision = root.unit_spec(*chg.unit).unwrap().precision.unwrap_or(2) as usize;
                     Row::Data(vec![
                         txn.datestamp.to_string(),
                         txn.title.clone().unwrap_or_else(String::new),
-                        format!("{}", chg.amount),
+                        format!("{:.precision$}", chg.amount),
                         chg.balance
                             .as_deref()
-                            .map(Decimal::to_string)
+                            .map(|b| format!("{:.precision$}", b))
                             .unwrap_or(String::new()),
                     ])
-                }),
+                })),
         )
     }
 }

+ 2 - 0
testdata/root.toml

@@ -13,6 +13,8 @@ unit = "CAD"
 [accounts.savings]
 unit = "CAD"
 
+import = { format = "CSV", skip_start = 1, cols = ["datestamp", "title", "change", "balance"], date_format = "%d %b %Y" }
+
 [accounts.savings_usd]
 unit = "USD"