瀏覽代碼

Improve resiliency of CSV import.

Kestrel 2 周之前
父節點
當前提交
92047e3325
共有 4 個文件被更改,包括 82 次插入和 73 次删除
  1. 2 2
      src/cmd.rs
  2. 19 6
      src/data.rs
  3. 59 63
      src/import.rs
  4. 2 2
      src/show.rs

+ 2 - 2
src/cmd.rs

@@ -103,7 +103,7 @@ impl Command {
 
                 let tt = show::TransactionTable::default();
                 if let Some(ld) = data.ledger_data_for(aname) {
-                    tt.show(&data, aname, ld.iter().map(data::Spanned::as_ref));
+                    tt.show(Some(&data), aname, ld.iter().map(data::Spanned::as_ref));
                 } else {
                     log::error!("account not found!");
                 }
@@ -148,7 +148,7 @@ impl Command {
                 } else {
                     log::info!("No target specified, showing new data on stdout.");
                     let tt = show::TransactionTable::default();
-                    tt.show(&data, aname, imported.iter());
+                    tt.show(Some(&data), aname, imported.iter());
                 }
             }
             Self::Dedup => {

+ 19 - 6
src/data.rs

@@ -276,10 +276,23 @@ impl Root {
                 path.pop();
             }
         } else {
-            let s = SourceFile::new(std::fs::canonicalize(path)?.as_os_str());
-            let data = fsdata.fetch(&s).unwrap();
-            self.ledger_data
-                .extend(ledger::parse_ledger(s, &self.spec_root, data.text())?);
+            let path = std::fs::canonicalize(path)?;
+            let Some(filename) = path.file_name() else {
+                return Ok(());
+            };
+            // skip filenames beginning with a dot
+            if filename.as_encoded_bytes()[0] == b'.' {
+                log::info!("Skipping file {}", path.display());
+                return Ok(());
+            }
+
+            let sf = SourceFile::new_from_string(path.into_os_string());
+            if let Ok(data) = fsdata.fetch(&sf) {
+                self.ledger_data
+                    .extend(ledger::parse_ledger(sf, &self.spec_root, data.text())?);
+            } else {
+                log::error!("Failed to load data from {}", std::path::Path::new(sf.as_str()).display());
+            }
         }
 
         Ok(())
@@ -309,9 +322,9 @@ impl Root {
             }
         }
 
-        for txns in self.account_ledger_data.values_mut() {
+        /*for txns in self.account_ledger_data.values_mut() {
             txns.sort_by_key(|txn| txn.datestamp);
-        }
+        }*/
     }
 
     pub fn all_ledger_data(&self) -> &[ledger::LedgerEntry] {

+ 59 - 63
src/import.rs

@@ -1,4 +1,4 @@
-use std::collections::HashMap;
+use std::collections::{HashMap,BTreeSet};
 
 use crate::data::{
     AccountName, Datestamp, Decimal, UnitName,
@@ -167,78 +167,74 @@ fn import_from_csv(
     Ok(txns)
 }
 
-fn postprocess(account: AccountName, transactions: &mut Vec<Transaction>) {
-    // check if we need to re-order transactions due to balances not lining up because of ordering
-    let mut running_balances = HashMap::<UnitName, Decimal>::new();
-    let mut idx = 0;
-
-    // first get things vaguely sorted
-    transactions.sort_by_key(|tx| tx.datestamp);
-
-    let check_for_match = |running_balances: &mut HashMap<UnitName, Decimal>, change: &Change| {
-        let bal = *change.balance.unwrap();
-        match running_balances.entry(*change.unit) {
-            std::collections::hash_map::Entry::Vacant(entry) => {
-                entry.insert(bal);
-                return true;
+fn recursive_order_search(txns: &[Transaction], account: AccountName, order: &mut Vec<usize>, remaining: &mut BTreeSet<usize>) -> bool {
+    if remaining.is_empty() {
+        return true
+    }
+
+    let possibles = remaining.iter().cloned().collect::<Vec<_>>();
+
+    if let Some(last) = order.last().as_deref() {
+        let Some(last_balance) = txns[*last].change_for(account).unwrap().balance.as_deref() else {
+            return false
+        };
+        for possible in possibles.iter() {
+            // check if balances line up
+            let change = txns[*possible].change_for(account).unwrap();
+            let Some(nbal) = change.balance else {
+                continue
+            };
+
+            if last_balance.checked_add(*change.amount) != Some(*nbal) {
+                continue
             }
-            std::collections::hash_map::Entry::Occupied(mut entry) => {
-                let rbal = entry.get_mut();
-                let new_rbal = rbal.checked_add(*change.amount).unwrap();
-                if new_rbal != bal {
-                    return false;
-                } else {
-                    *rbal = new_rbal;
-                    return true;
-                }
+
+            remaining.remove(possible);
+            order.push(*possible);
+            if recursive_order_search(txns, account, order, remaining) {
+                return true
             }
+            order.pop();
+            remaining.insert(*possible);
         }
-    };
-
-    let mut removed: Vec<Transaction> = vec![];
-
-    'outer: loop {
-        for ridx in 0..removed.len() {
-            if check_for_match(
-                &mut running_balances,
-                removed[ridx].change_for(account).unwrap(),
-            ) {
-                transactions.insert(idx, removed.remove(ridx));
-                log::trace!("pulling transaction out of removed");
-                idx += 1;
-                continue 'outer;
+    } else {
+        for possible in possibles.into_iter() {
+            remaining.remove(&possible);
+            order.push(possible);
+            if recursive_order_search(txns, account, order, remaining) {
+                return true
             }
+            order.pop();
+            remaining.insert(possible);
         }
+    }
 
-        if idx >= transactions.len() {
-            break;
-        }
-
-        let tx = &transactions[idx];
-        let change = tx.change_for(account).unwrap();
-        if change.balance.is_none() {
-            idx += 1;
-            continue;
-        };
+    false
+}
 
-        if check_for_match(&mut running_balances, change) {
-            log::trace!(
-                "transaction is good! balance is now: {}",
-                running_balances[&*change.unit]
-            );
-            idx += 1;
-        } else {
-            log::trace!("shifting transaction to removed");
-            removed.push(transactions.remove(idx));
-        }
+fn postprocess(account: AccountName, transactions: &mut Vec<Transaction>) {
+    // check if we're sorted by datestamp already
+    if transactions.is_sorted_by_key(|tx| tx.datestamp) {
+        // already vaguely in the right order
+    } else if transactions.iter().rev().is_sorted_by_key(|tx| tx.datestamp) {
+        // reverse everything
+        transactions.reverse();
+    } else {
+        // otherwise try to get things vaguely sorted
+        transactions.sort_by_key(|tx| tx.datestamp);
     }
 
-    if removed.len() > 0 {
-        log::error!(
-            "Not all transactions are consistent! Inconsistent transactions below will be discarded:"
-        );
-        // crate::show::TransactionTable::default().show(
+    let mut to_assign = BTreeSet::from_iter(0..transactions.len());
+    let mut order = vec![];
+
+    if !recursive_order_search(transactions, account, &mut order, &mut to_assign) {
+        log::warn!("Unable to determine transaction ordering!");
+        return
     }
+
+    let mut ntransact = order.iter().map(|v| transactions[*v].clone()).collect::<Vec<_>>();
+
+    std::mem::swap(&mut ntransact, transactions);
 }
 
 pub fn import_from(

+ 2 - 2
src/show.rs

@@ -79,7 +79,7 @@ pub struct TransactionTable {}
 impl TransactionTable {
     pub fn show<'d>(
         self,
-        root: &Root,
+        root: Option<&Root>,
         account: AccountName,
         txns: impl Iterator<Item = &'d Transaction>,
     ) {
@@ -122,7 +122,7 @@ impl TransactionTable {
                 txns.filter_map(|txn| txn.change_for(account).map(|chg| (txn, chg)))
                     .map(|(txn, chg)| {
                         let precision =
-                            root.unit_spec(*chg.unit).unwrap().precision.unwrap_or(2) as usize;
+                            root.and_then(|r| r.unit_spec(*chg.unit)).and_then(|v| v.precision).unwrap_or(2) as usize;
                         Row::Data(vec![
                             txn.datestamp.to_string(),
                             txn.title.clone().unwrap_or_else(String::new),