From 06a3d4ebb0a6ecff8d12762e9ec3ad2b005ce3f2 Mon Sep 17 00:00:00 2001 From: Daniele Nicolodi Date: Sun, 6 Feb 2022 15:18:51 +0100 Subject: [PATCH] importer: Remove requirement for filename and lineno metadata Importers have to add "filename" and "lineno" metadata entries to the generated entries. However, these are used only by the beancount.core.data.entry_sortkey() sorting key function when entries are written in the ledger. As the Python sort implementation is guaranteed stable, this is at best not useful and actually problematic when the source document is sorted in reverse date order and the importer does not explicitly take this into account. Implementing a sortkey() function that does not use these metadata entries removes the need for the importers to generate them and avoids the issue with reverse sorting of entries for the same day. Propagate the change to the base classes and example importers. --- beangulp/importer.py | 21 +++++++++++++++++++-- beangulp/importers/csv.py | 2 +- beangulp/importers/csvbase.py | 16 +++++----------- beangulp/importers/csvbase_test.py | 4 ++-- examples/importers/ofx.py | 17 ++++++----------- 5 files changed, 33 insertions(+), 27 deletions(-) diff --git a/beangulp/importer.py b/beangulp/importer.py index 3506125..ff7c895 100644 --- a/beangulp/importer.py +++ b/beangulp/importer.py @@ -21,6 +21,23 @@ compare = similar.SimilarityComparator() +def sortkey(entry): + """Comparison key for ledger entries. Sort by date and entry type. + + Note that this differs from beancount.core.data.entry_sortkey() by + not including the "filename" and "lineno" metadata entries in the + key. This removes the need for importers to include these metadata + entries for the generated entries. + + Args: + entry: A ledger entry. + Returns: + Sorting key. + + """ + return (entry.date, data.SORT_ORDER.get(type(entry), 0)) + + class Importer(abc.ABC): """Interface that all source importers need to comply with. 
@@ -173,7 +190,7 @@ def sort(self, entries: data.Entries, reverse=False) -> None: to sort in descending order. Importers can implement this method to have entries serialized to file in a specific order. The default implementation sorts the entries according - to beancount.core.data.entry_sortkey(). + to beangulp.importer.sortkey(). Args: entries: Entries list to sort. @@ -183,7 +200,7 @@ def sort(self, entries: data.Entries, reverse=False) -> None: None. """ - return entries.sort(key=data.entry_sortkey, reverse=reverse) + return entries.sort(key=sortkey, reverse=reverse) class ImporterProtocol: diff --git a/beangulp/importers/csv.py b/beangulp/importers/csv.py index a1b4842..4713c22 100644 --- a/beangulp/importers/csv.py +++ b/beangulp/importers/csv.py @@ -344,7 +344,7 @@ def get(row, ftype): currency = get(row, Col.CURRENCY) or self.currency # Create a transaction - meta = data.new_metadata(file.name, index) + meta = {} if txn_date is not None: meta['date'] = parse_date_liberally(txn_date, self.dateutil_kwds) diff --git a/beangulp/importers/csvbase.py b/beangulp/importers/csvbase.py index 38213af..bf6130b 100644 --- a/beangulp/importers/csvbase.py +++ b/beangulp/importers/csvbase.py @@ -277,10 +277,7 @@ def extract(self, filepath, existing): balances = defaultdict(list) default_account = self.account(filepath) - # Compute the line number of the first data line. - offset = int(self.skiplines) + bool(self.names) + 1 - - for lineno, row in enumerate(self.read(filepath), offset): + for row in self.read(filepath): # Skip empty lines. if not row: continue @@ -301,7 +298,7 @@ def extract(self, filepath, existing): units = data.Amount(row.amount, currency) # Create a transaction. 
- txn = data.Transaction(self.metadata(filepath, lineno, row), + txn = data.Transaction(self.metadata(row), row.date, flag, payee, row.narration, tags, links, [ data.Posting(account, units, None, None, None, None), ]) @@ -317,8 +314,7 @@ def extract(self, filepath, existing): if balance is not None: date = row.date + datetime.timedelta(days=1) units = data.Amount(balance, currency) - meta = data.new_metadata(filepath, lineno) - balances[currency].append(data.Balance(meta, date, account, units, None, None)) + balances[currency].append(data.Balance({}, date, account, units, None, None)) if not entries: return [] @@ -333,22 +329,20 @@ def extract(self, filepath, existing): return entries - def metadata(self, filepath, lineno, row): + def metadata(self, row): """Build transaction metadata dictionary. This method can be extended to add customized metadata entries based on the content of the data row. Args: - filepath: Path to the file being imported. - lineno: Line number of the data being processed. row: The data row being processed. Returns: A metadata dictionary. """ - return data.new_metadata(filepath, lineno) + return {} def finalize(self, txn, row): """Post process the transaction. 
diff --git a/beangulp/importers/csvbase_test.py b/beangulp/importers/csvbase_test.py index c7e72e4..13794b8 100644 --- a/beangulp/importers/csvbase_test.py +++ b/beangulp/importers/csvbase_test.py @@ -433,8 +433,8 @@ class CSVImporter(Base): data = Amount(4) names = False - def metadata(self, filepath, lineno, row): - meta = super().metadata(filepath, lineno, row) + def metadata(self, row): + meta = super().metadata(row) for field in 'meta', 'data': meta[field] = getattr(row, field) return meta diff --git a/examples/importers/ofx.py b/examples/importers/ofx.py index 9e5977b..84bd779 100644 --- a/examples/importers/ofx.py +++ b/examples/importers/ofx.py @@ -20,7 +20,6 @@ import datetime import enum -import itertools import re from os import path @@ -96,7 +95,7 @@ def extract(self, filepath, existing): """Extract a list of partially complete transactions from the file.""" with open(filepath) as fd: soup = bs4.BeautifulSoup(fd, 'lxml') - return extract(soup, filepath, self.acctid_regexp, self.importer_account, + return extract(soup, self.acctid_regexp, self.importer_account, flags.FLAG_OKAY, self.balance_type) @@ -110,10 +109,9 @@ def extract(soup, filename, acctid_regexp, account, flag, balance_type): flag: A single-character string. balance_type: An enum of type BalanceType. Returns: - A sorted list of entries. + A list of entries. """ new_entries = [] - counter = itertools.count() for acctid, currency, transactions, balance in find_statement_transactions(soup): if not re.match(acctid_regexp, acctid): continue @@ -122,9 +120,8 @@ def extract(soup, filename, acctid_regexp, account, flag, balance_type): stmt_entries = [] for stmttrn in transactions: entry = build_transaction(stmttrn, flag, account, currency) - entry = entry._replace(meta=data.new_metadata(filename, next(counter))) stmt_entries.append(entry) - stmt_entries = data.sorted(stmt_entries) + stmt_entries.sort(key=lambda entry: entry.date) new_entries.extend(stmt_entries) # Create a Balance directive. 
@@ -137,13 +134,12 @@ def extract(soup, filename, acctid_regexp, account, flag, balance_type): # it to the following day. date += datetime.timedelta(days=1) - meta = data.new_metadata(filename, next(counter)) - balance_entry = data.Balance(meta, date, account, + balance_entry = data.Balance(None, date, account, amount.Amount(number, currency), None, None) new_entries.append(balance_entry) - return data.sorted(new_entries) + return new_entries def parse_ofx_time(date_str): @@ -297,6 +293,5 @@ def build_transaction(stmttrn, flag, account, currency): posting = data.Posting(account, units, None, None, None, None) # Build the transaction with a single leg. - fileloc = data.new_metadata('', 0) - return data.Transaction(fileloc, date, flag, payee, narration, + return data.Transaction({}, date, flag, payee, narration, data.EMPTY_SET, data.EMPTY_SET, [posting])