importer: Remove requirement for filename and lineno metadata #101

Open · wants to merge 1 commit into master
21 changes: 19 additions & 2 deletions beangulp/importer.py
@@ -21,6 +21,23 @@
compare = similar.SimilarityComparator()


def sortkey(entry):
"""Comparison key for ledger entries. Sort by date and entry type.

Note that this differs from beancount.core.data.entry_sortkey() by
not including the "filename" and "lineno" metadata entries in the
key. This removes the need for importers to include these metadata
entries for the generated entries.

Args:
entry: A ledger entry.
Returns:
Sorting key.

"""
return (entry.date, data.SORT_ORDER.get(type(entry), 0))


class Importer(abc.ABC):
"""Interface that all source importers need to comply with.

@@ -173,7 +190,7 @@ def sort(self, entries: data.Entries, reverse=False) -> None:
to sort in descending order. Importers can implement this
method to have entries serialized to file in a specific
order. The default implementation sorts the entries according
to beancount.core.data.entry_sortkey().
to beangulp.importer.sortkey().

Args:
entries: Entries list to sort.
@@ -183,7 +200,7 @@ def sort(self, entries: data.Entries, reverse=False) -> None:
None.

"""
return entries.sort(key=data.entry_sortkey, reverse=reverse)
return entries.sort(key=sortkey, reverse=reverse)


class ImporterProtocol:
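For illustration only, not part of this diff: a minimal sketch of how the new sortkey() orders entries that carry no filename or lineno metadata, assuming this branch is installed so that beangulp.importer.sortkey is importable. The dates, account and amount below are made up.

import datetime

from beancount.core import data
from beancount.core.amount import Amount
from beancount.core.number import D
from beangulp.importer import sortkey

# Two entries on the same date, built with plain metadata dicts.
txn = data.Transaction({}, datetime.date(2024, 3, 1), '*', None, 'Coffee',
                       data.EMPTY_SET, data.EMPTY_SET, [])
bal = data.Balance({}, datetime.date(2024, 3, 1), 'Assets:Checking',
                   Amount(D('100.00'), 'USD'), None, None)

# data.SORT_ORDER ranks Balance directives before Transactions on the
# same date, so the balance assertion comes out first.
entries = sorted([txn, bal], key=sortkey)
assert entries == [bal, txn]
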
2 changes: 1 addition & 1 deletion beangulp/importers/csv.py
@@ -344,7 +344,7 @@ def get(row, ftype):
currency = get(row, Col.CURRENCY) or self.currency

# Create a transaction
meta = data.new_metadata(file.name, index)
meta = {}
if txn_date is not None:
meta['date'] = parse_date_liberally(txn_date,
self.dateutil_kwds)
16 changes: 5 additions & 11 deletions beangulp/importers/csvbase.py
@@ -277,10 +277,7 @@ def extract(self, filepath, existing):
balances = defaultdict(list)
default_account = self.account(filepath)

# Compute the line number of the first data line.
offset = int(self.skiplines) + bool(self.names) + 1

for lineno, row in enumerate(self.read(filepath), offset):
for row in self.read(filepath):
# Skip empty lines.
if not row:
continue
@@ -301,7 +298,7 @@
units = data.Amount(row.amount, currency)

# Create a transaction.
txn = data.Transaction(self.metadata(filepath, lineno, row),
txn = data.Transaction(self.metadata(row),
row.date, flag, payee, row.narration, tags, links, [
data.Posting(account, units, None, None, None, None),
])
@@ -317,8 +314,7 @@
if balance is not None:
date = row.date + datetime.timedelta(days=1)
units = data.Amount(balance, currency)
meta = data.new_metadata(filepath, lineno)
balances[currency].append(data.Balance(meta, date, account, units, None, None))
balances[currency].append(data.Balance({}, date, account, units, None, None))

if not entries:
return []
@@ -333,22 +329,20 @@

return entries

def metadata(self, filepath, lineno, row):
def metadata(self, row):
"""Build transaction metadata dictionary.

This method can be extended to add customized metadata
entries based on the content of the data row.

Args:
filepath: Path to the file being imported.
lineno: Line number of the data being processed.
row: The data row being processed.

Returns:
A metadata dictionary.

"""
return data.new_metadata(filepath, lineno)
return {}

def finalize(self, txn, row):
"""Post process the transaction.
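As the metadata() docstring above says, subclasses can still extend it to attach row-derived metadata under the new single-argument signature. A minimal sketch, assuming a csvbase.Importer subclass that declares an extra column; the column name and index are illustrative, and the usual date/narration/amount declarations are omitted. The test update below exercises the same pattern.

from beangulp.importers import csvbase

class MyImporter(csvbase.Importer):
    # Illustrative extra column; the remaining column declarations and
    # the constructor arguments are unchanged by this patch.
    reference = csvbase.Column(3)

    def metadata(self, row):
        # Start from the base metadata (now an empty dict) and add a
        # custom entry taken from the data row.
        meta = super().metadata(row)
        meta['reference'] = row.reference
        return meta
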
4 changes: 2 additions & 2 deletions beangulp/importers/csvbase_test.py
@@ -433,8 +433,8 @@ class CSVImporter(Base):
data = Amount(4)
names = False

def metadata(self, filepath, lineno, row):
meta = super().metadata(filepath, lineno, row)
def metadata(self, row):
meta = super().metadata(row)
for field in 'meta', 'data':
meta[field] = getattr(row, field)
return meta
17 changes: 6 additions & 11 deletions examples/importers/ofx.py
@@ -20,7 +20,6 @@

import datetime
import enum
import itertools
import re

from os import path
@@ -96,7 +95,7 @@ def extract(self, filepath, existing):
"""Extract a list of partially complete transactions from the file."""
with open(filepath) as fd:
soup = bs4.BeautifulSoup(fd, 'lxml')
return extract(soup, filepath, self.acctid_regexp, self.importer_account,
return extract(soup, self.acctid_regexp, self.importer_account,
flags.FLAG_OKAY, self.balance_type)


@@ -110,10 +109,9 @@ def extract(soup, filename, acctid_regexp, account, flag, balance_type):
flag: A single-character string.
balance_type: An enum of type BalanceType.
Returns:
A sorted list of entries.
A list of entries.
"""
new_entries = []
counter = itertools.count()
for acctid, currency, transactions, balance in find_statement_transactions(soup):
if not re.match(acctid_regexp, acctid):
continue
@@ -122,9 +120,8 @@
stmt_entries = []
for stmttrn in transactions:
entry = build_transaction(stmttrn, flag, account, currency)
entry = entry._replace(meta=data.new_metadata(filename, next(counter)))
stmt_entries.append(entry)
stmt_entries = data.sorted(stmt_entries)
stmt_entries.sort(key=lambda entry: entry.date)
new_entries.extend(stmt_entries)

# Create a Balance directive.
@@ -137,13 +134,12 @@
# it to the following day.
date += datetime.timedelta(days=1)

meta = data.new_metadata(filename, next(counter))
balance_entry = data.Balance(meta, date, account,
balance_entry = data.Balance(None, date, account,
amount.Amount(number, currency),
None, None)
new_entries.append(balance_entry)

return data.sorted(new_entries)
return new_entries


def parse_ofx_time(date_str):
@@ -297,6 +293,5 @@ def build_transaction(stmttrn, flag, account, currency):
posting = data.Posting(account, units, None, None, None, None)

# Build the transaction with a single leg.
fileloc = data.new_metadata('<build_transaction>', 0)
return data.Transaction(fileloc, date, flag, payee, narration,
return data.Transaction({}, date, flag, payee, narration,
data.EMPTY_SET, data.EMPTY_SET, [posting])
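
One detail of the OFX example worth spelling out: the balance date is pushed to the following day because a beancount Balance directive asserts the balance at the beginning of its date. A tiny worked example with an illustrative date:

import datetime

# An OFX statement reporting its closing balance as of this date ...
statement_date = datetime.date(2024, 3, 31)

# ... yields a Balance assertion dated the following day, since beancount
# checks balance assertions at the beginning of the day.
balance_date = statement_date + datetime.timedelta(days=1)
assert balance_date == datetime.date(2024, 4, 1)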