#!/usr/bin/env python3
"""Convert a bank account movements XLSX export into Beancount ledger files.

The spreadsheet is converted to a markdown table, sent to a Claude agent that
answers with Beancount transactions, and the transactions are appended to
``ledger/transactions/YYYY/MM.beancount`` together with a balance assertion
per month.
"""
import argparse
import asyncio
import os
import re
from collections import defaultdict
from datetime import date, timedelta
from pathlib import Path

from claude_agent_sdk import query, ClaudeAgentOptions, ResultMessage
from docling.document_converter import DocumentConverter

# Account that every transaction is balanced against.
_SOURCE_ACCOUNT = "Assets:Liquid:Caixabank:Corrent"

# ISO date anywhere in a table cell (the cell may also carry a time part).
_DATE_RE = re.compile(r'(\d{4})-(\d{2})-(\d{2})')
# ISO date at the very start of a line: the first line of a Beancount entry.
_ENTRY_START_RE = re.compile(r'^\d{4}-\d{2}-\d{2}')
# Fenced markdown code block, optionally tagged as beancount.
_CODE_BLOCK_RE = re.compile(r'```(?:beancount)?\n(.*?)```', re.DOTALL)

# NOTE(review): rule 5 asks for the absolute amount on the first posting for
# income as well as expenses; with the asset posting auto-balanced this looks
# like it would book income with the wrong sign -- confirm against real output.
GET_BEANCOUNT_STATEMENTS_PROMPT = """# System Prompt: Personal Finances to Beancount Parser

You are a specialized financial transaction parser that converts bank account movements into Beancount format.

## Input Format

You will receive a table with the following columns:

- **Fecha**: Transaction date
- **Fecha Valor**: Value date
- **Movimiento**: Transaction description
- **Más datos**: Additional details (may be empty)
- **Importe**: Amount (negative for expenses, positive for income)
- **Saldo**: Account balance after transaction

Example input:
```
| Fecha | Fecha Valor | Movimiento | Más datos | Importe | Saldo |
2025-10-09 00:00:00 | 2025-10-09 00:00:00 | Nintendo CD148015 | | -69.99 | 10000.00
```

## Output Format

Convert each transaction into a Beancount entry with this structure:
```
YYYY-MM-DD * "Payee" "Description"
  ExpenseAccount AMOUNT EUR
  Assets:Liquid:Caixabank:Corrent
```

### Rules for Conversion

1. **Date**: Use the "Fecha" field in YYYY-MM-DD format
2. **Flag**: Always use `*` (cleared transaction)
3. **Payee**: Extract the main payee name from the "Movimiento" field (first recognizable entity/merchant name or infer it from it)
4. **Description**: Use the full "Movimiento" text as the description
5. **Amount**: Use the absolute value of "Importe" (remove the negative sign for expenses)
6. **Currency**: Always use EUR
7. **Source Account**: Always use `Assets:Liquid:Caixabank:Corrent` as the second posting (the account is automatically debited)

### Expense Account Classification

You will be provided with a list of available expense accounts.
Analyze each transaction and classify it into the most appropriate account based on:

- The payee/merchant name
- The transaction description
- Common spending patterns

**Available Income Accounts:**
Income:Work:Zurich:Salari
Income:Work:Zurich:TicketsRestaurant
Income:Work:Zurich:TargetaTransport
Income:Work:Zurich:SeguroMedic
Income:Work:Zurich:Gimnas
Income:Work:Zurich:DZP
Income:Other:Caixabank:Transferencia
Income:Other:Caixabank:Bizum
Income:Savings:Caixabank:RentabilitatEstalvis
Income:Savings:TradeRepublic:RentabilitatEstalvis
Income:Invest:R4:Dividends
Income:Invest:R4:CapitalGains
Income:Invest:R4:CapitalGains:Untaxable
Income:Invest:DZP:CapitalGains
Income:Other:Devolucions

**Available Expense Accounts:**
Expenses:R4:Comissions
Expenses:R4:Interessos
Expenses:Caixabank:Comissions
Expenses:Taxes:IRPF
Expenses:Taxes:BeneficisDividends
Expenses:Taxes:BeneficisDividendsOrigen
Expenses:Taxes:ImpostCirculacio
Expenses:Insurance:Cotxe
Expenses:Lloguer
Expenses:FacturesUtilitats
Expenses:Internet
Expenses:Gasolina
Expenses:MantenimentCotxe
Expenses:Roba
Expenses:Educació
Expenses:Medic
Expenses:Vacances
Expenses:Perruqueria
Expenses:AmazonPrime
Expenses:CarnetJove
Expenses:Supermercat
Expenses:Gimnàs
Expenses:Parking
Expenses:Mobilitat
Expenses:MarcaPersonal
Expenses:MenjarFora
Expenses:Entreteniment
Expenses:Llar
Expenses:Higiene
Expenses:Donatiu
Expenses:Altres

### Transaction Type Detection

- **Expenses** (negative Importe): Post to an Expenses:* account
- **Income** (positive Importe): Post to an Income:* account

### Special Cases

- If a transaction is ambiguous, choose the most likely expense category
- For unknown merchants, use a generic account like `Expenses:Altres`
- Preserve reference numbers and transaction IDs in the description
- If "Más datos" contains relevant information, consider including it in the description

## Example

**Input:**
```
2025-10-09 00:00:00 | 2025-10-09 00:00:00 | Nintendo CD148015 | | -69.99 | 10000.00
```

**Output:**
```
2025-10-09 * "Nintendo" "Nintendo CD148015"
  Expenses:Entreteniment 69.99 EUR
  Assets:Liquid:Caixabank:Corrent
```

## Output Requirements

- Process all transactions in the input table
- Maintain chronological order
- Ensure proper indentation (2 spaces for posting lines)
- Do not include the balance information in the Beancount output
- Be consistent with account naming conventions
- Only output Beancount code, explanations are not needed.

## Your Task

Parse the provided account movements data tables and generate the corresponding Beancount price statements. Output only the Beancount code.
"""


async def get_beancount_price_statements(r4_report: str) -> str:
    """Ask the Claude agent to turn a movements table into Beancount text.

    Args:
        r4_report: The account movements table (markdown) to convert.

    Returns:
        The text of the agent's successful result message.

    Raises:
        ValueError: If no successful result message carrying a string
            result was received.
    """
    options = ClaudeAgentOptions(
        system_prompt=GET_BEANCOUNT_STATEMENTS_PROMPT,
        cwd=os.getcwd(),
    )

    result = None
    async for message in query(
        prompt="Convert this financial account movements table to "
        f"beancount price statements:\n{r4_report}",
        options=options,
    ):
        if isinstance(message, ResultMessage) and message.subtype == "success":
            result = message.result
        else:
            # Anything that is not the final successful result is echoed so
            # the run can be followed from the terminal.
            print(message)

    if isinstance(result, str):
        return result
    raise ValueError(
        "Unable to get Beancount price statements from the report!")


def parse_response(beancount_statements: str) -> str:
    """Strip markdown code fences from the agent's answer.

    The answer may be plain text or wrapped in one or more fenced code
    blocks (optionally tagged ``beancount``). The contents of every fenced
    block are kept, in order, so that an answer split across several
    blocks does not lose transactions. Without any fence the text is
    returned unchanged.
    """
    blocks = _CODE_BLOCK_RE.findall(beancount_statements)
    if blocks:
        return '\n'.join(blocks)
    return beancount_statements


def extract_last_balances_by_month(
    markdown_report: str,
) -> dict[tuple[str, str], tuple[str, str]]:
    """Find the latest dated balance for each month in the markdown table.

    Rows are expected to be pipe-delimited with the date in the first
    column and the balance in the sixth. Rows without a date or with a
    non-numeric balance are ignored.

    Returns:
        A dict keyed by ``(year, month)`` strings whose values are
        ``(date, balance)`` string tuples, ``date`` being ``YYYY-MM-DD``.
    """
    last_by_month = {}

    for line in markdown_report.split('\n'):
        if '|' not in line:
            continue
        parts = line.split('|')
        # A leading '|' yields an empty parts[0]; 7 parts are needed to
        # reach the balance column at index 6.
        if len(parts) < 7:
            continue

        date_match = _DATE_RE.search(parts[1].strip())
        saldo = parts[6].strip()
        if not date_match or not saldo:
            continue

        balance = saldo.replace(',', '').replace(' ', '')
        try:
            float(balance)  # validation only; the textual form is kept
        except ValueError:
            continue

        year, month, day = date_match.groups()
        iso_date = f"{year}-{month}-{day}"
        key = (year, month)
        # Strictly-greater: among rows sharing the latest date the first
        # one in the table wins.
        # NOTE(review): that is the day's closing balance only if the export
        # lists newest rows first -- confirm against a real report.
        if key not in last_by_month or iso_date > last_by_month[key][0]:
            last_by_month[key] = (iso_date, balance)

    return last_by_month


def _split_transactions(beancount_statements: str) -> list[str]:
    """Split Beancount text into entries, each starting on a dated line."""
    transactions = []
    current = []
    for line in beancount_statements.strip().split('\n'):
        if _ENTRY_START_RE.match(line):
            if current:
                transactions.append('\n'.join(current))
            current = [line]
        elif current:
            # Lines before the first dated line are dropped.
            current.append(line)
    if current:
        transactions.append('\n'.join(current))
    return transactions


def _next_day(iso_date: str) -> str:
    """Return the ISO date following *iso_date* (raises ValueError if bad)."""
    return (date.fromisoformat(iso_date) + timedelta(days=1)).isoformat()


def save_statements(
    beancount_statements: str,
    last_balances_by_month: dict
):
    """Append the statements to ``ledger/transactions/YYYY/MM.beancount``.

    Statements are sorted chronologically and split by month. After each
    month's transactions a balance assertion for the source account is
    written when ``last_balances_by_month`` has an entry for that month.

    The assertion is dated the day *after* the last transaction date:
    Beancount evaluates a balance assertion at the beginning of its date,
    so dating it on the transaction day would exclude that day's
    movements from the checked balance.

    Args:
        beancount_statements: Beancount text without markdown fences.
        last_balances_by_month: Mapping of ``(year, month)`` to
            ``(date, balance)`` as produced by
            ``extract_last_balances_by_month``.
    """
    if not beancount_statements.strip():
        print("Warning: No valid statements to save")
        return

    transactions = _split_transactions(beancount_statements)
    # Every entry starts with a YYYY-MM-DD date, so its first ten
    # characters sort chronologically (the sort is stable).
    transactions.sort(key=lambda entry: entry[:10])

    transactions_by_month = defaultdict(list)
    for entry in transactions:
        transactions_by_month[(entry[:4], entry[5:7])].append(entry)

    for (year, month), month_transactions in sorted(
            transactions_by_month.items()):
        output_dir = Path(f"ledger/transactions/{year}")
        output_dir.mkdir(parents=True, exist_ok=True)
        output_file = output_dir / f"{month}.beancount"

        existing_content = ""
        if output_file.exists():
            existing_content = output_file.read_text(encoding="utf-8")

        # Append rather than truncate-and-rewrite so an interrupted run
        # cannot destroy what is already in the ledger. UTF-8 is explicit
        # because account names contain non-ASCII characters.
        with open(output_file, 'a', encoding="utf-8") as f:
            if existing_content and not existing_content.endswith('\n'):
                f.write('\n')

            f.write('\n'.join(month_transactions))
            f.write('\n')

            last = last_balances_by_month.get((year, month))
            if last:
                last_date, last_balance = last
                if last_date and last_balance:
                    try:
                        assertion_date = _next_day(last_date)
                    except ValueError:
                        print("Warning: skipping balance assertion, "
                              f"invalid date {last_date}")
                    else:
                        f.write(
                            f'\n{assertion_date} balance {_SOURCE_ACCOUNT} '
                            f'{last_balance} EUR\n')

        print(f"Saved statements to {output_file}")


def convert_file_to_markdown(path: str):
    """Convert the document at *path* to markdown using docling."""
    converter = DocumentConverter()
    result = converter.convert(path)
    return result.document.export_to_markdown()


async def main():
    """Command-line entry point: XLSX report in, ledger files out."""
    parser = argparse.ArgumentParser(
        description="Parse R4 report from XLSX format")
    parser.add_argument("source", help="Path to the input XLSX file")
    args = parser.parse_args()

    # Accept the extension in any letter case (e.g. ".XLSX").
    if not args.source.lower().endswith(".xlsx"):
        parser.error("Input file must have .xlsx format")

    markdown_report = convert_file_to_markdown(args.source)
    beancount_statements = await get_beancount_price_statements(
        markdown_report
    )
    print(f"Final result: \n{beancount_statements}")

    clean_beancount_statements = parse_response(beancount_statements)
    last_balances_by_month = extract_last_balances_by_month(markdown_report)
    save_statements(clean_beancount_statements, last_balances_by_month)


if __name__ == "__main__":
    asyncio.run(main())