#!/usr/bin/env python3
import csv
import json
import sqlite3
from pathlib import Path

import reverse_geocoder as rg

ROOT = Path('/home/louicyp/clawd-clean')
DB = ROOT / 'data' / 'aircash_locations.sqlite'
OUT = ROOT / 'data' / 'countries'
OUT.mkdir(parents=True, exist_ok=True)

conn = sqlite3.connect(DB)
conn.row_factory = sqlite3.Row
rows = conn.execute(
    '''
    SELECT id, lat, lng, partner_name, address_line, post_code_and_city, working_hours,
           payment_location, payout_location, atm_location, aircash_card_location, abon_location,
           active, last_update_utc
    FROM locations
    WHERE lat IS NOT NULL AND lng IS NOT NULL
    ORDER BY id
    '''
).fetchall()

records = [dict(r) for r in rows]
coords = [(r['lat'], r['lng']) for r in records]

print(f'Loaded {len(records)} rows. Reverse geocoding...')
geo = rg.search(coords, mode=2)  # fast kd-tree lookup

by_country = {}
for r, g in zip(records, geo):
    cc = g.get('cc', 'XX')
    r['country_code'] = cc
    r['country_name'] = g.get('name', '')
    by_country.setdefault(cc, []).append(r)

summary = []
for cc, items in sorted(by_country.items(), key=lambda kv: len(kv[1]), reverse=True):
    jpath = OUT / f'aircash_locations_{cc}.json'
    cpath = OUT / f'aircash_locations_{cc}.csv'

    jpath.write_text(json.dumps(items, ensure_ascii=False, indent=2), encoding='utf-8')
    with cpath.open('w', newline='', encoding='utf-8') as f:
        writer = csv.DictWriter(f, fieldnames=list(items[0].keys()))
        writer.writeheader()
        writer.writerows(items)

    summary.append({'country_code': cc, 'count': len(items)})

summary_path = OUT / 'country_summary.json'
summary_path.write_text(json.dumps(summary, indent=2), encoding='utf-8')

print(f'Created {len(summary)} country datasets in {OUT}')
print('Top 10:')
for s in summary[:10]:
    print(f"  {s['country_code']}: {s['count']}")
