#!/usr/bin/env python3
"""
Boletín Oficial → CSV para importar en WordPress
Extrae avisos de un archivo .docx y genera un CSV listo para importar
con el plugin "WP All Import" o "Really Simple CSV Importer".

Uso:
    python boletin_to_csv.py BO29.docx
    python boletin_to_csv.py BO29.docx --salida mis_avisos.csv
"""

import sys
import csv
import argparse
import subprocess
from pathlib import Path


# Category rules. Each matcher receives the upper-cased first line of a notice
# and the upper-cased full text of the notice, and returns True on a match.
#
# Rules are evaluated in insertion order and the first match wins, so specific
# rules must stay above broader ones. In particular "Licitaciones" has to come
# before "Edictos de Citación": "CITACI" is a substring of "LICITACION".
#
# str.upper() keeps accents ("Licitación" -> "LICITACIÓN"), so every keyword
# that can be written with an accent is listed in both spellings.
CATEGORY_MAP = {
    "Decretos": lambda first, _: first.startswith("DECRETO"),
    "Leyes": lambda first, _: first.startswith("LEY"),
    "Resoluciones": lambda first, _: (
        first.startswith(("RESOLUCION", "RESOLUCIÓN"))
        or "INSTITUTO DE VIVIENDA" in first
    ),
    "Municipios - Comisiones Municipales": lambda first, _: (
        "MUNICIPALIDAD" in first
        or "MUNICIPIO" in first
        or "COMISION MUNICIPAL" in first
        or "COMISIÓN MUNICIPAL" in first
    ),
    "Partidos Políticos": lambda first, _: (
        "PARTIDO" in first
        and ("POLITIC" in first or "POLÍTIC" in first)
    ),
    "Licitaciones": lambda first, _: (
        "LICITACION" in first
        or "LICITACIÓN" in first
        or "CONCURSO DE PRECIO" in first
    ),
    "Contratos": lambda first, _: (
        first.startswith(("ACTA", "CONTRATO", "CESION", "CESIÓN", "ESCRITURA"))
        or "INSTRUMENTO CONSTITUTIVO" in first
        or "CONVOCATORIA" in first
        or "VISION JUJUY" in first
        or "VISIÓN JUJUY" in first
        or "DECLARACION JURADA" in first
        or "DECLARACIÓN JURADA" in first
        or "ANEXO" in first
    ),
    "Remates": lambda first, _: "REMATE" in first,
    # "CONCURSO DE PRECIO(S)" is a tender, not an insolvency proceeding.
    "Concursos y Quiebras": lambda first, _: (
        "QUIEBRA" in first
        or ("CONCURSO" in first and "PRECIO" not in first)
    ),
    "Edictos de Minas": lambda first, body: (
        "MINAS" in first
        or "JUEZ ADMINISTRATIVO DE MINAS" in body
    ),
    # The truncated keywords below match both accented and plain endings
    # (e.g. "USUCAPION" / "USUCAPIÓN").
    "Edictos de Usucapión": lambda first, _: "USUCAP" in first,
    "Edictos de Notificación": lambda first, _: "NOTIFICACI" in first,
    "Edictos de Citación": lambda first, _: "CITACI" in first,
    "Edictos Sucesorios": lambda first, body: (
        "SUCESORIO" in first
        or "TRIBUNAL DE FAMILIA" in body
        or "JUZGADO" in first
    ),
}


def extract_avisos(docx_path: str) -> list:
    """Extract the notices ("avisos") contained in a .docx bulletin.

    The document is split into notices: every non-empty paragraph whose XML
    mentions ``pBdr`` (a paragraph border) closes the current notice. Any
    non-empty paragraphs left after the last such paragraph form a final
    notice. Empty paragraphs are ignored.

    Each notice is classified with the first matching rule of
    ``CATEGORY_MAP`` (falling back to "Sin clasificar").

    Args:
        docx_path: Path of the .docx file to read.

    Returns:
        A list of dicts, one per notice, with the keys ``title`` (first line
        of the notice), ``content`` (every line wrapped in ``<p>`` tags, HTML
        escaped), ``category``, ``status`` ("publish") and ``type`` ("post").
    """
    from html import escape

    from docx import Document

    # Read the paragraph list and each paragraph's text exactly once:
    # python-docx rebuilds them on every access, so indexing
    # ``doc.paragraphs[j]`` inside a loop is quadratic in the document size.
    blocks = []
    current = []
    for para in Document(docx_path).paragraphs:
        text = para.text.strip()
        if not text:
            continue
        current.append(text)
        # A bordered, non-empty paragraph is the last line of a notice.
        if "pBdr" in para._element.xml:
            blocks.append(current)
            current = []
    if current:
        blocks.append(current)

    avisos = []
    for lines in blocks:
        title = lines[0]
        first = title.upper()
        body = " ".join(lines).upper()

        # First matching rule wins; dict order defines the priority.
        category = next(
            (name for name, matcher in CATEGORY_MAP.items() if matcher(first, body)),
            "Sin clasificar",
        )

        # One <p> per line. The text is escaped so that "&", "<" and ">" in
        # the bulletin cannot break or inject markup in the imported post.
        html_content = "\n".join(
            f"<p>{escape(line, quote=False)}</p>" for line in lines
        )

        avisos.append({
            "title":    title,
            "content":  html_content,
            "category": category,
            "status":   "publish",
            "type":     "post",
        })

    return avisos


def main(argv=None):
    """Command-line entry point: convert a .docx bulletin into a WordPress CSV.

    Validates the input path, makes sure python-docx is importable (trying a
    pip install when it is missing), extracts the notices with
    ``extract_avisos``, prints a per-category summary and writes the CSV.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
            (the default) keeps the normal command-line behaviour.

    Exits with status 1 when the input is not an existing .docx file or when
    python-docx is missing and cannot be installed.
    """
    parser = argparse.ArgumentParser(description="Genera CSV para importar en WordPress")
    parser.add_argument("archivo", help="Archivo .docx del boletín")
    parser.add_argument("--salida", default="", help="Nombre del archivo CSV de salida")
    args = parser.parse_args(argv)

    # Validate the arguments first so a bad invocation never triggers a
    # package installation.
    if not args.archivo.lower().endswith(".docx"):
        print("ERROR: El archivo debe ser .docx")
        print("Abrí el .doc con Word y guardalo como .docx primero.")
        sys.exit(1)

    if not Path(args.archivo).is_file():
        print(f"ERROR: No se encontró el archivo {args.archivo}")
        sys.exit(1)

    # Dependency check: python-docx is only needed by extract_avisos, so it
    # is probed here and installed on demand.
    try:
        import docx  # noqa: F401
    except ImportError:
        print("Instalando python-docx...")
        result = subprocess.run(
            [sys.executable, "-m", "pip", "install", "python-docx", "-q"]
        )
        if result.returncode != 0:
            print("ERROR: No se pudo instalar python-docx.")
            print("Instalalo manualmente con: pip install python-docx")
            sys.exit(1)
        # Make the freshly installed package visible to this running process.
        import importlib
        importlib.invalidate_caches()

    # Output CSV name: defaults to "<input stem>_avisos.csv" in the current
    # working directory.
    salida = args.salida or Path(args.archivo).stem + "_avisos.csv"

    print(f"Extrayendo avisos de {args.archivo}...")
    avisos = extract_avisos(args.archivo)
    print(f"  → {len(avisos)} avisos encontrados")

    # Per-category summary, sorted by category name.
    from collections import Counter
    resumen = Counter(a["category"] for a in avisos)
    print("\nResumen por categoría:")
    for cat, n in sorted(resumen.items()):
        print(f"  {n:3d}  {cat}")

    # "utf-8-sig" writes a BOM at the start of the file; newline="" is what
    # the csv module requires so it can control line endings itself.
    with open(salida, "w", newline="", encoding="utf-8-sig") as f:
        writer = csv.DictWriter(f, fieldnames=["title", "content", "category", "status", "type"])
        writer.writeheader()
        writer.writerows(avisos)

    print(f"\n✓ CSV generado: {salida}")
    print(f"  {len(avisos)} filas listas para importar en WordPress")
    print()
    print("Próximo paso: importar el CSV en WordPress con el plugin")
    print("'Really Simple CSV Importer' o 'WP All Import'")


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()