#!/usr/bin/env python3
"""
Boletín Oficial → WordPress
Extrae avisos de un archivo .doc/.docx y los sube a WordPress vía API REST.

Uso:
    python3 boletin_to_wordpress.py archivo.doc
    python3 boletin_to_wordpress.py archivo.doc --dry-run   (sin subir, solo muestra)
    python3 boletin_to_wordpress.py archivo.doc --output avisos.json  (exporta JSON)
    python3 boletin_to_wordpress.py archivo.doc --categoria Decretos  (solo avisos de esa categoría)
"""

import sys
import os
import re
import json
import argparse
import subprocess
import tempfile
import base64
import urllib.request
import urllib.error
from pathlib import Path


# ─────────────────────────────────────────────────────────────
# CONFIGURATION — fill in before use
#
# Every value can be overridden with an environment variable (WP_URL,
# WP_USER, WP_PASSWORD, WP_POST_STATUS). The literals below are only the
# fallbacks used when the variable is not set, so existing setups keep working.
#
# SECURITY: a password stored in a source file must be considered leaked.
# Rotate it, provide the new one through the WP_PASSWORD environment
# variable, and delete the literal fallback below.
# ─────────────────────────────────────────────────────────────
WP_URL      = os.environ.get("WP_URL", "https://boletinoficial.jujuy.gob.ar")  # Base URL of the WordPress site
WP_USER     = os.environ.get("WP_USER", "guillermo")              # WordPress user name (the one used to log in to the admin panel)
WP_PASSWORD = os.environ.get("WP_PASSWORD", "Guille$2Opomel0")           # Password of that WordPress user
POST_STATUS = os.environ.get("WP_POST_STATUS", "publish")                 # "publish" | "draft" | "pending"
# ─────────────────────────────────────────────────────────────


# Category map: WordPress category name → predicate(first, body).
#
# `first` is the upper-cased first line of a notice and `body` its upper-cased
# full text (that is how extract_avisos calls these predicates). Insertion
# order matters: the first predicate that returns a truthy value decides the
# category, so more specific headings must come before broader ones.
#
# str.upper() keeps accents ("licitación" → "LICITACIÓN"), so keywords whose
# accented letter falls inside the matched fragment are listed in both
# spellings. Without the accented "LICITACIÓN" entry such headings would fall
# through to "Edictos de Citación", because "LICITACIÓN" contains "CITACI".
CATEGORY_MAP = {
    "Decretos": lambda first, _: first.startswith("DECRETO"),
    "Leyes": lambda first, _: first.startswith("LEY"),
    "Resoluciones": lambda first, _: (
        first.startswith(("RESOLUCION", "RESOLUCIÓN"))
        or "INSTITUTO DE VIVIENDA" in first
    ),
    "Municipios - Comisiones Municipales": lambda first, _: (
        "MUNICIPALIDAD" in first
        or "MUNICIPIO" in first
        or "COMISION MUNICIPAL" in first
        or "COMISIÓN MUNICIPAL" in first
    ),
    "Partidos Políticos": lambda first, _: (
        "PARTIDO" in first
        and ("POLITIC" in first or "POLÍTIC" in first)
    ),
    "Licitaciones": lambda first, _: (
        "LICITACION" in first
        or "LICITACIÓN" in first
        or "CONCURSO DE PRECIO" in first
    ),
    "Contratos": lambda first, _: (
        first.startswith(("ACTA", "CONTRATO", "CESION", "CESIÓN", "ESCRITURA"))
        or "INSTRUMENTO CONSTITUTIVO" in first
        or "CONVOCATORIA" in first
        or "VISION JUJUY" in first
        or "VISIÓN JUJUY" in first
    ),
    "Remates": lambda first, _: "REMATE" in first,
    # "CONCURSO DE PRECIO…" headings belong to "Licitaciones", hence the exclusion.
    "Concursos y Quiebras": lambda first, _: "QUIEBRA" in first or (
        "CONCURSO" in first and "PRECIO" not in first
    ),
    "Edictos de Minas": lambda first, body: (
        "MINAS" in first
        or "JUEZ ADMINISTRATIVO DE MINAS" in body
    ),
    # The fragments below stop before the accented vowel, so they match both spellings.
    "Edictos de Usucapión": lambda first, _: "USUCAP" in first,
    "Edictos de Notificación": lambda first, _: "NOTIFICACI" in first,
    "Edictos de Citación": lambda first, _: "CITACI" in first,
    "Edictos Sucesorios": lambda first, body: (
        "SUCESORIO" in first
        or "TRIBUNAL DE FAMILIA" in body
        or "JUZGADO" in first
    ),
}


def convert_doc_to_docx(doc_path: str) -> str:
    """Convert a .doc file to .docx with LibreOffice when needed.

    Args:
        doc_path: Path of the input document.

    Returns:
        ``doc_path`` unchanged when it already ends in ``.docx``
        (case-insensitive). Otherwise the path of the converted copy, named
        ``<stem>_converted.docx`` inside the system temporary directory.
        That copy is not deleted by this function.

    Raises:
        RuntimeError: If ``soffice`` exits with a non-zero status or does not
            leave the expected ``<stem>.docx`` in its output directory.
        FileNotFoundError: If the ``soffice`` executable cannot be found
            (raised by ``subprocess.run``).
    """
    if doc_path.lower().endswith(".docx"):
        return doc_path

    import shutil

    stem = Path(doc_path).stem
    # soffice writes into a scratch directory that disappears with the
    # ``with`` block, so the result is copied out before leaving it.
    with tempfile.TemporaryDirectory() as tmpdir:
        result = subprocess.run(
            ["soffice", "--headless", "--convert-to", "docx",
             "--outdir", tmpdir, doc_path],
            capture_output=True, text=True
        )
        if result.returncode != 0:
            raise RuntimeError(f"Error convirtiendo .doc: {result.stderr}")

        out_path = os.path.join(tmpdir, stem + ".docx")
        # A zero exit status alone does not prove a file was written; fail
        # with the same error type instead of a bare FileNotFoundError
        # from shutil.copy.
        if not os.path.isfile(out_path):
            detail = (result.stderr or result.stdout).strip()
            raise RuntimeError(
                f"Error convirtiendo .doc: no se generó {stem}.docx. {detail}"
            )

        final = os.path.join(tempfile.gettempdir(), stem + "_converted.docx")
        shutil.copy(out_path, final)
        return final


def extract_avisos(docx_path: str) -> list[dict]:
    """Extract the notices contained in a Word (.docx) file.

    The document is split into blocks of non-empty paragraphs. A block ends
    at (and includes) every non-empty paragraph whose XML contains ``pBdr``
    (a paragraph border element); any paragraphs left after the last such
    separator form a final block.

    Args:
        docx_path: Path of the .docx file to read.

    Returns:
        One dict per block, in document order, with the keys ``"title"``
        (first line of the block), ``"content"`` (all lines joined with
        ``"\\n"``) and ``"category"`` (the first CATEGORY_MAP entry whose
        predicate matches, or ``"Sin clasificar"``).
    """
    from docx import Document

    # python-docx rebuilds the paragraph list on every ``.paragraphs`` access,
    # so it is read once here instead of once per index lookup.
    paragraphs = Document(docx_path).paragraphs

    # Single pass: collect non-empty lines and close the block on a separator.
    blocks = []
    current = []
    for para in paragraphs:
        text = para.text.strip()
        if not text:
            # Empty paragraphs never contribute text nor act as separators.
            continue
        current.append(text)
        if "pBdr" in para._element.xml:
            blocks.append(current)
            current = []
    if current:
        # Trailing paragraphs after the last separator.
        blocks.append(current)

    # Classify each block and build the notice record.
    avisos = []
    for lines in blocks:
        first = lines[0].upper()
        body = " ".join(lines).upper()

        # First matching predicate wins (CATEGORY_MAP keeps insertion order).
        category = next(
            (cat_name for cat_name, matcher in CATEGORY_MAP.items()
             if matcher(first, body)),
            "Sin clasificar",
        )

        avisos.append({
            "title": lines[0],
            "content": "\n".join(lines),
            "category": category,
        })

    return avisos


def get_or_create_category(name: str, auth_header: str, cache: dict) -> int:
    """Return the WordPress category ID for *name*, creating it if missing.

    Args:
        name: Category name to look up (compared case-insensitively).
        auth_header: Value sent as the ``Authorization`` header.
        cache: Dict mapping category names to IDs. It is consulted first and
            updated in place with every ID that is resolved or created.

    Returns:
        The numeric ID of the existing or newly created category.

    Raises:
        urllib.error.HTTPError: If WordPress answers with an error status.
        urllib.error.URLError: If the server cannot be reached.
    """
    # Imported explicitly: the file's top-level imports only name
    # urllib.request and urllib.error.
    import urllib.parse

    if name in cache:
        return cache[name]

    api = f"{WP_URL}/wp-json/wp/v2/categories"

    # The search can return other categories besides the exact one, so ask
    # for the largest page WordPress allows before filtering by exact name.
    req = urllib.request.Request(
        f"{api}?search={urllib.parse.quote(name)}&per_page=100",
        headers={"Authorization": auth_header}
    )
    with urllib.request.urlopen(req) as resp:
        cats = json.loads(resp.read())

    wanted = name.lower()
    for cat in cats:
        if cat["name"].lower() == wanted:
            cache[name] = cat["id"]
            return cat["id"]

    # No exact match: create the category (a Request with data is sent as POST).
    data = json.dumps({"name": name}).encode()
    req = urllib.request.Request(api, data=data, headers={
        "Authorization": auth_header,
        "Content-Type": "application/json"
    })
    with urllib.request.urlopen(req) as resp:
        new_cat = json.loads(resp.read())
    cache[name] = new_cat["id"]
    print(f"  ✚ Categoría creada: {name} (id={new_cat['id']})")
    return new_cat["id"]


def _content_to_html(content: str) -> str:
    """Render plain text as one HTML-escaped ``<p>`` element per non-blank line."""
    import html

    return "\n".join(
        # The text comes from a document, not from HTML: escape &, < and >
        # so they are shown literally instead of being parsed as markup.
        f"<p>{html.escape(line, quote=False)}</p>"
        for line in content.split("\n")
        if line.strip()
    )


def post_to_wordpress(aviso: dict, auth_header: str, cat_cache: dict) -> dict:
    """Publish one notice as a WordPress post.

    Args:
        aviso: Notice with the keys ``"title"``, ``"content"`` (plain text,
            one paragraph per line) and ``"category"`` (category name).
        auth_header: Value sent as the ``Authorization`` header.
        cat_cache: Category-name → ID cache shared between calls; passed on
            to get_or_create_category, which updates it.

    Returns:
        The JSON object returned by the WordPress ``posts`` endpoint, decoded
        into a dict.

    Raises:
        urllib.error.HTTPError: If WordPress answers with an error status.
        urllib.error.URLError: If the server cannot be reached.
    """
    # Kept from the original: get_or_create_category reaches urllib.parse
    # through the urllib package, so make sure the submodule is loaded.
    import urllib.parse

    cat_id = get_or_create_category(aviso["category"], auth_header, cat_cache)

    data = json.dumps({
        "title":      aviso["title"],
        "content":    _content_to_html(aviso["content"]),
        "status":     POST_STATUS,
        "categories": [cat_id],
    }).encode("utf-8")

    # A Request that carries data is sent as POST.
    req = urllib.request.Request(
        f"{WP_URL}/wp-json/wp/v2/posts",
        data=data,
        headers={
            "Authorization": auth_header,
            "Content-Type": "application/json; charset=utf-8",
        }
    )
    with urllib.request.urlopen(req) as resp:
        return json.loads(resp.read())


def main():
    """Command-line entry point.

    Parses the arguments, converts the input to .docx when it is a .doc,
    extracts the notices, optionally filters them by category, prints a
    per-category summary and then does exactly one of:

    * ``--output FILE``: write the notices to a JSON file and return;
    * ``--dry-run``: list category and title of every notice and return;
    * otherwise: upload every notice to WordPress, reporting each result.

    Upload errors are reported per notice and do not stop the batch. The
    process exits with status 1 only when the placeholder configuration check
    fails.
    """
    # Kept from the original: get_or_create_category reaches urllib.parse
    # through the urllib package, so make sure the submodule is loaded.
    import urllib.parse
    from collections import Counter

    parser = argparse.ArgumentParser(description="Sube avisos del Boletín Oficial a WordPress")
    parser.add_argument("archivo", help="Archivo .doc o .docx del boletín")
    parser.add_argument("--dry-run", action="store_true",
                        help="Solo muestra los avisos, sin subir nada")
    parser.add_argument("--output", metavar="ARCHIVO.json",
                        help="Exporta los avisos a un archivo JSON (sin subir)")
    parser.add_argument("--categoria", metavar="NOMBRE",
                        help="Subir solo avisos de esta categoría")
    args = parser.parse_args()

    # ── Check dependencies: install python-docx on the fly when it is missing.
    try:
        from docx import Document  # noqa: F401 — imported only to test availability
    except ImportError:
        print("Instalando python-docx...")
        subprocess.run([sys.executable, "-m", "pip", "install",
                        "python-docx", "--break-system-packages", "-q"])
        from docx import Document  # noqa: F401 — raises ImportError if the install failed

    # ── Convert when the input is a .doc
    archivo = args.archivo
    converted_tmp = None
    if archivo.lower().endswith(".doc"):
        print(f"Convirtiendo {archivo} a .docx...")
        archivo = convert_doc_to_docx(archivo)
        if archivo != args.archivo:
            # Remember the temporary copy so it can be removed after parsing.
            converted_tmp = archivo

    # ── Extract notices
    try:
        print(f"Extrayendo avisos de {archivo}...")
        avisos = extract_avisos(archivo)
    finally:
        if converted_tmp is not None:
            try:
                os.remove(converted_tmp)
            except OSError:
                # Best effort: a leftover temp file must not abort the run.
                pass
    print(f"  → {len(avisos)} avisos encontrados\n")

    # ── Filter by category when requested (case-insensitive exact match)
    if args.categoria:
        avisos = [a for a in avisos if a["category"].lower() == args.categoria.lower()]
        print(f"  → {len(avisos)} avisos en la categoría '{args.categoria}'\n")

    total = len(avisos)

    # ── Per-category summary
    resumen = Counter(a["category"] for a in avisos)
    print("Resumen por categoría:")
    for cat, n in sorted(resumen.items()):
        print(f"  {n:3d}  {cat}")
    print()

    # ── Export to JSON when requested (takes precedence over --dry-run)
    if args.output:
        with open(args.output, "w", encoding="utf-8") as f:
            json.dump(avisos, f, ensure_ascii=False, indent=2)
        print(f"Avisos exportados a: {args.output}")
        return

    # ── Dry run
    if args.dry_run:
        print("=== DRY RUN — No se subirá nada ===\n")
        for pos, av in enumerate(avisos, start=1):
            print(f"[{pos:3d}/{total}] [{av['category']}]")
            print(f"       Título: {av['title'][:80]}")
            print()
        return

    # ── Upload to WordPress
    # NOTE(review): this guard only fires while WP_USER is still the
    # placeholder "tu_usuario"; it does not validate the password.
    if WP_URL in ("https://TU-SITIO.com", "https://boletinoficial.jujuy.gob.ar") and WP_USER == "tu_usuario":
        print("ERROR: Configurar WP_URL, WP_USER y WP_PASSWORD en el script antes de subir.")
        sys.exit(1)

    # HTTP Basic authentication header built from the configured credentials.
    creds = base64.b64encode(f"{WP_USER}:{WP_PASSWORD}".encode()).decode()
    auth_header = f"Basic {creds}"
    cat_cache = {}

    print(f"Subiendo {total} avisos a {WP_URL}...\n")
    errores = []
    for pos, av in enumerate(avisos, start=1):
        try:
            result = post_to_wordpress(av, auth_header, cat_cache)
            print(f"[{pos:3d}/{total}] ✓ {av['category']} → {av['title'][:60]}")
            print(f"          {result.get('link', '')}")
        except urllib.error.HTTPError as e:
            # errors="replace": an undecodable error body must not raise
            # inside the handler and abort the remaining uploads.
            msg = e.read().decode("utf-8", errors="replace")
            print(f"[{pos:3d}/{total}] ✗ ERROR {e.code}: {av['title'][:60]}")
            print(f"          {msg[:120]}")
            errores.append((av, str(e)))
        except Exception as e:
            # Top-level boundary: report the failure and continue with the next notice.
            print(f"[{pos:3d}/{total}] ✗ ERROR: {av['title'][:60]} → {e}")
            errores.append((av, str(e)))

    print(f"\n✓ Completado: {total-len(errores)}/{total} publicados")
    if errores:
        print(f"✗ {len(errores)} errores — revisar manualmente")


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()