feature/implementacion de hub en EFC

2026-06-08 07:19:01 -06:00
parent a9931d2838
commit e1716d65a7
20 changed files with 3749 additions and 649 deletions
--- a/api/datastage/management/init.py
+++ b/api/datastage/management/init.py
--- a/api/datastage/management/commands/init.py
+++ b/api/datastage/management/commands/init.py
--- a/api/datastage/management/commands/reprocesar_datastages.py
+++ b/api/datastage/management/commands/reprocesar_datastages.py
@@ -0,0 +1,195 @@
+"""
+Reprocesa datastages ya cargados: elimina los Registro* existentes del datastage
+y reprocesa los archivos .asc de forma SINCRÓNICA (sin Celery).
+
+Casos de uso:
+  - Los registros quedaron vacíos por un bug y ya fue corregido.
+  - Se quiere refrescar los datos sin que el usuario vuelva a subir el archivo.
+
+Los Pedimentos existentes NO se tocan (el create en la task falla silenciosamente
+por unique_together si ya existen).
+
+Uso:
+  python manage.py reprocesar_datastages                          # todos los datastages
+  python manage.py reprocesar_datastages --organizacion <UUID>   # solo una org
+  python manage.py reprocesar_datastages --datastage 4 7 12      # IDs específicos
+  python manage.py reprocesar_datastages --organizacion <UUID> --datastage 4
+  python manage.py reprocesar_datastages --dry-run               # sin cambios
+"""
+
+import os
+import tempfile
+import zipfile
+
+from django.core.management.base import BaseCommand, CommandError
+
+from api.datastage.models import (
+    DataStage,
+    Registro500, Registro501, Registro502, Registro503, Registro504,
+    Registro505, Registro506, Registro507, Registro508, Registro509,
+    Registro510, Registro511, Registro512, Registro520,
+    Registro551, Registro552, Registro553, Registro554, Registro555,
+    Registro556, Registro557, Registro558,
+    RegistroSel,
+    Registro701, Registro702,
+)
+
+REGISTRO_MODELS = [
+    Registro500, Registro501, Registro502, Registro503, Registro504,
+    Registro505, Registro506, Registro507, Registro508, Registro509,
+    Registro510, Registro511, Registro512, Registro520,
+    Registro551, Registro552, Registro553, Registro554, Registro555,
+    Registro556, Registro557, Registro558,
+    RegistroSel,
+    Registro701, Registro702,
+]
+
+
+class Command(BaseCommand):
+    help = "Elimina los Registro* de datastages procesados y vuelve a procesarlos de forma sincrónica."
+
+    def add_arguments(self, parser):
+        parser.add_argument(
+            "--organizacion", metavar="UUID",
+            help="UUID de la organización. Sin este arg: todas las orgs.",
+        )
+        parser.add_argument(
+            "--datastage", metavar="ID", nargs="+", type=int,
+            help="Uno o más IDs de DataStage a reprocesar.",
+        )
+        parser.add_argument(
+            "--dry-run", action="store_true",
+            help="Solo muestra lo que haría, sin borrar ni insertar.",
+        )
+
+    def handle(self, *args, **options):
+        org_id  = options.get("organizacion")
+        ds_ids  = options.get("datastage")
+        dry_run = options["dry_run"]
+
+        if dry_run:
+            self.stdout.write(self.style.WARNING(
+                "=== MODO PRUEBA (--dry-run): sin cambios en BD ===\n"
+            ))
+
+        qs = DataStage.objects.select_related("organizacion").order_by("id")
+        if org_id:
+            qs = qs.filter(organizacion_id=org_id)
+        if ds_ids:
+            qs = qs.filter(id__in=ds_ids)
+
+        total = qs.count()
+        if total == 0:
+            self.stdout.write(self.style.WARNING("No se encontraron datastages con los filtros indicados."))
+            return
+
+        self.stdout.write(f"Datastages a reprocesar: {total}\n")
+
+        ok = err = 0
+        for ds in qs:
+            exito = self._reprocesar(ds, dry_run)
+            if exito:
+                ok += 1
+            else:
+                err += 1
+
+        self._print_summary(ok, err, dry_run)
+
+    # ------------------------------------------------------------------ #
+
+    def _reprocesar(self, ds, dry_run):
+        org_nombre = ds.organizacion.nombre if ds.organizacion else "sin organización"
+        self.stdout.write(
+            f"\nDataStage ID={ds.id} | org={org_nombre} | archivo={ds.archivo or '—'}"
+        )
+
+        if not ds.archivo:
+            self.stdout.write(self.style.ERROR("  → Sin archivo asociado, se omite."))
+            return False
+
+        # 1. Eliminar Registro* existentes
+        total_borrados = 0
+        for Model in REGISTRO_MODELS:
+            qs_modelo = Model.objects.filter(datastage=ds)
+            count = qs_modelo.count()
+            if count == 0:
+                continue
+            if not dry_run:
+                qs_modelo.delete()
+            estado = "[dry-run]" if dry_run else "borrados"
+            self.stdout.write(f"  {Model.__name__}: {count} {estado}")
+            total_borrados += count
+
+        if total_borrados == 0:
+            self.stdout.write("  → Sin registros existentes en ninguna tabla.")
+        else:
+            self.stdout.write(f"  Total eliminados: {total_borrados}")
+
+        if dry_run:
+            self.stdout.write(self.style.WARNING(
+                "  → [dry-run] Se procesarían los archivos .asc del datastage."
+            ))
+            return True
+
+        # 2. Descargar ZIP una vez para obtener la lista de .asc
+        from api.utils.storage_service import storage_service
+
+        ruta = str(ds.archivo)
+        if not storage_service.file_exists(ruta):
+            self.stdout.write(self.style.ERROR(
+                f"  El archivo no existe en storage: {ruta}"
+            ))
+            return False
+
+        tmp_path = None
+        try:
+            with tempfile.NamedTemporaryFile(delete=False, suffix=".zip") as tmp:
+                tmp_path = tmp.name
+
+            if not storage_service.download_file(ruta, tmp_path):
+                self.stdout.write(self.style.ERROR(
+                    f"  No se pudo descargar '{ruta}' — verifica conectividad con MinIO."
+                ))
+                return False
+
+            with zipfile.ZipFile(tmp_path, "r") as zf:
+                asc_files = [n for n in zf.namelist() if n.endswith(".asc")]
+
+        finally:
+            if tmp_path and os.path.exists(tmp_path):
+                os.unlink(tmp_path)
+
+        if not asc_files:
+            self.stdout.write(self.style.WARNING("  → No se encontraron archivos .asc en el ZIP."))
+            return True
+
+        self.stdout.write(f"  Archivos .asc encontrados: {len(asc_files)}")
+
+        # 3. Procesar cada .asc de forma sincrónica (sin Celery)
+        from api.datastage.tasks import procesar_archivo_asc_task
+
+        total_insertados = 0
+        for asc_name in asc_files:
+            self.stdout.write(f"    {asc_name} ... ", ending="")
+            result = procesar_archivo_asc_task(ds.id, ds.organizacion_id, asc_name)
+            if "error" in result:
+                self.stdout.write(self.style.ERROR(f"ERROR: {result['error']}"))
+            else:
+                insertados = result.get("insertados", 0)
+                total_insertados += insertados
+                self.stdout.write(self.style.SUCCESS(f"{insertados} registros"))
+
+        self.stdout.write(f"  Total insertados: {total_insertados}")
+        return True
+
+    # ------------------------------------------------------------------ #
+
+    def _print_summary(self, ok, err, dry_run):
+        self.stdout.write(f"\n{'─' * 60}")
+        self.stdout.write(f"RESUMEN: {ok} exitosos, {err} con error.")
+        if dry_run:
+            self.stdout.write(self.style.WARNING(
+                "MODO PRUEBA: ejecuta sin --dry-run para aplicar los cambios."
+            ))
+        else:
+            self.stdout.write(self.style.SUCCESS("Reprocesado completado."))