fix/de los tickets T2026-05-027, T2025-09-004 y T2025-09-056

2026-06-15 11:18:58 -06:00
parent 7644446267
commit 23ed52c78a
29 changed files with 2992 additions and 987 deletions
--- a/api/reports/services/init.py
+++ b/api/reports/services/init.py
--- a/api/reports/services/datastage_export.py
+++ b/api/reports/services/datastage_export.py
@@ -0,0 +1,557 @@
+"""
+Lógica de exportación de reportes DataStage, extraída de ExportDataStageView
+para poder ejecutarse dentro de una task Celery (sin request/HttpResponse).
+
+Cada builder devuelve una tupla (content_bytes, filename, content_type, total_rows).
+El aislamiento multi-tenant viene resuelto en global_filters['organizacion']
+(la vista lo resuelve con get_org_context antes de encolar).
+"""
+import csv
+import datetime
+import hashlib
+import io
+import uuid
+import zipfile
+
+import openpyxl
+from django.apps import apps
+from django.core.paginator import Paginator
+
+from api.organization.models import Organizacion
+
+MAX_RECORDS_PER_FILE = 500000  # Per-file row limit for Excel output before partitioning into a ZIP
+
+# Content types returned alongside the generated bytes by the export builders.
+XLSX_CONTENT_TYPE = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
+CSV_CONTENT_TYPE = 'text/csv; charset=utf-8'
+ZIP_CONTENT_TYPE = 'application/zip'
+
+# Fields that make up the grouping key used to join records across models
+# in the multiple-model export.
+RELATION_FIELDS = ['seccion_aduanera', 'patente', 'pedimento']
+
+
+def safe_excel_value(value):
+    """Return ``value`` rendered as a string that is safe to write to Excel/CSV.
+
+    ``None`` becomes an empty string; UUIDs, objects exposing a ``uuid`` or
+    ``id`` attribute, and dates/datetimes get dedicated renderings; anything
+    else falls back to ``str(value)``.
+    """
+    if value is None:
+        return ''
+    if isinstance(value, uuid.UUID):
+        return str(value)
+    # Objects carrying an identifier are exported by that identifier,
+    # preferring ``uuid`` over ``id``.
+    for attr in ('uuid', 'id'):
+        if hasattr(value, attr):
+            return str(getattr(value, attr))
+    if isinstance(value, (datetime.datetime, datetime.date)):
+        return value.isoformat()
+    # Containers and every remaining type share the plain string rendering.
+    return str(value)
+
+
+def apply_global_filters_to_model(global_filters, model):
+    """Translate the global filters into ORM lookups supported by ``model``.
+
+    Args:
+        global_filters: Mapping that may contain ``organizacion``, ``rfc``,
+            ``patente``, ``pedimento``, ``fecha_pago_desde`` and
+            ``fecha_pago_hasta``. Missing or empty values are ignored.
+        model: Django model class whose ``_meta`` is inspected.
+
+    Returns:
+        dict of keyword arguments for ``model.objects.filter(**...)``. A
+        lookup is only emitted when the model declares the matching field.
+    """
+    filters = {}
+    model_fields = {f.name for f in model._meta.get_fields()}
+
+    # Organization: a relation is filtered through ``organizacion_id``
+    # (UUID when parseable); a plain column takes the string as-is.
+    org_value = global_filters.get('organizacion')
+    if org_value and 'organizacion' in model_fields:
+        field = model._meta.get_field('organizacion')
+        # Django's base Field defines ``related_model`` (None on non-relation
+        # fields), so ``hasattr(field, 'related_model')`` cannot tell a FK
+        # from a CharField; ``is_relation`` can.
+        if getattr(field, 'is_relation', False):
+            try:
+                filters['organizacion_id'] = uuid.UUID(org_value)
+            except (ValueError, TypeError, AttributeError):
+                # Not a UUID string: pass the raw value through to the ORM.
+                filters['organizacion_id'] = org_value
+        else:
+            filters['organizacion'] = org_value
+
+    rfc_value = global_filters.get('rfc')
+    if rfc_value and 'rfc' in model_fields:
+        filters['rfc'] = rfc_value
+
+    # Guarded by the model's fields (like ``rfc``) so that a model lacking
+    # these columns does not fail with FieldError when the query is built.
+    for key in ('patente', 'pedimento'):
+        if global_filters.get(key) and key in model_fields:
+            filters[key] = global_filters[key]
+
+    if 'fecha_pago_real' in model_fields:
+        if global_filters.get('fecha_pago_desde'):
+            filters['fecha_pago_real__gte'] = global_filters['fecha_pago_desde']
+        if global_filters.get('fecha_pago_hasta'):
+            filters['fecha_pago_real__lte'] = global_filters['fecha_pago_hasta']
+
+    return filters
+
+
+def apply_related_filters(global_filters, model, related_keys):
+    """Build ORM lookups for multiple-model mode: global filters plus join keys.
+
+    Args:
+        global_filters: Mapping with the optional keys ``organizacion``,
+            ``rfc``, ``patente``, ``pedimento``, ``fecha_pago_desde`` and
+            ``fecha_pago_hasta``.
+        model: Django model class whose ``_meta`` is inspected.
+        related_keys: Mapping with optional ``patentes``, ``pedimentos`` and
+            ``datastage_ids`` collections used to join models. When it holds
+            no values, the global ``patente``/``pedimento`` filters are used
+            instead.
+
+    Returns:
+        dict of keyword arguments for ``model.objects.filter(**...)``; only
+        lookups on fields the model declares are included.
+    """
+    filters = {}
+    related_keys = related_keys or {}
+    model_fields = {f.name for f in model._meta.get_fields()}
+
+    org_value = global_filters.get('organizacion')
+    if org_value and 'organizacion' in model_fields:
+        field = model._meta.get_field('organizacion')
+        # Django's base Field defines ``related_model`` (None on non-relation
+        # fields), so ``hasattr`` is always true; ``is_relation`` is the flag
+        # that actually separates a FK from a plain column.
+        if getattr(field, 'is_relation', False):
+            try:
+                filters['organizacion_id'] = uuid.UUID(org_value)
+            except (ValueError, TypeError, AttributeError):
+                # Not a UUID string: pass the raw value through to the ORM.
+                filters['organizacion_id'] = org_value
+        else:
+            filters['organizacion'] = org_value
+
+    if 'rfc' in model_fields and global_filters.get('rfc'):
+        filters['rfc'] = global_filters['rfc']
+
+    if 'fecha_pago_real' in model_fields:
+        if global_filters.get('fecha_pago_desde'):
+            filters['fecha_pago_real__gte'] = global_filters['fecha_pago_desde']
+        if global_filters.get('fecha_pago_hasta'):
+            filters['fecha_pago_real__lte'] = global_filters['fecha_pago_hasta']
+
+    if any(related_keys.values()):
+        # Join keys take precedence over the global patente/pedimento values.
+        key_lookups = (
+            ('patentes', 'patente'),
+            ('pedimentos', 'pedimento'),
+            ('datastage_ids', 'datastage_id'),
+        )
+        for bucket, column in key_lookups:
+            if related_keys.get(bucket) and column in model_fields:
+                filters[f'{column}__in'] = related_keys[bucket]
+    else:
+        for column in ('patente', 'pedimento'):
+            if column in model_fields and global_filters.get(column):
+                filters[column] = global_filters[column]
+
+    return filters
+
+
+def get_related_keys_from_filters(global_filters, models_data):
+    """Build the patente / pedimento / datastage_id sets used to join models.
+
+    Key rule: a model may seed the join keys only if it has the fields needed
+    to apply every active filter. With the RFC filter active, models without
+    an ``rfc`` field are skipped here and are filtered later by the keys
+    already collected; the same applies to the payment-date filter and
+    ``fecha_pago_real``.
+
+    Args:
+        global_filters: Mapping of global filter values.
+        models_data: Iterable of dicts with a ``model`` key naming a model of
+            the ``datastage`` app.
+
+    Returns:
+        ``{}`` when no filter carries a value; a dict of three empty sets when
+        no seed record matched; otherwise a dict holding a list for each key
+        bucket (``patentes``, ``pedimentos``, ``datastage_ids``) that is not
+        empty.
+    """
+    # No meaningful filter -> nothing to join on.
+    if not any(v for v in global_filters.values() if v not in [None, '']):
+        return {}
+
+    # Model column -> bucket name in the returned mapping.
+    key_columns = {
+        'patente': 'patentes',
+        'pedimento': 'pedimentos',
+        'datastage_id': 'datastage_ids',
+    }
+    related_keys = {bucket: set() for bucket in key_columns.values()}
+
+    rfc_filter_active = bool(global_filters.get('rfc'))
+    date_filter_active = bool(
+        global_filters.get('fecha_pago_desde') or global_filters.get('fecha_pago_hasta')
+    )
+    saw_record = False
+
+    for model_data in models_data:
+        try:
+            model = apps.get_model('datastage', model_data.get('model'))
+        except LookupError:
+            continue
+
+        model_field_names = set()
+        selectable = set()
+        for f in model._meta.get_fields():
+            if hasattr(f, 'name'):
+                model_field_names.add(f.name)
+                selectable.add(f.name)
+            # ``values()`` also accepts a FK's column name (``<name>_id``).
+            attname = getattr(f, 'attname', None)
+            if attname:
+                selectable.add(attname)
+
+        if rfc_filter_active and 'rfc' not in model_field_names:
+            continue
+        if date_filter_active and 'fecha_pago_real' not in model_field_names:
+            continue
+
+        filters = apply_global_filters_to_model(global_filters, model)
+        if not filters:
+            continue
+
+        # Only request key columns the model really has; asking ``values()``
+        # for a missing one raises FieldError.
+        columns = [column for column in key_columns if column in selectable]
+        if not columns:
+            continue
+
+        # Fold rows straight into the sets instead of materialising every
+        # matching row in an intermediate list first.
+        for record in model.objects.filter(**filters).values(*columns):
+            saw_record = True
+            for column in columns:
+                value = record.get(column)
+                if value:
+                    related_keys[key_columns[column]].add(value)
+
+    if not saw_record:
+        return {'patentes': set(), 'pedimentos': set(), 'datastage_ids': set()}
+
+    return {k: list(v) for k, v in related_keys.items() if v}
+
+
+# ---------------------------------------------------------------------------
+# Exportación simple (un solo modelo)
+# ---------------------------------------------------------------------------
+
+def build_simple_export(model_name, fields, global_filters, export_format, progress_cb=None):
+    """Export a single ``datastage`` model as Excel (optionally zipped) or CSV.
+
+    Args:
+        model_name: Name of the model inside the ``datastage`` app.
+        fields: Field names to export, in column order.
+        global_filters: Global filter mapping translated into ORM lookups.
+        export_format: ``'excel'`` selects Excel; any other value yields CSV.
+        progress_cb: Optional ``callable(percent, message)`` progress hook.
+
+    Returns:
+        Tuple ``(content_bytes, filename, content_type, total_records)``.
+
+    Raises:
+        ValueError: If the model does not exist.
+    """
+    report = progress_cb if progress_cb else (lambda _pct, _msg: None)
+
+    try:
+        model = apps.get_model('datastage', model_name)
+    except LookupError:
+        raise ValueError(f'Modelo {model_name} no encontrado')
+
+    lookups = apply_global_filters_to_model(global_filters, model)
+    queryset = model.objects.filter(**lookups).values(*fields)
+    total_records = queryset.count()
+    report(20, f'{model_name}: {total_records} registros encontrados')
+
+    if export_format != 'excel':
+        # CSV has no row limit, so it is always a single file.
+        writer = _simple_csv
+    elif total_records > MAX_RECORDS_PER_FILE:
+        writer = _simple_excel_partitioned
+    else:
+        writer = _simple_excel
+
+    content, filename, content_type = writer(model_name, fields, queryset, report)
+    return content, filename, content_type, total_records
+
+
+def _simple_excel(model_name, fields, queryset, progress_cb):
+    """Write ``queryset`` to a single in-memory ``.xlsx`` workbook.
+
+    The first row holds ``fields`` as headers; every value goes through
+    ``safe_excel_value``. Returns ``(content_bytes, filename, content_type)``.
+    """
+    progress_cb(40, f'Escribiendo Excel de {model_name}...')
+
+    workbook = openpyxl.Workbook()
+    sheet = workbook.active
+    sheet.append(fields)
+    for record in queryset:
+        cells = [safe_excel_value(record[name]) for name in fields]
+        sheet.append(cells)
+
+    progress_cb(88, 'Serializando archivo...')
+    buffer = io.BytesIO()
+    workbook.save(buffer)
+    filename = f'{model_name}.xlsx'
+    return buffer.getvalue(), filename, XLSX_CONTENT_TYPE
+
+
+def _simple_csv(model_name, fields, queryset, progress_cb):
+    """Write ``queryset`` to a single UTF-8 encoded CSV.
+
+    Rows are written as-is through ``csv.DictWriter`` keyed by ``fields``.
+    Returns ``(content_bytes, filename, content_type)``.
+    """
+    progress_cb(40, f'Escribiendo CSV de {model_name}...')
+
+    text = io.StringIO()
+    dict_writer = csv.DictWriter(text, fieldnames=fields)
+    dict_writer.writeheader()
+    dict_writer.writerows(queryset)
+
+    progress_cb(88, 'Serializando archivo...')
+    payload = text.getvalue().encode('utf-8')
+    return payload, f'{model_name}.csv', CSV_CONTENT_TYPE
+
+
+def _simple_excel_partitioned(model_name, fields, queryset, progress_cb):
+    """Split ``queryset`` into several ``.xlsx`` workbooks packed in one ZIP.
+
+    Each part holds at most ``MAX_RECORDS_PER_FILE`` rows plus a header row.
+
+    Args:
+        model_name: Used for progress messages and file names.
+        fields: Column names, in order.
+        queryset: Rows as mappings keyed by field name.
+        progress_cb: ``callable(percent, message)`` progress hook.
+
+    Returns:
+        Tuple ``(zip_bytes, filename, content_type)``.
+    """
+    # Paginating an unordered queryset may repeat or skip rows between pages
+    # because each page is a separate query; fall back to primary-key order
+    # when the queryset carries no ordering of its own.
+    if not getattr(queryset, 'ordered', True):
+        queryset = queryset.order_by('pk')
+
+    zip_buffer = io.BytesIO()
+    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
+        paginator = Paginator(queryset, MAX_RECORDS_PER_FILE)
+        for page_num in paginator.page_range:
+            # Progress for this phase is mapped onto the 25-80 % band.
+            pct = 25 + int((page_num / paginator.num_pages) * 55)
+            progress_cb(pct, f'Particionando {model_name}: parte {page_num}/{paginator.num_pages}')
+            page = paginator.page(page_num)
+
+            wb = openpyxl.Workbook()
+            ws = wb.active
+            # Sheet titles are truncated to 31 characters.
+            ws.title = f'Parte_{page_num}'[:31]
+            ws.append(fields)
+            for row in page.object_list:
+                ws.append([safe_excel_value(row[field]) for field in fields])
+
+            part_buffer = io.BytesIO()
+            wb.save(part_buffer)
+            zip_file.writestr(f'{model_name}_part{page_num}.xlsx', part_buffer.getvalue())
+
+    progress_cb(88, 'Serializando archivo...')
+    return zip_buffer.getvalue(), f'{model_name}_particionado.zip', ZIP_CONTENT_TYPE
+
+
+def _simple_csv_partitioned(model_name, fields, queryset, progress_cb):
+    """Split ``queryset`` into several CSV files packed in one ZIP.
+
+    Each part holds at most ``MAX_RECORDS_PER_FILE`` rows plus a header row;
+    every value goes through ``safe_excel_value``.
+
+    Args:
+        model_name: Used for progress messages and file names.
+        fields: Column names, in order.
+        queryset: Rows as mappings keyed by field name.
+        progress_cb: ``callable(percent, message)`` progress hook.
+
+    Returns:
+        Tuple ``(zip_bytes, filename, content_type)``.
+    """
+    # Paginating an unordered queryset may repeat or skip rows between pages
+    # because each page is a separate query; fall back to primary-key order
+    # when the queryset carries no ordering of its own.
+    if not getattr(queryset, 'ordered', True):
+        queryset = queryset.order_by('pk')
+
+    zip_buffer = io.BytesIO()
+    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
+        paginator = Paginator(queryset, MAX_RECORDS_PER_FILE)
+        for page_num in paginator.page_range:
+            # Progress for this phase is mapped onto the 25-80 % band.
+            pct = 25 + int((page_num / paginator.num_pages) * 55)
+            progress_cb(pct, f'Particionando {model_name}: parte {page_num}/{paginator.num_pages}')
+            page = paginator.page(page_num)
+
+            csv_buffer = io.StringIO()
+            writer = csv.writer(csv_buffer)
+            writer.writerow(fields)
+            writer.writerows(
+                [safe_excel_value(row[field]) for field in fields]
+                for row in page.object_list
+            )
+
+            zip_file.writestr(f'{model_name}_part{page_num}.csv', csv_buffer.getvalue())
+
+    progress_cb(88, 'Serializando archivo...')
+    return zip_buffer.getvalue(), f'{model_name}_particionado.zip', ZIP_CONTENT_TYPE
+
+
+# ---------------------------------------------------------------------------
+# Exportación múltiple (varios modelos agrupados por llaves de cruce)
+# ---------------------------------------------------------------------------
+
+def _collect_multiple_data(models_data, global_filters, related_keys, progress_cb):
+    """
+    Collect the records of every requested model and group them by the key
+    seccion_aduanera + patente + pedimento. Maps organizacion_id -> name.
+
+    Args:
+        models_data: List of dicts with ``model`` (model name in the
+            ``datastage`` app) and ``fields`` (field names to export).
+            Entries missing either one are skipped.
+        global_filters: Global filter mapping, forwarded to
+            ``apply_related_filters``.
+        related_keys: Join keys, forwarded to ``apply_related_filters``.
+        progress_cb: ``callable(percent, message)`` progress hook.
+
+    Returns:
+        dict keyed by the joined relation-field values. Each value is
+        ``{'relation_fields': {field: raw_value},
+        'model_records': {model_name: [processed_record, ...]}}`` where a
+        processed record maps output column names to string values.
+    """
+    # Preload every organization name so rows can be labelled without a query each.
+    org_mapping = {str(org.id): org.nombre for org in Organizacion.objects.all()}
+    all_models_data = {}
+    total_models = len(models_data) or 1
+
+    for idx, model_data in enumerate(models_data):
+        model_name = model_data.get('model')
+        fields = model_data.get('fields', [])
+
+        if not model_name or not fields:
+            continue
+
+        # Normalize fields: 'organizacion' -> 'organizacion_id', without duplicates
+        normalized_fields = []
+        for f in fields:
+            key = f.strip() if isinstance(f, str) else f
+            if isinstance(key, str) and key.lower() == 'organizacion':
+                if 'organizacion_id' not in normalized_fields:
+                    normalized_fields.append('organizacion_id')
+            else:
+                if key not in normalized_fields:
+                    normalized_fields.append(key)
+        # Rebinding to the new list keeps the caller's ``fields`` list untouched.
+        fields = normalized_fields
+
+        # The relation fields are always selected so every record can be keyed.
+        for req_field in RELATION_FIELDS:
+            if req_field not in fields:
+                fields.append(req_field)
+
+        try:
+            model = apps.get_model('datastage', model_name)
+            model_field_names = [f.name for f in model._meta.get_fields() if hasattr(f, 'name')]
+            # NOTE(review): this compares against ``f.name``; if the FK is
+            # declared as ``organizacion`` its name would presumably not be
+            # 'organizacion_id', so this branch may never fire - confirm.
+            if 'organizacion_id' not in fields and 'organizacion_id' in model_field_names:
+                fields.append('organizacion_id')
+
+            filters = apply_related_filters(global_filters, model, related_keys)
+            # With no filters at all nothing is exported for this model.
+            queryset = model.objects.filter(**filters).values(*fields) if filters else model.objects.none()
+
+            count = queryset.count()
+            # Progress for this phase is mapped onto the 20-75 % band.
+            pct = 20 + int((idx / total_models) * 55)
+            progress_cb(pct, f'Modelo {idx + 1}/{total_models}: {model_name} ({count} registros)')
+            if count == 0:
+                continue
+
+            relation_fields = [fn for fn in RELATION_FIELDS if fn in fields]
+            # NOTE(review): RELATION_FIELDS were appended to ``fields`` above,
+            # so this fallback looks unreachable - confirm before relying on it.
+            if not relation_fields:
+                relation_fields = ['datastage_id'] if 'datastage_id' in fields else [fields[0]]
+
+            for record in queryset:
+                # None values are dropped from the key rather than rendered as 'None'.
+                key_parts = [str(record[rf]) for rf in relation_fields if rf in record and record[rf] is not None]
+                if not key_parts:
+                    # No usable relation value: derive a key from the record content.
+                    key = hashlib.md5(str(sorted(record.items())).encode()).hexdigest()[:10]
+                else:
+                    key = "_".join(key_parts)
+
+                processed_record = {}
+                for field_name, value in record.items():
+                    if field_name == 'organizacion_id' and value:
+                        org_id_str = str(value)
+                        if org_id_str in org_mapping:
+                            processed_value = org_mapping[org_id_str]
+                        else:
+                            # Cache miss: look the organization up once and remember the answer.
+                            try:
+                                org = Organizacion.objects.filter(id=value).first()
+                                processed_value = org.nombre if org else org_id_str
+                                org_mapping[org_id_str] = processed_value
+                            except Exception:
+                                processed_value = org_id_str
+                    else:
+                        processed_value = value
+
+                    # Relation fields keep their bare name; the rest are prefixed with the model name.
+                    if field_name in relation_fields:
+                        prefixed_field_name = field_name
+                    else:
+                        prefixed_field_name = f"{model_name}_{field_name}"
+
+                    # The column carries the organization name, so it is renamed accordingly.
+                    if field_name == 'organizacion_id':
+                        prefixed_field_name = prefixed_field_name.replace('organizacion_id', 'organizacion_nombre')
+
+                    processed_record[prefixed_field_name] = safe_excel_value(processed_value)
+
+                if key not in all_models_data:
+                    all_models_data[key] = {'relation_fields': {}, 'model_records': {}}
+
+                for rel_field in relation_fields:
+                    if rel_field in record:
+                        all_models_data[key]['relation_fields'][rel_field] = record[rel_field]
+
+                if model_name not in all_models_data[key]['model_records']:
+                    all_models_data[key]['model_records'][model_name] = []
+
+                all_models_data[key]['model_records'][model_name].append(processed_record)
+
+        except LookupError:
+            # Unknown model name: skip it and continue with the rest.
+            continue
+
+    return all_models_data
+
+
+def _build_combined_rows(all_models_data):
+    """Flatten the grouped data into one dict per output row.
+
+    For each key, as many rows are produced as the longest record list among
+    its models; a model with fewer records repeats its last one instead of
+    leaving the cells empty.
+    """
+    rows = []
+    for entry in all_models_data.values():
+        relation_values = entry['relation_fields']
+        per_model = entry['model_records']
+        depth = max((len(recs) for recs in per_model.values()), default=1)
+
+        for position in range(depth):
+            row = {name: safe_excel_value(raw) for name, raw in relation_values.items()}
+            for records in per_model.values():
+                # Use the record at this position, or the last available one.
+                source = records[position] if position < len(records) else records[-1]
+                row.update(source)
+            rows.append(row)
+
+    return rows
+
+
+def _ordered_fields(combined_rows):
+    """Return the header order: relation fields, then organization columns, then the rest.
+
+    Relation fields keep the order of ``RELATION_FIELDS``; the other two
+    groups are sorted alphabetically.
+    """
+    remaining = set()
+    for row in combined_rows:
+        remaining.update(row.keys())
+
+    leading = [name for name in RELATION_FIELDS if name in remaining]
+    remaining.difference_update(leading)
+
+    org_columns = sorted(name for name in remaining if 'organizacion' in name.lower())
+    remaining.difference_update(org_columns)
+
+    return leading + org_columns + sorted(remaining)
+
+
+def build_multiple_export(models_data, global_filters, export_format, progress_cb=None):
+    """Export several models joined by their relation keys into one report.
+
+    Args:
+        models_data: List of ``{'model': ..., 'fields': [...]}`` dicts.
+        global_filters: Global filter mapping.
+        export_format: ``'excel'`` selects Excel; any other value yields CSV.
+        progress_cb: Optional ``callable(percent, message)`` progress hook.
+
+    Returns:
+        Tuple ``(content_bytes, filename, content_type, total_rows)``. When
+        nothing matches, a file holding only a message is returned with
+        ``total_rows`` equal to 0.
+    """
+    report = progress_cb if progress_cb else (lambda _pct, _msg: None)
+    wants_excel = export_format == 'excel'
+
+    report(15, 'Resolviendo llaves de cruce entre modelos...')
+    related_keys = get_related_keys_from_filters(global_filters, models_data)
+    all_models_data = _collect_multiple_data(models_data, global_filters, related_keys, report)
+
+    # No data -> a file with a message, not an error (the frontend expects a file).
+    if not all_models_data:
+        if wants_excel:
+            workbook = openpyxl.Workbook()
+            sheet = workbook.active
+            sheet.title = "Sin datos"
+            sheet.append(["No se encontraron datos para los filtros especificados"])
+            binary = io.BytesIO()
+            workbook.save(binary)
+            return binary.getvalue(), 'datastage_sin_datos.xlsx', XLSX_CONTENT_TYPE, 0
+        text = io.StringIO()
+        csv.writer(text).writerow(['No se encontraron datos para los filtros especificados'])
+        return text.getvalue().encode('utf-8'), 'datastage_sin_datos.csv', CSV_CONTENT_TYPE, 0
+
+    report(80, 'Combinando filas...')
+    combined_rows = _build_combined_rows(all_models_data)
+    all_fields = _ordered_fields(combined_rows)
+
+    serializer = _multiple_excel if wants_excel else _multiple_csv
+    content, filename, content_type = serializer(combined_rows, all_fields, report)
+    return content, filename, content_type, len(combined_rows)
+
+
+def _multiple_excel(combined_rows, all_fields, progress_cb):
+    """Serialize the combined rows to Excel, zipping several workbooks if needed.
+
+    A single ``.xlsx`` is returned when all rows fit within
+    ``MAX_RECORDS_PER_FILE``; otherwise one workbook per page is packed into
+    a ZIP. Returns ``(content_bytes, filename, content_type)``.
+    """
+    generated_at = datetime.datetime.now().strftime('%d/%m/%Y %H:%M:%S')
+    # Title, timestamp, a blank spacer row and the column headers.
+    preamble = (["Reporte Datastage"], [f"Generado: {generated_at}"], [], all_fields)
+
+    def _fill_sheet(sheet, sheet_name, rows):
+        # Sheet titles are truncated to 31 characters.
+        sheet.title = sheet_name[:31]
+        for header in preamble:
+            sheet.append(header)
+        for row in rows:
+            sheet.append([row.get(name, '') for name in all_fields])
+        # Size each column to its widest cell, capped at 50.
+        for cells in sheet.columns:
+            widest = 0
+            letter = cells[0].column_letter
+            for cell in cells:
+                try:
+                    widest = max(widest, len(str(cell.value)))
+                except Exception:
+                    pass
+            sheet.column_dimensions[letter].width = min(widest + 2, 50)
+
+    # Plain Excel when everything fits in one file; ZIP only when partitioning is required.
+    paginator = Paginator(combined_rows, MAX_RECORDS_PER_FILE)
+    total_pages = paginator.num_pages
+
+    if total_pages == 1:
+        progress_cb(88, 'Serializando archivo...')
+        workbook = openpyxl.Workbook()
+        _fill_sheet(workbook.active, "Datastage", paginator.page(1).object_list)
+        single = io.BytesIO()
+        workbook.save(single)
+        return single.getvalue(), 'datastage_reporte.xlsx', XLSX_CONTENT_TYPE
+
+    archive_buffer = io.BytesIO()
+    with zipfile.ZipFile(archive_buffer, 'w', zipfile.ZIP_DEFLATED) as archive:
+        for page_num in paginator.page_range:
+            progress_cb(80 + int((page_num / total_pages) * 8),
+                        f'Particionando: parte {page_num}/{total_pages}')
+            part_rows = paginator.page(page_num).object_list
+            part_wb = openpyxl.Workbook()
+            _fill_sheet(part_wb.active, f"Datastage_p{page_num}", part_rows)
+            part_bytes = io.BytesIO()
+            part_wb.save(part_bytes)
+            archive.writestr(f"datastage_part{page_num}.xlsx", part_bytes.getvalue())
+
+    progress_cb(88, 'Serializando archivo...')
+    return archive_buffer.getvalue(), 'datastage_combinado.zip', ZIP_CONTENT_TYPE
+
+
+def _multiple_csv(combined_rows, all_fields, progress_cb):
+    """Serialize the combined rows to a single UTF-8 CSV.
+
+    Columns follow ``all_fields``; a column missing from a row is written as
+    an empty string. Returns ``(content_bytes, filename, content_type)``.
+    """
+    progress_cb(88, 'Serializando archivo...')
+    text = io.StringIO()
+    out = csv.writer(text)
+    out.writerow(all_fields)
+    out.writerows(
+        [row.get(name, '') for name in all_fields]
+        for row in combined_rows
+    )
+    encoded = text.getvalue().encode('utf-8')
+    return encoded, 'datastage_reporte.csv', CSV_CONTENT_TYPE
+
+
+# ---------------------------------------------------------------------------
+# Dispatcher
+# ---------------------------------------------------------------------------
+
+def build_datastage_export(payload, progress_cb=None):
+    """Dispatch a DataStage export request to the simple or multiple builder.
+
+    Args:
+        payload: Mapping read with the keys ``modo`` (default ``'simple'``),
+            ``format`` (default ``'csv'``), ``globalFilters``, and either
+            ``models`` (multiple mode) or ``model`` + ``fields`` (simple mode).
+        progress_cb: Optional ``callable(percent, message)`` progress hook.
+
+    Returns:
+        Tuple ``(content_bytes, filename, content_type, total_rows)``.
+
+    Raises:
+        ValueError: If the fields required by the selected mode are missing
+            or empty.
+    """
+    export_format = payload.get('format', 'csv')
+    global_filters = payload.get('globalFilters') or {}
+
+    if payload.get('modo', 'simple') == 'multiple':
+        models_data = payload.get('models') or []
+        if not models_data:
+            raise ValueError('models es requerido para exportación múltiple')
+        return build_multiple_export(models_data, global_filters, export_format, progress_cb)
+
+    model_name, fields = payload.get('model'), payload.get('fields')
+    if not (model_name and fields):
+        raise ValueError('model y fields son requeridos para exportación simple')
+    return build_simple_export(model_name, fields, global_filters, export_format, progress_cb)