fix/de los tickets T2026-05-027, T2025-09-004 y T2025-09-056

This commit is contained in:
2026-06-15 11:18:58 -06:00
parent 7644446267
commit 23ed52c78a
29 changed files with 2992 additions and 987 deletions

View File

View File

@@ -0,0 +1,557 @@
"""
Lógica de exportación de reportes DataStage, extraída de ExportDataStageView
para poder ejecutarse dentro de una task Celery (sin request/HttpResponse).
Cada builder devuelve una tupla (content_bytes, filename, content_type, total_rows).
El aislamiento multi-tenant viene resuelto en global_filters['organizacion']
(la vista lo resuelve con get_org_context antes de encolar).
"""
import csv
import datetime
import hashlib
import io
import uuid
import zipfile
import openpyxl
from django.apps import apps
from django.core.paginator import Paginator
from api.organization.models import Organizacion
MAX_RECORDS_PER_FILE = 500000  # Per-file row limit for Excel output before partitioning into a ZIP
# MIME types returned together with each generated file.
XLSX_CONTENT_TYPE = 'application/vnd.openxmlformats-officedocument.spreadsheetml.sheet'
CSV_CONTENT_TYPE = 'text/csv; charset=utf-8'
ZIP_CONTENT_TYPE = 'application/zip'
# Fields that together form the cross-model grouping key in multi-model exports.
RELATION_FIELDS = ['seccion_aduanera', 'patente', 'pedimento']
# ASCII control characters that spreadsheet XML cannot carry: everything
# below 0x20 except tab, line feed and carriage return. openpyxl refuses to
# write a cell containing any of them (IllegalCharacterError), which would
# abort the whole export because of a single dirty value.
_ILLEGAL_XLSX_CHARS = dict.fromkeys(
    code for code in range(0x20) if code not in (0x09, 0x0A, 0x0D)
)


def safe_excel_value(value):
    """Convert any value to a string that is safe to write to Excel/CSV.

    Conversion rules, in order:
      * ``None`` becomes the empty string.
      * ``uuid.UUID`` instances are rendered with ``str()``.
      * Objects exposing a ``uuid`` attribute are rendered as that attribute.
      * Objects exposing an ``id`` attribute are rendered as that attribute.
      * ``datetime``/``date`` values are rendered in ISO 8601 format.
      * Anything else (including dicts and lists) is rendered with ``str()``.

    Control characters that cannot be stored in a worksheet cell are removed
    from the resulting text.

    Args:
        value: The raw value read from the database row.

    Returns:
        str: The sanitized textual representation.
    """
    if value is None:
        return ''
    if isinstance(value, uuid.UUID):
        text = str(value)
    elif hasattr(value, 'uuid'):
        text = str(value.uuid)
    elif hasattr(value, 'id'):
        text = str(value.id)
    elif isinstance(value, (datetime.datetime, datetime.date)):
        text = value.isoformat()
    else:
        text = str(value)
    # Mapping a code point to None in str.translate deletes it.
    return text.translate(_ILLEGAL_XLSX_CHARS)
def apply_global_filters_to_model(global_filters, model):
    """Translate the global filters into ORM lookups supported by ``model``.

    A filter is emitted only when the model actually declares the target
    field, so the result can be passed to ``model.objects.filter(**filters)``
    without raising on models that lack a given column.

    Args:
        global_filters: Mapping that may contain ``organizacion``, ``rfc``,
            ``patente``, ``pedimento``, ``fecha_pago_desde`` and
            ``fecha_pago_hasta``. Missing or empty values are ignored.
        model: Model class whose ``_meta`` is inspected for field names.

    Returns:
        dict: ORM lookup name -> value. Empty when no filter applies.
    """
    filters = {}
    model_fields = {f.name for f in model._meta.get_fields()}

    # Organization: a relation is filtered through its ``_id`` column using a
    # UUID; a plain column (e.g. CharField) receives the string unchanged.
    org_value = global_filters.get('organizacion')
    if org_value and 'organizacion' in model_fields:
        field = model._meta.get_field('organizacion')
        # ``hasattr(field, 'related_model')`` cannot discriminate here: plain
        # fields also expose that attribute (set to None). ``is_relation`` is
        # the flag that tells relations apart.
        if getattr(field, 'is_relation', False):
            try:
                filters['organizacion_id'] = uuid.UUID(org_value)
            except (AttributeError, TypeError, ValueError):
                # Not a UUID string (or already a non-string key): pass the
                # raw value through and let the ORM coerce it.
                filters['organizacion_id'] = org_value
        else:
            filters['organizacion'] = org_value

    # Exact-match filters, applied only to models that declare the field.
    for name in ('rfc', 'patente', 'pedimento'):
        value = global_filters.get(name)
        if value and name in model_fields:
            filters[name] = value

    # Payment date range.
    if 'fecha_pago_real' in model_fields:
        if global_filters.get('fecha_pago_desde'):
            filters['fecha_pago_real__gte'] = global_filters['fecha_pago_desde']
        if global_filters.get('fecha_pago_hasta'):
            filters['fecha_pago_real__lte'] = global_filters['fecha_pago_hasta']
    return filters
def apply_related_filters(global_filters, model, related_keys):
    """Build the ORM lookups for multi-model mode.

    Combines the global filters with the cross-model keys. When at least one
    collection in ``related_keys`` is non-empty, the model is restricted with
    ``__in`` lookups on those keys; otherwise the plain global ``patente`` /
    ``pedimento`` filters are used. Every lookup is emitted only if the model
    declares the corresponding field.

    Args:
        global_filters: Mapping with the optional global filter values.
        model: Model class whose ``_meta`` is inspected for field names.
        related_keys: Mapping with optional ``patentes``, ``pedimentos`` and
            ``datastage_ids`` collections. ``None`` is treated as empty.

    Returns:
        dict: ORM lookup name -> value. Empty when no filter applies.
    """
    filters = {}
    related_keys = related_keys or {}
    model_fields = {f.name for f in model._meta.get_fields()}

    org_value = global_filters.get('organizacion')
    if org_value and 'organizacion' in model_fields:
        field = model._meta.get_field('organizacion')
        # Plain fields also expose ``related_model`` (as None), so hasattr()
        # cannot tell a relation from a CharField; ``is_relation`` can.
        if getattr(field, 'is_relation', False):
            try:
                filters['organizacion_id'] = uuid.UUID(org_value)
            except (AttributeError, TypeError, ValueError):
                # Value is not a UUID string: hand it to the ORM unchanged.
                filters['organizacion_id'] = org_value
        else:
            filters['organizacion'] = org_value

    if 'rfc' in model_fields and global_filters.get('rfc'):
        filters['rfc'] = global_filters['rfc']

    if 'fecha_pago_real' in model_fields:
        if global_filters.get('fecha_pago_desde'):
            filters['fecha_pago_real__gte'] = global_filters['fecha_pago_desde']
        if global_filters.get('fecha_pago_hasta'):
            filters['fecha_pago_real__lte'] = global_filters['fecha_pago_hasta']

    if any(related_keys.values()):
        # Cross keys available: restrict this model to the seeded records.
        key_to_field = (
            ('patentes', 'patente'),
            ('pedimentos', 'pedimento'),
            ('datastage_ids', 'datastage_id'),
        )
        for key_name, field_name in key_to_field:
            values = related_keys.get(key_name)
            if values and field_name in model_fields:
                filters[f'{field_name}__in'] = values
    else:
        # No cross keys: fall back to the direct global filters.
        for field_name in ('patente', 'pedimento'):
            if field_name in model_fields and global_filters.get(field_name):
                filters[field_name] = global_filters[field_name]
    return filters
def get_related_keys_from_filters(global_filters, models_data):
    """Build the (patente, pedimento, datastage_id) sets used as cross keys.

    Key rule: a model may seed the cross keys only if it has the fields
    needed to apply every active filter. With the RFC filter active, models
    without an ``rfc`` field are skipped as seeds; with a payment-date filter
    active, models without ``fecha_pago_real`` are skipped. Those models are
    filtered later using the keys gathered here, which keeps them from adding
    records that belong to another RFC or date range.

    Args:
        global_filters: Mapping with the optional global filter values.
        models_data: Iterable of dicts; each one names a model under the
            ``model`` key. Entries without a model name are ignored.

    Returns:
        dict:
          * ``{}`` when no meaningful global filter is set.
          * A dict with three empty sets when filters are set but no seed
            row was found.
          * Otherwise, the non-empty key collections as lists, under
            ``patentes``, ``pedimentos`` and/or ``datastage_ids``.
    """
    # No meaningful filter -> no cross-model restriction.
    if not any(v for v in global_filters.values() if v not in [None, '']):
        return {}

    related_keys = {
        'patentes': set(),
        'pedimentos': set(),
        'datastage_ids': set(),
    }
    rfc_filter_active = bool(global_filters.get('rfc'))
    date_filter_active = bool(
        global_filters.get('fecha_pago_desde') or global_filters.get('fecha_pago_hasta')
    )
    found_any = False

    for model_data in models_data:
        model_name = model_data.get('model')
        if not model_name:
            # Nothing to look up; get_model() would fail on a missing name.
            continue
        try:
            model = apps.get_model('datastage', model_name)
        except LookupError:
            continue

        model_field_names = {f.name for f in model._meta.get_fields() if hasattr(f, 'name')}
        if rfc_filter_active and 'rfc' not in model_field_names:
            continue
        if date_filter_active and 'fecha_pago_real' not in model_field_names:
            continue

        filters = apply_global_filters_to_model(global_filters, model)
        if not filters:
            continue

        # Only the distinct key combinations matter, so let the database
        # de-duplicate them instead of loading every matching row. The empty
        # order_by() drops any default ordering that would otherwise add
        # columns to the DISTINCT clause.
        key_rows = (
            model.objects.filter(**filters)
            .order_by()
            .values_list('patente', 'pedimento', 'datastage_id')
            .distinct()
        )
        for patente, pedimento, datastage_id in key_rows:
            found_any = True
            if patente:
                related_keys['patentes'].add(patente)
            if pedimento:
                related_keys['pedimentos'].add(pedimento)
            if datastage_id:
                related_keys['datastage_ids'].add(datastage_id)

    if not found_any:
        # Filters were active but matched nothing: all three sets are empty.
        return related_keys
    return {k: list(v) for k, v in related_keys.items() if v}
# ---------------------------------------------------------------------------
# Exportación simple (un solo modelo)
# ---------------------------------------------------------------------------
def build_simple_export(model_name, fields, global_filters, export_format, progress_cb=None):
    """Export a single DataStage model.

    Args:
        model_name: Name of the model inside the ``datastage`` app.
        fields: Field names to include, in output column order.
        global_filters: Mapping with the global filter values.
        export_format: ``'excel'`` for XLSX output; any other value yields CSV.
        progress_cb: Optional ``callable(percent, message)`` for progress.

    Returns:
        tuple: ``(content_bytes, filename, content_type, total_rows)``.

    Raises:
        ValueError: If the model cannot be found.
    """
    notify = progress_cb if progress_cb else (lambda p, m: None)

    try:
        model = apps.get_model('datastage', model_name)
    except LookupError:
        raise ValueError(f'Modelo {model_name} no encontrado')

    orm_filters = apply_global_filters_to_model(global_filters, model)
    rows = model.objects.filter(**orm_filters).values(*fields)
    total_records = rows.count()
    notify(20, f'{model_name}: {total_records} registros encontrados')

    # CSV has no row limit, so it is always a single file; Excel output is
    # partitioned into a ZIP once it exceeds the per-file limit.
    if export_format != 'excel':
        builder = _simple_csv
    elif total_records > MAX_RECORDS_PER_FILE:
        builder = _simple_excel_partitioned
    else:
        builder = _simple_excel

    content, filename, content_type = builder(model_name, fields, rows, notify)
    return content, filename, content_type, total_records
def _simple_excel(model_name, fields, queryset, progress_cb):
    """Write the rows to a single-sheet workbook.

    The first row holds the field names; every value goes through
    ``safe_excel_value``.

    Returns:
        tuple: ``(content_bytes, filename, content_type)``.
    """
    progress_cb(40, f'Escribiendo Excel de {model_name}...')

    workbook = openpyxl.Workbook()
    sheet = workbook.active
    sheet.append(fields)
    for record in queryset:
        cells = [safe_excel_value(record[name]) for name in fields]
        sheet.append(cells)

    progress_cb(88, 'Serializando archivo...')
    buffer = io.BytesIO()
    workbook.save(buffer)
    payload = buffer.getvalue()
    return payload, f'{model_name}.xlsx', XLSX_CONTENT_TYPE
def _simple_csv(model_name, fields, queryset, progress_cb):
    """Write the rows to a single UTF-8 encoded CSV file.

    Rows are written as-is by ``csv.DictWriter`` using ``fields`` as header
    and column order.

    Returns:
        tuple: ``(content_bytes, filename, content_type)``.
    """
    progress_cb(40, f'Escribiendo CSV de {model_name}...')

    text_buffer = io.StringIO()
    dict_writer = csv.DictWriter(text_buffer, fieldnames=fields)
    dict_writer.writeheader()
    dict_writer.writerows(queryset)

    progress_cb(88, 'Serializando archivo...')
    encoded = text_buffer.getvalue().encode('utf-8')
    return encoded, f'{model_name}.csv', CSV_CONTENT_TYPE
def _simple_excel_partitioned(model_name, fields, queryset, progress_cb):
    """Split the rows into several workbooks and bundle them in a ZIP.

    Each part holds at most ``MAX_RECORDS_PER_FILE`` rows plus a header row
    and is stored in the archive as ``<model_name>_part<N>.xlsx``.

    Returns:
        tuple: ``(content_bytes, filename, content_type)``.
    """
    # The paginator fetches each page with its own sliced query. Without a
    # deterministic ordering the database is free to return rows in a
    # different order for every page, which can duplicate or drop rows
    # across parts. Impose a stable order when the queryset has none.
    if not getattr(queryset, 'ordered', True):
        queryset = queryset.order_by('pk')

    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        paginator = Paginator(queryset, MAX_RECORDS_PER_FILE)
        for page_num in paginator.page_range:
            # Partitioning reports progress between 25% and 80%.
            pct = 25 + int((page_num / paginator.num_pages) * 55)
            progress_cb(pct, f'Particionando {model_name}: parte {page_num}/{paginator.num_pages}')
            page = paginator.page(page_num)

            wb = openpyxl.Workbook()
            ws = wb.active
            ws.title = f'Parte_{page_num}'[:31]  # sheet titles are capped at 31 chars
            ws.append(fields)
            for row in page.object_list:
                ws.append([safe_excel_value(row[field]) for field in fields])

            part_buffer = io.BytesIO()
            wb.save(part_buffer)
            zip_file.writestr(f'{model_name}_part{page_num}.xlsx', part_buffer.getvalue())

    progress_cb(88, 'Serializando archivo...')
    return zip_buffer.getvalue(), f'{model_name}_particionado.zip', ZIP_CONTENT_TYPE
def _simple_csv_partitioned(model_name, fields, queryset, progress_cb):
    """Split the rows into several CSV files and bundle them in a ZIP.

    Each part holds at most ``MAX_RECORDS_PER_FILE`` rows plus a header row
    and is stored in the archive as ``<model_name>_part<N>.csv``.

    Returns:
        tuple: ``(content_bytes, filename, content_type)``.
    """
    # The paginator fetches each page with its own sliced query. Without a
    # deterministic ordering the database is free to return rows in a
    # different order for every page, which can duplicate or drop rows
    # across parts. Impose a stable order when the queryset has none.
    if not getattr(queryset, 'ordered', True):
        queryset = queryset.order_by('pk')

    zip_buffer = io.BytesIO()
    with zipfile.ZipFile(zip_buffer, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        paginator = Paginator(queryset, MAX_RECORDS_PER_FILE)
        for page_num in paginator.page_range:
            # Partitioning reports progress between 25% and 80%.
            pct = 25 + int((page_num / paginator.num_pages) * 55)
            progress_cb(pct, f'Particionando {model_name}: parte {page_num}/{paginator.num_pages}')
            page = paginator.page(page_num)

            csv_buffer = io.StringIO()
            writer = csv.writer(csv_buffer)
            writer.writerow(fields)
            for row in page.object_list:
                writer.writerow([safe_excel_value(row[field]) for field in fields])

            zip_file.writestr(f'{model_name}_part{page_num}.csv', csv_buffer.getvalue())

    progress_cb(88, 'Serializando archivo...')
    return zip_buffer.getvalue(), f'{model_name}_particionado.zip', ZIP_CONTENT_TYPE
# ---------------------------------------------------------------------------
# Exportación múltiple (varios modelos agrupados por llaves de cruce)
# ---------------------------------------------------------------------------
def _collect_multiple_data(models_data, global_filters, related_keys, progress_cb):
    """Collect the rows of every requested model, grouped by cross key.

    The grouping key is the concatenation of the non-null values of
    ``seccion_aduanera``, ``patente`` and ``pedimento``. Organization ids are
    replaced by the organization name.

    Args:
        models_data: Iterable of dicts with ``model`` and ``fields`` keys.
        global_filters: Mapping with the global filter values.
        related_keys: Cross keys used to restrict each model.
        progress_cb: ``callable(percent, message)`` for progress reporting.

    Returns:
        dict: ``key -> {'relation_fields': {...}, 'model_records': {model: [row, ...]}}``.
    """
    org_names = {str(org.id): org.nombre for org in Organizacion.objects.all()}
    grouped = {}
    total_models = len(models_data) or 1

    for position, model_data in enumerate(models_data):
        model_name = model_data.get('model')
        requested = model_data.get('fields', [])
        if not (model_name and requested):
            continue

        # Normalize: trim names, map 'organizacion' to 'organizacion_id',
        # and drop duplicates while keeping the requested order.
        fields = []
        for raw in requested:
            name = raw.strip() if isinstance(raw, str) else raw
            if isinstance(name, str) and name.lower() == 'organizacion':
                name = 'organizacion_id'
            if name not in fields:
                fields.append(name)

        # The grouping fields are always fetched.
        fields += [required for required in RELATION_FIELDS if required not in fields]

        try:
            model = apps.get_model('datastage', model_name)
            model_field_names = [f.name for f in model._meta.get_fields() if hasattr(f, 'name')]
            # NOTE(review): model_field_names holds Field.name values; for a
            # ForeignKey declared as 'organizacion' the name would presumably
            # be 'organizacion', so this branch may never fire - confirm.
            if 'organizacion_id' in model_field_names and 'organizacion_id' not in fields:
                fields.append('organizacion_id')

            filters = apply_related_filters(global_filters, model, related_keys)
            if filters:
                queryset = model.objects.filter(**filters).values(*fields)
            else:
                # Without any filter the model contributes nothing.
                queryset = model.objects.none()

            count = queryset.count()
            # Collection reports progress between 20% and 75%.
            pct = 20 + int((position / total_models) * 55)
            progress_cb(pct, f'Modelo {position + 1}/{total_models}: {model_name} ({count} registros)')
            if count == 0:
                continue

            relation_fields = [fn for fn in RELATION_FIELDS if fn in fields]
            if not relation_fields:
                relation_fields = ['datastage_id'] if 'datastage_id' in fields else [fields[0]]

            for record in queryset:
                key_parts = [
                    str(record[rf])
                    for rf in relation_fields
                    if rf in record and record[rf] is not None
                ]
                if key_parts:
                    key = "_".join(key_parts)
                else:
                    # No usable grouping value: derive a key from the row itself.
                    key = hashlib.md5(str(sorted(record.items())).encode()).hexdigest()[:10]

                processed = {}
                for field_name, value in record.items():
                    if field_name == 'organizacion_id' and value:
                        org_id_str = str(value)
                        if org_id_str in org_names:
                            resolved = org_names[org_id_str]
                        else:
                            # Not in the preloaded map: look it up once and
                            # remember the answer (name, or the id as text).
                            try:
                                org = Organizacion.objects.filter(id=value).first()
                                resolved = org.nombre if org else org_id_str
                                org_names[org_id_str] = resolved
                            except Exception:
                                resolved = org_id_str
                    else:
                        resolved = value

                    # Grouping fields keep their bare name; everything else
                    # is prefixed with the model name.
                    if field_name in relation_fields:
                        column = field_name
                    else:
                        column = f"{model_name}_{field_name}"
                        if field_name == 'organizacion_id':
                            column = column.replace('organizacion_id', 'organizacion_nombre')
                    processed[column] = safe_excel_value(resolved)

                bucket = grouped.setdefault(key, {'relation_fields': {}, 'model_records': {}})
                for rel_field in relation_fields:
                    if rel_field in record:
                        bucket['relation_fields'][rel_field] = record[rel_field]
                bucket['model_records'].setdefault(model_name, []).append(processed)
        except LookupError:
            continue

    return grouped
def _build_combined_rows(all_models_data):
"""Construye filas combinadas — repite el último registro en lugar de dejar vacíos."""
combined_rows = []
for key, data in all_models_data.items():
relation_fields_data = data['relation_fields']
model_records = data['model_records']
max_records_per_key = max((len(recs) for recs in model_records.values()), default=1)
for i in range(max_records_per_key):
row_data = {}
for rel_field, rel_value in relation_fields_data.items():
row_data[rel_field] = safe_excel_value(rel_value)
for model_name, records in model_records.items():
# Usar posición i o el último registro disponible
record = records[i] if i < len(records) else records[-1]
for field_name, value in record.items():
row_data[field_name] = value
combined_rows.append(row_data)
return combined_rows
def _ordered_fields(combined_rows):
    """Compute the header order for the combined report.

    Order: the relation fields first (in ``RELATION_FIELDS`` order), then
    every column whose name contains ``organizacion`` (sorted), then the
    remaining columns (sorted).

    Args:
        combined_rows: List of row dicts.

    Returns:
        list[str]: Column names in output order.
    """
    remaining = set().union(*(row.keys() for row in combined_rows))

    leading = [name for name in RELATION_FIELDS if name in remaining]
    remaining.difference_update(leading)

    org_columns = sorted(name for name in remaining if 'organizacion' in name.lower())
    remaining.difference_update(org_columns)

    return leading + org_columns + sorted(remaining)
def build_multiple_export(models_data, global_filters, export_format, progress_cb=None):
    """Export several models combined into one report.

    Args:
        models_data: Iterable of dicts with ``model`` and ``fields`` keys.
        global_filters: Mapping with the global filter values.
        export_format: ``'excel'`` for XLSX output; any other value yields CSV.
        progress_cb: Optional ``callable(percent, message)`` for progress.

    Returns:
        tuple: ``(content_bytes, filename, content_type, total_rows)``. When
        nothing matches, a file containing only a "no data" message is
        returned with ``total_rows == 0``.
    """
    notify = progress_cb or (lambda p, m: None)
    wants_excel = export_format == 'excel'

    notify(15, 'Resolviendo llaves de cruce entre modelos...')
    # NOTE(review): when filters are active but no seed row matches, the
    # returned keys hold only empty collections and apply_related_filters
    # falls back to the plain global filters - confirm that is the intended
    # result for models that lack the filtered field.
    related_keys = get_related_keys_from_filters(global_filters, models_data)
    grouped = _collect_multiple_data(models_data, global_filters, related_keys, notify)

    if not grouped:
        # No data is not an error: the frontend expects a file either way.
        if wants_excel:
            wb = openpyxl.Workbook()
            ws = wb.active
            ws.title = "Sin datos"
            ws.append(["No se encontraron datos para los filtros especificados"])
            output = io.BytesIO()
            wb.save(output)
            return output.getvalue(), 'datastage_sin_datos.xlsx', XLSX_CONTENT_TYPE, 0
        buf = io.StringIO()
        csv.writer(buf).writerow(['No se encontraron datos para los filtros especificados'])
        return buf.getvalue().encode('utf-8'), 'datastage_sin_datos.csv', CSV_CONTENT_TYPE, 0

    notify(80, 'Combinando filas...')
    combined_rows = _build_combined_rows(grouped)
    all_fields = _ordered_fields(combined_rows)
    total_rows = len(combined_rows)

    render = _multiple_excel if wants_excel else _multiple_csv
    content, filename, content_type = render(combined_rows, all_fields, notify)
    return content, filename, content_type, total_rows
def _multiple_excel(combined_rows, all_fields, progress_cb):
    """Render the combined rows as Excel.

    Produces a single workbook when the rows fit in one file; otherwise a
    ZIP with one workbook per ``MAX_RECORDS_PER_FILE`` rows. Every sheet
    starts with a title row, a generation timestamp, a blank row and the
    header row, and its column widths are fitted to content (capped at 50).

    Returns:
        tuple: ``(content_bytes, filename, content_type)``.
    """
    generated_at = datetime.datetime.now().strftime('%d/%m/%Y %H:%M:%S')
    preamble = (
        ["Reporte Datastage"],
        [f"Generado: {generated_at}"],
        [],
        all_fields,
    )

    def _fill_sheet(ws, sheet_name, page_rows):
        ws.title = sheet_name[:31]
        for line in preamble:
            ws.append(line)
        for row_data in page_rows:
            ws.append([row_data.get(field, '') for field in all_fields])
        # Fit each column to its longest rendered value.
        for column in ws.columns:
            col_letter = column[0].column_letter
            widest = 0
            for cell in column:
                try:
                    rendered_len = len(str(cell.value))
                    if rendered_len > widest:
                        widest = rendered_len
                except Exception:
                    pass
            ws.column_dimensions[col_letter].width = min(widest + 2, 50)

    def _to_bytes(workbook):
        sink = io.BytesIO()
        workbook.save(sink)
        return sink.getvalue()

    paginator = Paginator(combined_rows, MAX_RECORDS_PER_FILE)
    total_parts = paginator.num_pages

    # Plain workbook when everything fits in one file.
    if total_parts == 1:
        progress_cb(88, 'Serializando archivo...')
        single_wb = openpyxl.Workbook()
        _fill_sheet(single_wb.active, "Datastage", paginator.page(1).object_list)
        return _to_bytes(single_wb), 'datastage_reporte.xlsx', XLSX_CONTENT_TYPE

    # Otherwise one workbook per page, bundled in a ZIP.
    archive = io.BytesIO()
    with zipfile.ZipFile(archive, 'w', zipfile.ZIP_DEFLATED) as zip_file:
        for page_num in paginator.page_range:
            progress_cb(80 + int((page_num / total_parts) * 8),
                        f'Particionando: parte {page_num}/{total_parts}')
            part_wb = openpyxl.Workbook()
            _fill_sheet(part_wb.active, f"Datastage_p{page_num}", paginator.page(page_num).object_list)
            zip_file.writestr(f"datastage_part{page_num}.xlsx", _to_bytes(part_wb))

    progress_cb(88, 'Serializando archivo...')
    return archive.getvalue(), 'datastage_combinado.zip', ZIP_CONTENT_TYPE
def _multiple_csv(combined_rows, all_fields, progress_cb):
    """Render the combined rows as a single UTF-8 encoded CSV file.

    Columns follow ``all_fields``; a value missing from a row is written as
    an empty string.

    Returns:
        tuple: ``(content_bytes, filename, content_type)``.
    """
    progress_cb(88, 'Serializando archivo...')

    text_buffer = io.StringIO()
    out = csv.writer(text_buffer)
    out.writerow(all_fields)
    out.writerows(
        [row_data.get(column, '') for column in all_fields]
        for row_data in combined_rows
    )

    encoded = text_buffer.getvalue().encode('utf-8')
    return encoded, 'datastage_reporte.csv', CSV_CONTENT_TYPE
# ---------------------------------------------------------------------------
# Dispatcher
# ---------------------------------------------------------------------------
def build_datastage_export(payload, progress_cb=None):
    """Generate the DataStage report described by ``payload``.

    The payload is the dict persisted in ``ReportDocument.filters``. Keys
    read: ``modo`` (default ``'simple'``), ``format`` (default ``'csv'``),
    ``globalFilters``, and either ``models`` (multiple mode) or ``model`` and
    ``fields`` (simple mode).

    Args:
        payload: Mapping describing the requested export.
        progress_cb: Optional ``callable(percent, message)`` for progress.

    Returns:
        tuple: ``(content_bytes, filename, content_type, total_rows)``.

    Raises:
        ValueError: If the payload lacks the data required by its mode.
    """
    export_format = payload.get('format', 'csv')
    global_filters = payload.get('globalFilters') or {}

    if payload.get('modo', 'simple') == 'multiple':
        models_data = payload.get('models') or []
        if not models_data:
            raise ValueError('models es requerido para exportación múltiple')
        return build_multiple_export(models_data, global_filters, export_format, progress_cb)

    # Any other mode is handled as a single-model export.
    model_name, fields = payload.get('model'), payload.get('fields')
    if not (model_name and fields):
        raise ValueError('model y fields son requeridos para exportación simple')
    return build_simple_export(model_name, fields, global_filters, export_format, progress_cb)