mitigar la duplicidad de archivos a la hora de hacer bulk de documentos

2025-12-29 07:22:30 -07:00
parent 22f1bc5390
commit 4b2f3192d0
1 changed files with 129 additions and 41 deletions
--- a/api/customs/views.py
+++ b/api/customs/views.py
@@ -49,7 +49,9 @@ from django.core.files.base import ContentFile
 from django.db import transaction
 from rest_framework.parsers import MultiPartParser, FormParser
 from api.record.models import Document, DocumentType, Fuente
-
+from unicodedata import normalize 
 from datetime import datetime
 from django.utils import timezone
 # Importar rarfile de manera opcional
 try:
    import rarfile
@@ -798,56 +800,61 @@ class ViewSetPedimento(LoggingMixin, viewsets.ModelViewSet, OrganizacionFiltrada
                                })
                                continue
                        else:
                            print(f"♻️ Usando pedimento existente: ID {existing_pedimento.id}")
                            # Usar pedimento existente
                            pedimento = existing_pedimento
                        print(f"🔄 Iniciando creación de documento para pedimento ID: {pedimento.id}")
                        # Crear documento asociado al pedimento
                        try:
                            print("📖 Leyendo archivo desde directorio temporal...")
                            # Leer el archivo desde el directorio temporal
                            with open(file_path, 'rb') as f:
                                file_content = f.read()
-                            print(f"📄 Archivo leído: {len(file_content)} bytes")
+                            # Obtener información del archivo
-                            # Crear ContentFile que Django puede manejar correctamente
+                            extension = os.path.splitext(file_name)[1].lower().lstrip('.')
                            # Buscar todos los documentos existentes para este pedimento
                            existing_documents = Document.objects.filter(
                                pedimento_id=pedimento.id,
                                organizacion=organizacion
                            )
                            # Buscar si ya existe un documento con el mismo nombre base
                            existing_document = None
                            for doc in existing_documents:
                                if is_same_document(doc, file_name):
                                    existing_document = doc
                                    print(f"✅ Encontrado documento existente: ID {doc.id}")
                                    break
                            # Crear ContentFile
                            django_file = ContentFile(file_content, name=file_name)
-                            # # Verificar si el documento ya existe para este pedimento y archivo
+                            if existing_document:
-                            # print("🔍 Verificando existencia previa del documento...")
+                                # Opcional: Eliminar el archivo físico anterior
                                try:
                                    if existing_document.archivo and os.path.exists(existing_document.archivo.path):
                                        os.remove(existing_document.archivo.path)
                                except (ValueError, OSError) as e:
                                    print(f"No se pudo eliminar archivo físico anterior: {str(e)}")
-                            # # Reemplazar múltiples caracteres
+                                # Actualizar el documento existente
-                            # normalized_file_name = file_name.replace(" ", "_")
+                                existing_document.archivo = django_file
                                existing_document.size = len(file_content)
                                existing_document.extension = extension
                                existing_document.updated_at = timezone.now()  # Si tienes este campo
                                existing_document.save()
-                            # file_name_without_extension = normalized_file_name.rsplit('.', 1)[0]
+                            else:
-                            # extension_file = os.path.splitext(normalized_file_name)[1].lower().lstrip('.')
+                                # Crear nuevo documento
                            # existing_document = Document.objects.filter(
                            #     pedimento_id=pedimento.id,
                            #     archivo__contains=file_name_without_extension,
                            #     extension=extension_file
                            # ).first()
                            # if existing_document:
                            #     print(f"Documento existente encontrado, omitiendo creación: ID {existing_document.id}")
                            #     continue
                            print(f"Creando documento para archivo: {file_name}")
                            # Crear documento - Django automáticamente guardará el archivo en media/documents/
                                document = Document.objects.create(
                                    organizacion=organizacion,
                                    pedimento_id=pedimento.id,
                                    document_type=document_type,
-                                fuente_id=4,  # Fuente: Carga Plataforma
+                                    fuente_id=4,
                                    archivo=django_file,
                                    size=len(file_content),
-                                extension=os.path.splitext(file_name)[1].lower().lstrip('.')
+                                    extension=extension
                                )
                            print(f"Documento creado exitosamente: {document.id}")
                            documents_created += 1
                            print(f"📊 Total documentos creados hasta ahora: {documents_created}")
                        except Exception as e:
                            print(f"❌ Error al crear documento: {str(e)}")
@@ -1751,3 +1758,84 @@ class ImportadorViewSet(viewsets.ModelViewSet, OrganizacionFiltradaMixin):
        raise ValueError("Usuario no autenticado o sin permisos para actualizar Importador")
    my_tags = ['Importadores']
 # helper | reglas para formato de documento antes de cargarlo
 def normalize_filename(filename):
    """
    Return an ASCII-only, underscore-separated version of ``filename``.

    The name is NFKD-decomposed and every non-ASCII code point is dropped
    (so accented letters lose their accent). The substitutions below are
    then applied in order, and leading/trailing underscores are removed.

    Args:
        filename: File name to normalize (extension included, if any).

    Returns:
        str: The normalized file name; dots and hyphens are preserved.
    """
    ascii_name = normalize('NFKD', filename).encode('ASCII', 'ignore').decode('ASCII')
    # Order matters: each rule works on the output of the previous one.
    substitutions = (
        (r'[^\w\s.-]', '_'),  # anything but word chars, whitespace, '.', '-'
        (r'[\s()]+', '_'),    # runs of whitespace / parentheses
        (r'_+', '_'),         # collapse repeated underscores
    )
    for pattern, replacement in substitutions:
        ascii_name = re.sub(pattern, replacement, ascii_name)
    return ascii_name.strip('_')
 def get_clean_base_filename(filename):
    """
    Return the lower-cased base name used to compare two document names.

    The name is normalized with normalize_filename(), its extension is
    dropped, a trailing Django collision suffix ("_" followed by 7
    alphanumeric characters) is removed, and finally a trailing copy marker
    ("_copy", "_copia", "_-_copy", "_-_copia", optionally followed by
    "_<number>") is removed.

    NOTE(review): this module defines get_clean_base_filename a second time
    further down; Python binds the name to the later definition, so keep
    both in sync or delete one of them.

    NOTE(review): the suffix pattern cannot tell a Django-generated suffix
    from a legitimate name ending in "_" + 7 alphanumerics (for example
    "_1234567"); such names are truncated as well.

    Args:
        filename: File name (not a path), with or without extension.

    Returns:
        str: The cleaned base name, lower-cased and without surrounding
        underscores.
    """
    normalized = normalize_filename(filename)
    stem = os.path.splitext(normalized)[0]

    # Match on the stem itself and cut exactly where the match starts, so a
    # stem containing dots is neither split a second time nor sliced blindly.
    suffix_match = re.search(r'_[a-zA-Z0-9]{7}$', stem)
    if suffix_match:
        stem = stem[:suffix_match.start()]

    # normalize_filename() turns a closing parenthesis into "_", which would
    # stop the "$"-anchored copy pattern from matching "..._Copy_2_".
    stem = stem.strip('_')

    # The result is lower-cased anyway, so match copy markers regardless of
    # case ("- Copy" as well as "- copia").
    stem = re.sub(
        r'(_copy|_copia|_-_copia|_-_copy)(_\d+)?$',
        '',
        stem,
        flags=re.IGNORECASE,
    )
    return stem.lower().strip('_')
 def is_same_document(existing_doc, new_filename):
    """
    Tell whether an uploaded file name refers to an already stored document.

    Both names must have the same extension (compared case-insensitively)
    and the same base name as computed by get_clean_base_filename().

    Args:
        existing_doc: Stored document object; only ``archivo.name`` and
            ``extension`` are read.
        new_filename: Name of the file being uploaded.

    Returns:
        bool: True if both refer to the same document. False otherwise,
        including when the stored document has no file name or
        ``new_filename`` is empty.
    """
    # A stored document may have no file attached; treat that as "no match"
    # instead of failing inside os.path.basename().
    stored_name = getattr(existing_doc.archivo, 'name', None)
    if not stored_name or not new_filename:
        return False

    # Compare extensions first: it is cheap and tolerates a missing
    # (None) ``extension`` value on the stored document.
    existing_ext = (existing_doc.extension or '').lower()
    new_ext = os.path.splitext(new_filename)[1].lower().lstrip('.')
    if existing_ext != new_ext:
        return False

    existing_base = get_clean_base_filename(os.path.basename(stored_name))
    new_base = get_clean_base_filename(new_filename)
    return existing_base == new_base
 def extract_django_suffix(filename):
    """
    Return the trailing 7-character alphanumeric suffix of ``filename``.

    The extension (as split by os.path.splitext) is ignored; the remaining
    name must end in "_" followed by exactly 7 ASCII letters or digits.

    Args:
        filename: File name, with or without extension.

    Returns:
        str | None: The 7 characters after the underscore, or None when the
        name does not end with such a suffix.
    """
    stem, _extension = os.path.splitext(filename)
    found = re.search(r'_([a-zA-Z0-9]{7})$', stem)
    return found.group(1) if found else None
 def get_clean_base_filename(filename):
    """
    Return the lower-cased base name used to compare two document names.

    The name is normalized with normalize_filename(), its extension is
    dropped, a trailing Django collision suffix ("_" followed by 7
    alphanumeric characters) is removed, and finally a trailing copy marker
    ("_copy", "_copia", "_-_copy", "_-_copia", optionally followed by
    "_<number>") is removed.

    NOTE(review): an earlier definition of get_clean_base_filename exists in
    this module; this later one is the definition Python keeps bound to the
    name. Keep both in sync or delete the earlier one.

    NOTE(review): the suffix pattern cannot tell a Django-generated suffix
    from a legitimate name ending in "_" + 7 alphanumerics (for example
    "_1234567"); such names are truncated as well.

    Args:
        filename: File name (not a path), with or without extension.

    Returns:
        str: The cleaned base name, lower-cased and without surrounding
        underscores.
    """
    normalized = normalize_filename(filename)
    stem = os.path.splitext(normalized)[0]

    # Match on the stem itself and cut exactly where the match starts, so a
    # stem containing dots is neither split a second time nor sliced blindly.
    suffix_match = re.search(r'_[a-zA-Z0-9]{7}$', stem)
    if suffix_match:
        stem = stem[:suffix_match.start()]

    # normalize_filename() turns a closing parenthesis into "_", which would
    # stop the "$"-anchored copy pattern from matching "..._Copy_2_".
    stem = stem.strip('_')

    # The result is lower-cased anyway, so match copy markers regardless of
    # case ("- Copy" as well as "- copia").
    stem = re.sub(
        r'(_copy|_copia|_-_copia|_-_copy)(_\d+)?$',
        '',
        stem,
        flags=re.IGNORECASE,
    )
    return stem.lower().strip('_')