Macro VBA para limpiar texto de PDF a DOC o Txt
-> Lenguajes de programacion y Bases de Datos

#1: Macro VBA para limpiar texto de PDF a DOC o Txt Autor: aforo EnvioPublicado: Vie Feb 16, 2018 7:44 pm
    ----
Macro VBA para Word que limpia retornos de carro anómalos en texto procedente de Word. No es perfecta pero al menos hace el 90% del trabajo.

Code::

Sub JoinLowercaseLine()
' Macro Word para limpiar retornos de carro de texto de PDF
    Rem PASO 1. Sustituir espacios de no separación por espacios normales.
    Selection.Find.ClearFormatting
    Selection.Find.Replacement.ClearFormatting
    With Selection.Find
        .Text = "^s"
        .Replacement.Text = " "
        .Forward = True
        .Wrap = wdFindContinue
        .Format = False
        .MatchCase = False
        .MatchWholeWord = False
        .MatchAllWordForms = False
        .MatchSoundsLike = False
        .MatchWildcards = True
    End With
    Rem PASO 4.1 Elimino espacios antes de minuscula.
    With Selection.Find
        .Text = "[^13^l^t] {1;}([a-z])"
        .Replacement.Text = "\1"
        .Forward = True
        .Wrap = wdFindContinue
        .Format = False
        .MatchCase = False
        .MatchWholeWord = False
        .MatchAllWordForms = False
        .MatchSoundsLike = False
        .MatchWildcards = True
    End With
    Selection.Find.Execute Replace:=wdReplaceAll
    
' Mio
    Selection.Find.ClearFormatting
    Selection.Find.Replacement.ClearFormatting
    With Selection.Find
        .Text = "^phttp"
        .Replacement.Text = "^pWeb: http"
        .Forward = True
        .Wrap = wdFindAsk
        .Format = False
        .MatchCase = False
        .MatchWholeWord = False
        .MatchWildcards = False
        .MatchSoundsLike = False
        .MatchAllWordForms = False
    End With
    Selection.Find.Execute Replace:=wdReplaceAll
    
    Selection.Find.ClearFormatting
    Selection.Find.Replacement.ClearFormatting
    With Selection.Find
        .Text = "^pwww"
        .Replacement.Text = "^pWeb: www"
        .Forward = True
        .Wrap = wdFindAsk
        .Format = False
        .MatchCase = False
        .MatchWholeWord = False
        .MatchWildcards = False
        .MatchSoundsLike = False
        .MatchAllWordForms = False
    End With
    Selection.Find.Execute Replace:=wdReplaceAll
' fin mio

    Rem PASO 2. Segundo, unimos a la anterior linea las lineas que empiezan por lowercase (minuscula).
    With Selection.Find
        .Text = "([^13^l^t])([a-z])"
        .Replacement.Text = " \2"
        ' .Replacement.Text = "\1$popo$\2"
        .Forward = True
        .Wrap = wdFindContinue
        .Format = False
        .MatchCase = False
        .MatchWholeWord = False
        .MatchAllWordForms = False
        .MatchSoundsLike = False
        .MatchWildcards = True
    End With
    Selection.Find.Execute Replace:=wdReplaceAll
    
    Rem PASO 3. Eliminar todos los espacios redundantes.
    With Selection.Find
        .Text = "( ){1;}"
        .Replacement.Text = "\1"
        .Forward = True
        .Wrap = wdFindContinue
        .Format = False
        .MatchCase = False
        .MatchWholeWord = False
        .MatchAllWordForms = False
        .MatchSoundsLike = False
        .MatchWildcards = True
    End With
    Selection.Find.Execute Replace:=wdReplaceAll
End Sub




-> Lenguajes de programacion y Bases de Datos

Todas las horas son GMT + 1 Hora

Página 1 de 1