Asunto: Macro VBA para limpiar texto de PDF a DOC o Txt
Macro VBA para Word que limpia retornos de carro anómalos en texto procedente de Word. No es perfecta pero al menos hace el 90% del trabajo.
Sub JoinLowercaseLine()
' Macro Word para limpiar retornos de carro de texto de PDF
Rem PASO 1. Sustituir espacios de no separación por espacios normales.
Selection.Find.ClearFormatting
Selection.Find.Replacement.ClearFormatting
With Selection.Find
.Text = "^s"
.Replacement.Text = " "
.Forward = True
.Wrap = wdFindContinue
.Format = False
.MatchCase = False
.MatchWholeWord = False
.MatchAllWordForms = False
.MatchSoundsLike = False
.MatchWildcards = True
End With
Rem PASO 4.1 Elimino espacios antes de minuscula.
With Selection.Find
.Text = "[^13^l^t] {1;}([a-z])"
.Replacement.Text = "\1"
.Forward = True
.Wrap = wdFindContinue
.Format = False
.MatchCase = False
.MatchWholeWord = False
.MatchAllWordForms = False
.MatchSoundsLike = False
.MatchWildcards = True
End With
Selection.Find.Execute Replace:=wdReplaceAll
' Mio
Selection.Find.ClearFormatting
Selection.Find.Replacement.ClearFormatting
With Selection.Find
.Text = "^phttp"
.Replacement.Text = "^pWeb: http"
.Forward = True
.Wrap = wdFindAsk
.Format = False
.MatchCase = False
.MatchWholeWord = False
.MatchWildcards = False
.MatchSoundsLike = False
.MatchAllWordForms = False
End With
Selection.Find.Execute Replace:=wdReplaceAll
Selection.Find.ClearFormatting
Selection.Find.Replacement.ClearFormatting
With Selection.Find
.Text = "^pwww"
.Replacement.Text = "^pWeb: www"
.Forward = True
.Wrap = wdFindAsk
.Format = False
.MatchCase = False
.MatchWholeWord = False
.MatchWildcards = False
.MatchSoundsLike = False
.MatchAllWordForms = False
End With
Selection.Find.Execute Replace:=wdReplaceAll
' fin mio
Rem PASO 2. Segundo, unimos a la anterior linea las lineas que empiezan por lowercase (minuscula).
With Selection.Find
.Text = "([^13^l^t])([a-z])"
.Replacement.Text = " \2"
' .Replacement.Text = "\1$popo$\2"
.Forward = True
.Wrap = wdFindContinue
.Format = False
.MatchCase = False
.MatchWholeWord = False
.MatchAllWordForms = False
.MatchSoundsLike = False
.MatchWildcards = True
End With
Selection.Find.Execute Replace:=wdReplaceAll
Rem PASO 3. Eliminar todos los espacios redundantes.
With Selection.Find
.Text = "( ){1;}"
.Replacement.Text = "\1"
.Forward = True
.Wrap = wdFindContinue
.Format = False
.MatchCase = False
.MatchWholeWord = False
.MatchAllWordForms = False
.MatchSoundsLike = False
.MatchWildcards = True
End With
Selection.Find.Execute Replace:=wdReplaceAll
End Sub
|