In unserem Tipp Text2HTML haben wir Ihnen gezeigt, wie man aus einer Text-Datei eine HTML-Datei macht. Unser heutiger Tipp zeigt den umgekehrten Weg, d.h. es wird gezeigt, wie man eine HTML-Text in Plain-Text umwandelt. Die nachfolgende Routine berücksichtigt auch Image-Tags, an derer Stelle Sie bestimmen, welcher Text eingesetzt werden soll. Hier die Hauptroutine, in der der HTML-Text in Plain-Text umgesetzt wird. Public Function HTML2Text(ByVal OrigHTML As String, _ Optional ByVal ImgText As String = "") As String ' ImgText: Text, der statt Bildern eingefügt wird. ' Sonderinhalte: %imgsrc für Bilddateiname ' %imgalt für Alt-Text Dim CurrChar As String Dim NoHTML As String Dim hRef As String Dim x As Integer Dim z As Integer Dim imgFile As String Dim imgAlt As String Dim Params As String Dim BlockQuote As Boolean On Error Resume Next ' Prüfen, ob <BODY>-Tag vorhanden ' wenn ja befindet sich der relevante HTML-Teil ' zwischen <BODY> und </BODY> If InStr(LCase$(OrigHTML), "<body") > 0 Then OrigHTML = Mid$(OrigHTML, InStr(LCase$(OrigHTML), "<body")) OrigHTML = Mid$(OrigHTML, InStr(OrigHTML, ">") + 1) If InStr(LCase$(OrigHTML), "</body>") > 0 Then _ OrigHTML = Left$(OrigHTML, InStr(LCase$(OrigHTML), _ "</body>") - 1) End If ' HTML-Text zeichenweise "scannen" Do While Len(OrigHTML) CurrChar = Left$(OrigHTML, 1) OrigHTML = Mid$(OrigHTML, 2) Select Case CurrChar Case " " OrigHTML = LTrim$(OrigHTML) Case vbCr, vbLf If Right$(NoHTML, 1) <> " " And _ Right$(NoHTML, 2) <> vbCrLf Then _ CurrChar = " " Else CurrChar = "" If Left$(OrigHTML, 1) = vbLf Then _ OrigHTML = Mid$(OrigHTML, 2) OrigHTML = LTrim$(OrigHTML) ' HTML-Steuerzeichen Case "<" CurrChar = "" If InStr(OrigHTML, ">") > 0 Then CurrChar = Left$(OrigHTML, InStr(OrigHTML, ">") - 1) OrigHTML = Mid$(OrigHTML, InStr(OrigHTML, ">") + 1) If InStr(CurrChar, " ") > 0 Then Params = ReplaceChars(Trim$(Mid$(CurrChar, _ InStr(CurrChar, " ") + 1)), vbCrLf, "") CurrChar = Left$(CurrChar, InStr(CurrChar, " ") - 1) Else Params = "" End If Dim ParamColl As Collection Select Case LCase$(CurrChar) Case "p" If Right$(NoHTML, 4) <> vbCrLf & vbCrLf Then _ CurrChar = vbCrLf & vbCrLf Else CurrChar = "" Case "/div" If Right$(NoHTML, 2) <> vbCrLf Then _ CurrChar = vbCrLf Else CurrChar = "" Case "br" CurrChar = vbCrLf Case "ul", "/ul", "ol", "/ol" CurrChar = vbCrLf Case "li" CurrChar = vbCrLf & " - " Case "blockquote" BlockQuote = True CurrChar = "" Case "/blockquote" BlockQuote = False CurrChar = "" Case "img" If Len(ImgText) > 0 Then Set ParamColl = StripByCharAndQuotes(Params, " ") imgAlt = "" imgFile = "" For z = 1 To ParamColl.Count If LCase$(Left$(ParamColl(z), 4)) = "src=" Then imgFile = StripQuotes(Mid$(ParamColl(z), 5)) ElseIf LCase$(Left$(ParamColl(z), 4)) = "alt=" Then imgAlt = StripQuotes(Mid$(ParamColl(z), 5)) End If If Len(imgAlt) > 0 And Len(imgFile) > 0 Then Exit For Next If Len(Trim$(imgAlt)) = 0 Then _ imgAlt = "Image " & imgFile ImgText = ReplaceChars(ImgText, "%imgalt", imgAlt) ImgText = ReplaceChars(ImgText, "%imgsrc", imgFile) CurrChar = " " & ImgText & " " Else CurrChar = "" End If Case "a" Set ParamColl = StripByCharAndQuotes(Params, " ") hRef = "" For z = 1 To ParamColl.Count If LCase$(Left$(ParamColl(z), 5)) = "href=" Then hRef = StripQuotes(Mid$(ParamColl(z), 6)) Exit For End If Next CurrChar = "" Case "/a" CurrChar = "" If Len(Trim$(hRef)) > 0 Then If LCase$(Left$(hRef, 7)) = "mailto:" Then _ hRef = Mid$(hRef, 8) If LCase$(Right$(Trim$(NoHTML), _ Len(hRef))) <> LCase$(hRef) Then If Not (LCase$(Left$(hRef, 7)) = "http://" And _ LCase$(Right$(Trim$(NoHTML), _ Len(hRef) - 7)) = LCase$(Mid$(hRef, 8))) Then CurrChar = " [" & hRef & "]" hRef = "" End If End If End If Case "hr" CurrChar = vbCrLf For x = 1 To 70 CurrChar = CurrChar & "-" Next CurrChar = CurrChar & vbCrLf Case "sigboundary" CurrChar = vbCrLf & "-- " & vbCrLf Case "script" CurrChar = "" If InStr(LCase$(OrigHTML), "</script>") > 0 Then _ OrigHTML = Mid$(OrigHTML, InStr(LCase$(OrigHTML), _ "</script>")) Case "pre" CurrChar = vbCrLf If InStr(LCase$(OrigHTML), "</pre>") > 0 Then CurrChar = CurrChar & Left$(OrigHTML, _ InStr(LCase$(OrigHTML), "</pre>") - 1) OrigHTML = Mid$(OrigHTML, InStr(LCase$(OrigHTML), _ "</pre>")) End If CurrChar = CurrChar & vbCrLf Case Else CurrChar = "" End Select End If Case "&" If InStr(OrigHTML, ";") > 0 And (InStr(OrigHTML, ";") < _ InStr(OrigHTML, " ") Or InStr(OrigHTML, " ") = 0) Then CurrChar = Left$(OrigHTML, InStr(OrigHTML, ";") - 1) OrigHTML = Mid$(OrigHTML, InStr(OrigHTML, ";") + 1) Select Case CurrChar Case "amp" CurrChar = "&" Case "quot" CurrChar = """" Case "lt" CurrChar = "<" Case "gt" CurrChar = ">" Case "nbsp" CurrChar = " " Case "Auml" CurrChar = "Ä" Case "auml" CurrChar = "ä" Case "iexcl" CurrChar = "¡" Case "cent" CurrChar = "¢" Case "pound" CurrChar = "£" Case "curren" CurrChar = "¤" Case "yen" CurrChar = "¥" Case "brvbar" CurrChar = "|" Case "sect" CurrChar = "§" Case "uml" CurrChar = "¨" Case "copy" CurrChar = "©" Case "ordf" CurrChar = "ª" Case "laquo" CurrChar = "«" Case "not" CurrChar = "¬" Case "reg" CurrChar = "®" Case "macr" CurrChar = "¯" Case "deg" CurrChar = "°" Case "plusm" CurrChar = "±" Case "sup2" CurrChar = "²" Case "sup3" CurrChar = "³" Case "acute" CurrChar = "´" Case "micro" CurrChar = "µ" Case "para" CurrChar = "¶" Case "middot" CurrChar = "·" Case "cedil" CurrChar = "¸" Case "sup1" CurrChar = "¹" Case "ordm" CurrChar = "º" Case "raquo" CurrChar = "»" Case "frac14" CurrChar = "¼" Case "frac12" CurrChar = "½" Case "frac34" CurrChar = "¾" Case "iquest" CurrChar = "¿" Case "Agrave" CurrChar = "À" Case "Aacute" CurrChar = "Á" Case "Acirc" CurrChar = "Â" Case "Atilde" CurrChar = "Ã" Case "Aring" CurrChar = "Å" Case "AElig" CurrChar = "Æ" Case "Ccedil" CurrChar = "Ç" Case "Egrave" CurrChar = "È" Case "Eacute" CurrChar = "É" Case "Ecirc" CurrChar = "Ê" Case "Euml" CurrChar = "Ë" Case "Igrave" CurrChar = "Ì" Case "Iacute" CurrChar = "Í" Case "Icirc" CurrChar = "Î" Case "Iuml" CurrChar = "Ï" Case "ETH" CurrChar = "Ð" Case "Ntilde" CurrChar = "Ñ" Case "Ograve" CurrChar = "Ò" Case "Oacute" CurrChar = "Ó" Case "Ocirc" CurrChar = "Ô" Case "Otilde" CurrChar = "Õ" Case "Ouml" CurrChar = "Ö" Case "times" CurrChar = "×" Case "Oslash" CurrChar = "Ø" Case "Ugrave" CurrChar = "Ù" Case "Uacute" CurrChar = "Ú" Case "Ucirc" CurrChar = "Û" Case "Uuml" CurrChar = "Ü" Case "Yacute" CurrChar = "Ý" Case "THORN" CurrChar = "Þ" Case "szlig" CurrChar = "ß" Case "agrave" CurrChar = "à" Case "aacute" CurrChar = "á" Case "acirc" CurrChar = "â" Case "atilde" CurrChar = "ã" Case "aring" CurrChar = "å" Case "aelig" CurrChar = "æ" Case "ccedil" CurrChar = "ç" Case "egrave" CurrChar = "è" Case "eacute" CurrChar = "é" Case "ecirc" CurrChar = "ê" Case "euml" CurrChar = "ë" Case "igrave" CurrChar = "ì" Case "iacute" CurrChar = "í" Case "icirc" CurrChar = "î" Case "iuml" CurrChar = "ï" Case "eth" CurrChar = "ð" Case "ntilde" CurrChar = "ñ" Case "ograve" CurrChar = "ò" Case "oacute" CurrChar = "ó" Case "ocirc" CurrChar = "ô" Case "otilde" CurrChar = "õ" Case "ouml" CurrChar = "ö" Case "divide" CurrChar = "÷" Case "oslash" CurrChar = "ø" Case "ugrave" CurrChar = "ù" Case "uacute" CurrChar = "ú" Case "ucirc" CurrChar = "û" Case "uuml" CurrChar = "ü" Case "yacute" CurrChar = "ý" Case "thorn" CurrChar = "þ" Case "yuml" CurrChar = "ÿ" Case Else CurrChar = "&" & CurrChar & ";" End Select End If End Select If Right$(CurrChar, 2) = vbCrLf And BlockQuote Then _ CurrChar = CurrChar & "> " NoHTML = NoHTML & CurrChar Loop NoHTML = Trim$(NoHTML) Do While Left$(NoHTML, 2) = vbCrLf NoHTML = Trim$(Mid$(NoHTML, 3)) Loop Do While Right$(NoHTML, 2) = vbCrLf NoHTML = Trim$(Left$(NoHTML, Len(NoHTML) - 2)) Loop HTML2Text = NoHTML End Function Die nachfolgende Routine ersetzt einen Teilstring in einem Originalstring. Public Function ReplaceChars(ByVal OrgString _ As String, ByVal ToReplace As String, _ ByVal ToInsert As String) As String Dim ST As Long Dim newst As Long ST = 0 Do While InStr(ST + 1, LCase$(OrgString), LCase$(ToReplace)) <> 0 newst = InStr(ST + 1, LCase$(OrgString), LCase$(ToReplace)) OrgString = Left$(OrgString, InStr(ST + 1, LCase$(OrgString), _ LCase$(ToReplace)) - 1) & ToInsert & Mid$(OrgString, _ InStr(ST + 1, LCase$(OrgString), LCase$(ToReplace)) + _ Len(ToReplace)) ST& = newst + Len(ToInsert) - 1 Loop ReplaceChars = OrgString End Function Function StripByCharAndQuotes(ByVal OrgString _ As String, StripChar As String) As Collection Dim Scoll As Collection Dim x As Long, Y As Long Set Scoll = New Collection Do x = InStr(OrgString, StripChar) Y = InStr(OrgString, Chr$(34)) If Y > 0 And Y < x Then Y = InStr(Y + 1, OrgString, Chr$(34)) Do While Y > x And Y > 0 And x > 0 x = InStr(x + 1, OrgString, StripChar) Loop End If If x > 0 Then Scoll.Add Left$(OrgString, x - 1) OrgString = Mid$(OrgString, x + 1) Else Scoll.Add OrgString OrgString = "" End If Loop While Len(OrgString) > 0 Set StripByCharAndQuotes = Scoll Set Scoll = Nothing End Function Die nachfolgende Funktion entfernt überflüssige Anführungszeichen am Anfang und Ende eines Strings Private Function StripQuotes(ByVal sText _ As String) As String If Left$(sText, 1) = Chr$(34) Then _ sText = Mid$(sText, 2) If Right$(sText, 1) = Chr$(34) Then _ sText = Left$(sText, Len(sText) - 1) End Function Damit Sie den gesamten Quellcode jetzt nicht manuell in Ihr Projekt übernehmen müssen, haben wir diesen in ein Modul gepackt (siehe Beispielsprojekt). Dieser Tipp wurde bereits 25.179 mal aufgerufen. Voriger Tipp | Zufälliger Tipp | Nächster Tipp
Anzeige
Diesen und auch alle anderen Tipps & Tricks finden Sie auch auf unserer aktuellen vb@rchiv Vol.6 (einschl. Beispielprojekt!) Ein absolutes Muss - Geballtes Wissen aus mehr als 8 Jahren vb@rchiv! - nahezu alle Tipps & Tricks und Workshops mit Beispielprojekten - Symbol-Galerie mit mehr als 3.200 Icons im modernen Look Weitere Infos - 4 Entwickler-Vollversionen (u.a. sevFTP für .NET), Online-Update-Funktion u.v.m. |
TOP! Unser Nr. 1 Neu! sevDataGrid 3.0 Mehrspaltige Listen, mit oder ohne DB-Anbindung. Autom. Sortierung, Editieren von Spalteninhalten oder das interaktive Hinzufügen von Datenzeilen sind ebenso möglich wie das Erstellen eines Web-Reports. Tipp des Monats April 2024 Skyfloy Chart von Microsoft und dazu noch gratis Tutorial für Microsoft Chart Controls für Microsoft .NET Framework 3.5 Access-Tools Vol.1 Über 400 MByte Inhalt Mehr als 250 Access-Beispiele, 25 Add-Ins und ActiveX-Komponenten, 16 VB-Projekt inkl. Source, mehr als 320 Tipps & Tricks für Access und VB |
||||||||||||||||
Microsoft, Windows und Visual Basic sind entweder eingetragene Marken oder Marken der Microsoft Corporation in den USA und/oder anderen Ländern. Weitere auf dieser Homepage aufgeführten Produkt- und Firmennamen können geschützte Marken ihrer jeweiligen Inhaber sein. |