VB icon

Get the Text from DOC,RTF,HTML

Submitted on: 1/1/2015 4:32:00 AM
By: Holger Kaslowitz (from psc cd)  
Level: Intermediate
User Rating: By 2 Users
Compatibility: VB.NET
Views: 2967
     With this code, you can extract the plain text from a DOC, RTF, or HTML file.
Can't Copy and Paste this?
Click here for a copy-and-paste friendly version of this code!
// Name: Get the Text from DOC,RTF,HTML
// Description: With this code, you can extract the plain text from a DOC, RTF, or HTML file.
// By: Holger Kaslowitz (from psc cd)
// Assumes: The HTML routine is not perfect.

''' <summary>
''' Extracts plain text from RTF, legacy Word DOC and HTML files.
''' </summary>
''' <remarks>
''' RTF and HTML files are read through a hidden RichTextBox; DOC files are read
''' byte-by-byte and trimmed with simple heuristics. None of the routines is a
''' real parser, so results on unusual input are best-effort.
''' </remarks>
Public Class File2String
    ' Off-screen control used purely as a file loader / RTF-to-text converter.
    Private WithEvents RichText As New Windows.Forms.RichTextBox()

    ' NOTE(review): the published listing searched for an EMPTY string here, which
    ' made FromDOC throw on every input (InStrRev returns Start for an empty match,
    ' giving Mid a length of -1). The real delimiter was presumably a non-printable
    ' character lost when the code was published - TODO confirm and fill in.
    ' While this is empty, FromDOC takes the text from the start of the file.
    Private Const DocTextStartMarker As String = ""

    ''' <summary>Loads an RTF file and returns its text without formatting.</summary>
    ''' <param name="File">Path of the RTF file to read.</param>
    ''' <returns>The document text with line feeds expanded to CR/LF pairs.</returns>
    Public Function FromRTF(ByVal File As String) As String
        ' The file has to be loaded first; otherwise the control's previous
        ' content (or an empty string) would be returned.
        RichText.LoadFile(File, Windows.Forms.RichTextBoxStreamType.RichText)
        Return RichText.Text.Replace(vbLf, vbCrLf)
    End Function

    ''' <summary>Heuristically extracts the text portion of a binary Word DOC file.</summary>
    ''' <param name="File">Path of the DOC file to read.</param>
    ''' <returns>
    ''' The text found before the last blank line of the de-nulled file content,
    ''' after removing a few known artefacts; an empty string for an empty file.
    ''' </returns>
    Public Function FromDOC(ByVal File As String) As String
        Dim raw As String = OpenTexFile(File)
        If String.IsNullOrEmpty(raw) Then
            Return ""
        End If

        ' The text is assumed to end at the last blank line (two line breaks in a row).
        Dim lastPos As Integer = InStrRev(raw, vbCrLf & vbCrLf)
        If lastPos = 0 Then
            ' No blank line at all: keep everything instead of failing.
            lastPos = raw.Length + 1
        End If

        ' The text starts right after the last start marker located before lastPos.
        Dim firstPos As Integer = 1
        If DocTextStartMarker.Length > 0 Then
            ' InStrRev yields 0 when the marker is absent, which leaves firstPos at 1.
            firstPos = InStrRev(raw, DocTextStartMarker, Math.Min(lastPos, raw.Length)) + 1
        End If

        Dim body As String = Mid(raw, firstPos, Math.Max(0, lastPos - firstPos))

        ' Strip artefacts that the original author found in extracted text.
        body = body.Replace("F" & vbTab, "").Replace("e'", "")

        ' Keep only what follows the last Chr(1) control character, if there is one.
        Return Mid(body, InStrRev(body, Chr(1)) + 1)
    End Function

    ''' <summary>Returns the text found between the tags of an HTML file.</summary>
    ''' <param name="File">Path of the HTML file to read.</param>
    ''' <returns>
    ''' The concatenated text between each "&gt;" and the following "&lt;", with
    ''' line breaks turned into spaces and tabs removed, trimmed of outer spaces.
    ''' </returns>
    ''' <remarks>
    ''' Entities, scripts and style blocks are not interpreted. As in the original
    ''' routine, scanning stops once the position is within five characters of the
    ''' end of the text.
    ''' </remarks>
    Public Function FromHTML(ByVal File As String) As String
        RichText.LoadFile(File, Windows.Forms.RichTextBoxStreamType.PlainText)

        ' Read the property once; every access to RichTextBox.Text rebuilds the string.
        Dim html As String = RichText.Text
        Dim collected As New System.Text.StringBuilder()
        Dim tagClose As Integer = 1
        Dim tagOpen As Integer = 1

        Do While tagClose < html.Length - 5
            ' Resume after the last "<" so a stray ">" inside text is not scanned twice.
            tagClose = InStr(Math.Max(tagClose + 1, tagOpen), html, ">")
            If tagClose = 0 Then
                Exit Do ' no further tag end; InStr would reject a start position of 0
            End If

            tagOpen = InStr(tagClose, html, "<")
            If tagOpen = 0 Then
                Exit Do ' nothing but trailing text without another tag
            End If

            ' Includes the leading ">" itself; it is removed during clean-up below.
            collected.Append(Mid(html, tagClose, tagOpen - tagClose))
        Loop

        ' NOTE(review): the published listing also had Replace(TempText, "", "") here,
        ' which does nothing; the search text (perhaps an HTML entity) appears to
        ' have been lost, so that step is omitted - TODO confirm.
        Dim cleaned As String = collected.ToString()
        cleaned = cleaned.Replace(vbCrLf, "")
        cleaned = cleaned.Replace(">", "")
        cleaned = cleaned.Replace(vbLf, " ")
        cleaned = cleaned.Replace(vbTab, "")
        Return Trim(cleaned)
    End Function

    ''' <summary>Reads a file as single-byte characters, dropping NUL bytes.</summary>
    ''' <param name="Fil">Path of the file to read.</param>
    ''' <returns>
    ''' The file content with every zero byte removed, every carriage return (13)
    ''' expanded to CR/LF, and every other byte converted with Chr.
    ''' </returns>
    Public Function OpenTexFile(ByVal Fil As String) As String
        ' ReadAllBytes opens, fully reads and closes the file, so no handle is leaked.
        Dim content As Byte() = System.IO.File.ReadAllBytes(Fil)
        Dim builder As New System.Text.StringBuilder(content.Length)

        For Each value As Byte In content
            Select Case value
                Case 0
                    ' NUL padding (e.g. the high bytes of two-byte characters) is skipped.
                Case 13
                    builder.Append(vbCrLf)
                Case Else
                    builder.Append(Chr(value))
            End Select
        Next

        Return builder.ToString()
    End Function
End Class

Report Bad Submission
Use this form to tell us if this entry should be deleted (e.g., it contains no code, is a virus, etc.).
This submission should be removed because:

Your Vote

What do you think of this code (in the Intermediate category)?
(The code with your highest vote will win this month's coding contest!)
Excellent  Good  Average  Below Average  Poor (See voting log ...)

Other User Comments

 There are no comments on this submission.

Add Your Feedback
Your feedback will be posted below and an email sent to the author. Please remember that the author was kind enough to share this with you, so any criticisms must be stated politely, or they will be deleted. (For feedback not related to this particular code, please click here instead.)

To post feedback, first please login.