I'm working with an Excel list and want to turn:
Quercus agrifolia var. oxyadenia (Torr.) J.T. Howell
into:
<i>Quercus agrifolia</i> var. <i>oxyadenia</i> (Torr.) J.T. Howell
I have the Rich Text formatted list with formatting applied but I want to send it to Access with the formatting tags explicitly included around the related text.
I was looking to do the same thing, and found an answer on MSDN at: Convert contents of a formatted excel cell to HTML format
I hope this helps you as well, it uses an excel macro.
Edit: When using this I needed to modify the code for nested tags, please find my updates to the macro below:
Function fnConvert2HTML(myCell As Range) As String
Dim bldTagOn, itlTagOn, ulnTagOn, colTagOn As Boolean
Dim i, chrCount As Integer
Dim chrCol, chrLastCol, htmlTxt, htmlEnd As String
bldTagOn = False
itlTagOn = False
ulnTagOn = False
colTagOn = False
chrCol = "NONE"
'htmlTxt = "<html>"
htmlTxt = ""
chrCount = myCell.Characters.Count
For i = 1 To chrCount
htmlEnd = ""
With myCell.Characters(i, 1)
If (.Font.Color) Then
chrCol = fnGetCol(.Font.Color)
If Not colTagOn Then
htmlTxt = htmlTxt & "<font color=#" & chrCol & ">"
colTagOn = True
Else
If chrCol <> chrLastCol Then htmlTxt = htmlTxt & "</font><font color=#" & chrCol & ">"
End If
Else
chrCol = "NONE"
If colTagOn Then
htmlEnd = "</font>" & htmlEnd
'htmlTxt = htmlTxt & "</font>"
colTagOn = False
End If
End If
chrLastCol = chrCol
If .Font.Bold = True Then
If Not bldTagOn Then
htmlTxt = htmlTxt & "<b>"
bldTagOn = True
End If
Else
If bldTagOn Then
'htmlTxt = htmlTxt & "</b>"
htmlEnd = "</b>" & htmlEnd
bldTagOn = False
End If
End If
If .Font.Italic = True Then
If Not itlTagOn Then
htmlTxt = htmlTxt & "<i>"
itlTagOn = True
End If
Else
If itlTagOn Then
'htmlTxt = htmlTxt & "</i>"
htmlEnd = "</i>" & htmlEnd
itlTagOn = False
End If
End If
If .Font.Underline > 0 Then
If Not ulnTagOn Then
htmlTxt = htmlTxt & "<u>"
ulnTagOn = True
End If
Else
If ulnTagOn Then
'htmlTxt = htmlTxt & "</u>"
htmlEnd = "</u>" & htmlEnd
ulnTagOn = False
End If
End If
If (Asc(.Text) = 10) Then
htmlTxt = htmlTxt & htmlEnd & "<br>"
Else
htmlTxt = htmlTxt & htmlEnd & .Text
End If
End With
Next
If colTagOn Then
htmlTxt = htmlTxt & "</font>"
colTagOn = False
End If
If bldTagOn Then
htmlTxt = htmlTxt & "</b>"
bldTagOn = False
End If
If itlTagOn Then
htmlTxt = htmlTxt & "</i>"
itlTagOn = False
End If
If ulnTagOn Then
htmlTxt = htmlTxt & "</u>"
ulnTagOn = False
End If
'htmlTxt = htmlTxt & "</html>"
fnConvert2HTML = htmlTxt
End Function
Function fnGetCol(strCol As String) As String
Dim rVal, gVal, bVal As String
strCol = Right("000000" & Hex(strCol), 6)
bVal = Left(strCol, 2)
gVal = Mid(strCol, 3, 2)
rVal = Right(strCol, 2)
fnGetCol = rVal & gVal & bVal
End Function
Here's an alternative solution which is faster, but produces messier output (because it uses Word's HTML engine). You need to add the following references to your VBA project:
Then, call the following code by running eg. convertToHtml(Range("A1:A100"))
in the immediate window:
' Given a temporary file path, load the HTML in that file
' and return the first paragraph's inner HTML.
Function extractFirstParagraph(filePath As String) As String
Dim fs As New FileSystemObject, _
html As New MSHTML.HTMLDocument, _
par As MSHTML.HTMLGenericElement
html.body.innerHTML = fs.OpenTextFile(filePath).ReadAll
Set par = html.getElementsByTagName("P")(0)
extractFirstParagraph = par.innerHTML
End Function
Sub convertToHtml(rng As Range)
' Open a single Word instance.
Dim w As New Word.Application, d As Word.Document
Set d = w.Documents.Add
Dim cell As Range
Const tempFile As String = "c:\temp\msword.html"
' For each cell in the range ...
For Each cell In rng
If cell.Value <> "" Then
' ... copy it into the Word document ...
cell.Copy
d.Range.PasteSpecial xlPasteFormats
' ... save the Word document as HTML
' in a temporary file ...
d.SaveAs2 tempFile, wdFormatHTML
' ... and extract the first paragraph.
cell.Value = extractFirstParagraph(tempFile)
Debug.Print "Cell " & cell.Address & " done."
End If
Next cell
' Close Word once you're done. Note that if a bug
' is encountered, this cleanup won't occur and the
' Word process will need to be killed to release
' file locks, otherwise you get an unhelpful error.
w.Quit False
End Sub
You can clean up the output using regular expressions by adding a reference to Microsoft VBScript Regular Expressions 5.5, and running a function like this:
' Used to avoid duplication in cleanWordHtml.
Function eraseInPlace(ByRef r As RegExp, _
ByRef s As String, p As String) As String
r.Pattern = p
s = r.Replace(s, "")
End Function
' Eliminate junk tags from HTML generated by Word.
Function cleanWordHtml(inputString As String)
Dim r As New RegExp
r.Global = True
eraseInPlace r, inputString, "mso-[^;""]*(; )?"
eraseInPlace r, inputString, " style="""""
eraseInPlace r, inputString, "<\?xml[^>]*>"
eraseInPlace r, inputString, "<\/?o:[^>]*>"
eraseInPlace r, inputString, "<SPAN><\/SPAN>"
cleanWordHtml = inputString
End Function
If you need to convert <span>
tags to <font>
tags (I also needed to do this because I was importing into an Access rich text field, which doesn't support CSS), try calling this function and passing in the MSHTML objects constructed in the extractFirstParagraph
function:
' Given a <p> DOM node, replace any children of the
' form <span style="color: foo"> with <font color="foo">.
Function convertSpanToFont(ByRef par As MSHTML.HTMLGenericElement, _
ByRef doc As MSHTML.HTMLDocument)
Dim span As MSHTML.HTMLSpanElement, _
font As MSHTML.HTMLFontElement
For Each span In par.getElementsByTagName("span")
Set font = doc.createElement("font")
If IsNull(span.Style.Color) _
Or span.Style.Color <> "" Then
font.Color = span.Style.Color
font.innerHTML = span.innerHTML
span.insertAdjacentElement "afterEnd", font
span.removeNode True
End If
Next span
End Function
I also considered just saving the whole spreadsheet as HTML from Excel and then using another tool to get that into a format that Access can deal with, but Excel's HTML export generates CSS classes rather than inline styles. This method is also helpful if you only need to convert part of your spreadsheet to HTML.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With