I have a few Word documents, each containing a few hundreds of pages of scientific data which includes:
Problem is, storing this data in the form of Word is not efficient for us. So we want to store all this information in a Database (MySQL). We want to convert these formatting to LaTex.
Is there any way to iterate through all the subcripts & superscripts & Equations using VBA?
What about iterating through mathematical equations?
Based on your comment on Michael's answer
No! I just want to replace content in the subscript with _{ subscriptcontent } and similarly superscript content with ^{ superscriptcontent }. That would be the Tex equivalent. Now, I'll just copy everything to a text file which will remove the formatting but leaves these characters. Problem solved. But for that I need to access the subscript & superscript objects of document
Sub sampler()
Selection.HomeKey wdStory
With Selection.find
.ClearFormatting
.Replacement.ClearFormatting
.Font.Superscript = True
.Replacement.Text = "^^{^&}"
.Execute Replace:=wdReplaceAll
.Font.Subscript = True
.Replacement.Text = "_{^&}"
.Execute Replace:=wdReplaceAll
End With
End Sub
EDIT
Or If you also want to convert OMaths
to TeX / LaTeX
, then do something like:
For a completely different idea visit David Carlisle's blog, that might interest you.
UPDATE
The module
Option Explicit
'This module requires the following references:
'Microsoft Scripting Runtime
'MicroSoft XML, v6.0
Private fso As New Scripting.FileSystemObject
Private omml2mml$, mml2Tex$
Public Function ProcessFile(fpath$) As Boolean
'convPath set to my system at (may vary on your system):
omml2mml = "c:\program files\microsoft office\office14\omml2mml.xsl"
'download: http://prdownloads.sourceforge.net/xsltml/xsltml_2.0.zip
'unzip at «c:\xsltml_2.0»
mml2Tex = "c:\xsltml_2.0\mmltex.xsl"
Documents.Open fpath
'Superscript + Subscript
Selection.HomeKey wdStory
With Selection.find
.ClearFormatting
.Replacement.ClearFormatting
'to make sure no paragraph should contain any emphasis
.Text = "^p"
.Replacement.Text = "^&"
.Replacement.Font.Italic = False
.Replacement.Font.Bold = False
.Replacement.Font.Superscript = False
.Replacement.Font.Subscript = False
.Replacement.Font.SmallCaps = False
.Execute Replace:=wdReplaceAll
.Font.Italic = True
.Replacement.Text = "\textit{^&}"
.Execute Replace:=wdReplaceAll
.Font.Bold = True
.Replacement.Text = "\textbf{^&}"
.Execute Replace:=wdReplaceAll
.Font.SmallCaps = True
.Replacement.Text = "\textsc{^&}"
.Execute Replace:=wdReplaceAll
.Font.Superscript = True
.Replacement.Text = "^^{^&}"
.Execute Replace:=wdReplaceAll
.Font.Subscript = True
.Replacement.Text = "_{^&}"
.Execute Replace:=wdReplaceAll
End With
Dim dict As New Scripting.Dictionary
Dim om As OMath, t, counter&, key$
key = Replace(LCase(Dir(fpath)), " ", "_omath_")
counter = 0
For Each om In ActiveDocument.OMaths
DoEvents
counter = counter + 1
Dim tKey$, texCode$
tKey = "<" & key & "_" & counter & ">"
t = om.Range.WordOpenXML
texCode = TransformString(TransformString(CStr(t), omml2mml), mml2Tex)
om.Range.Select
Selection.Delete
Selection.Text = tKey
dict.Add tKey, texCode
Next om
Dim latexDoc$, oPath$
latexDoc = "\documentclass[10pt]{article}" & vbCrLf & _
"\usepackage[utf8]{inputenc} % set input encoding" & vbCrLf & _
"\usepackage{amsmath,amssymb}" & vbCrLf & _
"\begin{document}" & vbCrLf & _
"###" & vbCrLf & _
"\end{document}"
oPath = StrReverse(Mid(StrReverse(fpath), InStr(StrReverse(fpath), "."))) & "tex"
'ActiveDocument.SaveAs FileName:=oPath, FileFormat:=wdFormatText, Encoding:=1200
'ActiveDocument.SaveAs FileName:=oPath, FileFormat:=wdFormatText, Encoding:=65001
ActiveDocument.Close
Dim c$, i
c = fso.OpenTextFile(oPath).ReadAll()
counter = 0
For Each i In dict
counter = counter + 1
Dim findText$, replaceWith$
findText = CStr(i)
replaceWith = dict.item(i)
c = Replace(c, findText, replaceWith, 1, 1, vbTextCompare)
Next i
latexDoc = Replace(latexDoc, "###", c)
Dim ost As TextStream
Set ost = fso.CreateTextFile(oPath)
ost.Write latexDoc
ProcessFile = True
End Function
Private Function CreateDOM()
Dim dom As New DOMDocument60
With dom
.async = False
.validateOnParse = False
.resolveExternals = False
End With
Set CreateDOM = dom
End Function
Private Function TransformString(xmlString$, xslPath$) As String
Dim xml, xsl, out
Set xml = CreateDOM
xml.LoadXML xmlString
Set xsl = CreateDOM
xsl.Load xslPath
out = xml.transformNode(xsl)
TransformString = out
End Function
The calling(from immediate window):
?ProcessFile("c:\test.doc")
The result would be created as test.tex
in c:\
.
The module may need to fix some places. If so let me know.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With