Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

VB6/VBScript change file encoding to ansi

I am looking for a way to convert a textfile with UTF8 encoding to ANSI encoding.

How can i go around and achieve this in Visual Basic (VB6) and or vbscript?

like image 898
HerbalMart Avatar asked Mar 03 '11 14:03

HerbalMart


1 Answers

I'm using this script to convert any character set or code page (that i'm aware of).

This script can also handle large files (over one gigabytes), because it streams one line at a time.

' - ConvertCharset.vbs -
'
' Inspired by: 
' http://www.vbforums.com/showthread.php?533879-Generate-text-files-in-IBM-850-encoding
' http://stackoverflow.com/questions/5182102/vb6-vbscript-change-file-encoding-to-ansii/5186170#5186170
' http://stackoverflow.com/questions/13130214/how-to-convert-a-batch-file-stored-in-utf-8-to-something-that-works-via-another
' 
' Start Main
Dim objArguments
Dim strSyntaxtext, strInputCharset, strOutputCharset, strInputFile, strOutputFile 
Dim intReadPosition, intWritePosition
Dim arrCharsets

Const adReadAll = -1
Const adReadLine = -2
Const adSaveCreateOverWrite = 2
Const adSaveCreateNotExist = 1
Const adTypeBinary = 1
Const adTypeText = 2
Const adWriteChar = 0
Const adWriteLine = 1

strSyntaxtext = strSyntaxtext & "Converts the charset of the input text file to output file." & vbCrLf
strSyntaxtext = strSyntaxtext & "Syntax: "  & vbCrLf
strSyntaxtext = strSyntaxtext & WScript.ScriptName & " /InputCharset:utf-8|windows-1252|ibm850|..." & vbCrLf
strSyntaxtext = strSyntaxtext & "              /OutputCharset:utf-8|windows-1252|ibm850|..." & vbCrLf 
strSyntaxtext = strSyntaxtext & "              /InputFile:\\path\to\inputfile.ext" & vbCrLf 
strSyntaxtext = strSyntaxtext & "              /OutputFile:\\path\to\outputfile.ext" & vbCrLf 
strSyntaxtext = strSyntaxtext & "              [/ShowAllCharSets]" & vbCrLf & vbCrLf 
strSyntaxtext = strSyntaxtext & "Example:" & vbCrLf
strSyntaxtext = strSyntaxtext & WScript.ScriptName & " /InputCharset:ibm850 /OutputCharset:utf-8 /InputFile:my_dos.txt /OutputFile:my_utf-8.txt" & vbCrLf

Set objArgumentsNamed = WScript.Arguments.Named
If objArgumentsNamed.Count = 0  Then 
   WScript.Echo strSyntaxtext
   WScript.Quit(99)
End If

arrCharsets = Split("big5,big5-hkscs,euc-jp,euc-kr,gb18030,gb2312,gbk,ibm-thai," &_
                    "ibm00858,ibm01140,ibm01141,ibm01142,ibm01143,ibm01144," &_
                    "ibm01145,ibm01146,ibm01147,ibm01148,ibm01149,ibm037," &_
                    "ibm1026,ibm273,ibm277,ibm278,ibm280,ibm284,ibm285,ibm297," &_
                    "ibm420,ibm424,ibm437,ibm500,ibm775,ibm850,ibm852,ibm855," &_
                    "ibm857,ibm860,ibm861,ibm862,ibm863,ibm864,ibm865,ibm866," &_
                    "ibm869,ibm870,ibm871,iso-2022-jp,iso-2022-kr,iso-8859-1," &_
                    "iso-8859-13,iso-8859-15,iso-8859-2,iso-8859-3,iso-8859-4," &_
                    "iso-8859-5,iso-8859-6,iso-8859-7,iso-8859-8,iso-8859-9," &_
                    "koi8-r,koi8-u,shift_jis,tis-620,us-ascii,utf-16,utf-16be," &_
                    "utf-16le,utf-7,utf-8,windows-1250,windows-1251,windows-1252," &_
                    "windows-1253,windows-1254,windows-1255,windows-1256," &_
                    "windows-1257,windows-1258,unicode", ",")

Set objFileSystem = CreateObject("Scripting.FileSystemObject")

For Each objArgumentNamed in objArgumentsNamed
   Select Case Lcase(objArgumentNamed)
      Case "inputcharset"
         strInputCharset = LCase(objArgumentsNamed(objArgumentNamed))
         If Not IsCharset(strInputCharset) Then 
            WScript.Echo "The InputCharset (" & strInputCharset & ") is not valid, quitting. The valid charsets are:"  & vbCrLf
            x = ShowCharsets()
            WScript.Quit(1)
         End If
      Case "outputcharset"
         strOutputCharset = LCase(objArgumentsNamed(objArgumentNamed))
         If Not IsCharset(strOutputCharset) Then 
            WScript.Echo "The strOutputCharset (" & strOutputCharset & ") is not valid, quitting. The valid charsets are:"  & vbCrLf
            x = ShowCharsets()
            WScript.Quit(2)
         End If
      Case "inputfile"
         strInputFile = LCase(objArgumentsNamed(objArgumentNamed))
         If Not objFileSystem.FileExists(strInputFile) Then  
            WScript.Echo "The InputFile (" & strInputFile  & ") does not exist, quitting."  & vbCrLf
            WScript.Quit(3)
         End If
      Case "outputfile"
         strOutputFile = LCase(objArgumentsNamed(objArgumentNamed))
         If objFileSystem.FileExists(strOutputFile) Then  
            WScript.Echo "The OutputFile  (" & strOutputFile & ") exists, quitting."  & vbCrLf
            WScript.Quit(4)
         End If
      Case "showallcharsets"
         x = ShowCharsets()
      Case Else
         WScript.Echo "Unknown parameter, quitting: /" & objArgumentNamed & ":" & objArgumentsNamed(objArgumentNamed)
         WScript.Echo strSyntaxtext
   End Select 
Next

If Len(strInputCharset) > 0 And Len(strOutputCharset) > 0 And Len(strInputFile) > 0 And Len(strOutputFile) Then 
   Set objInputStream = CreateObject("ADODB.Stream")
   Set objOutputStream = CreateObject("ADODB.Stream")

   With objInputStream
      .Open
      .Type = adTypeBinary
      .LoadFromFile strInputFile
      .Type = adTypeText
      .Charset = strInputCharset
      intWritePosition = 0
      objOutputStream.Open
      objOutputStream.Charset = strOutputCharset
      Do While .EOS <> True
         strText = .ReadText(adReadLine)
         objOutputStream.WriteText strText, adWriteLine
      Loop
      .Close
   End With
   objOutputStream.SaveToFile strOutputFile , adSaveCreateNotExist
   objOutputStream.Close
   WScript.Echo "The " & objFileSystem.GetFileName(strInputFile) & " was converted to "  & objFileSystem.GetFileName(strOutputFile) & " OK."
End If
' End Main

' Start Functions 

Function IsCharset(strMyCharset)
IsCharset = False
For Each strCharset in arrCharsets
   If strCharset = strMyCharset Then 
      IsCharset = True
      Exit For
   End If
Next
End Function 

Function ShowCharsets()
strDisplayCharsets = ""
intCounter = 0
For Each strcharset in arrCharsets
   intCounter = intCounter + Len(strcharset) + 1
   strDisplayCharsets = strDisplayCharsets & strcharset & ","
   If intCounter > 67 Then 
      intCounter = 0
      strDisplayCharsets = strDisplayCharsets & vbCrLf 
   End If
Next
strDisplayCharsets = Mid(strDisplayCharsets, 1, Len(strDisplayCharsets)-1)
WScript.Echo strDisplayCharsets 
End Function 
' End Functions 
like image 144
Ciove Avatar answered Sep 24 '22 18:09

Ciove