I wrote a small piece of code that searches for a regex pattern and replaces each match with something else, then creates a new output file with the changes made. The code seems to work well with smaller files, but for files of 100 MB or larger I am getting a System.OutOfMemoryException error.
Here's my code:
// Build the output path on the current user's Desktop: "FIXED_<input name without extension>.tmx".
// Despite its name, `foldername` holds a full file path, not a folder.
// NOTE(review): textBox1.Text is presumably the input file path entered in the UI - confirm.
string foldername = Path.Combine(
Environment.GetFolderPath(Environment.SpecialFolder.Desktop),
String.Format("FIXED_{0}.tmx",
Path.GetFileNameWithoutExtension(textBox1.Text)));
// Reads the whole input file into one string; this is the statement reported to throw
// OutOfMemoryException for inputs of roughly 100 MB and more.
string text = File.ReadAllText(textBox1.Text);
// Strip everything after the tag name from every opening <seg ...> tag, leaving a bare <seg>.
// RegexOptions.Multiline only affects ^ and $, which this pattern does not use.
text = Regex.Replace(text, @"<seg\b[^>]*>", "<seg>", RegexOptions.Multiline);
// Where <seg> is followed by a line break and then </tuv>, insert the missing </seg>.
// The verbatim pattern spans two source lines, so it contains a literal line break whose
// exact characters depend on how this source file's line endings are saved.
text = Regex.Replace(text, @"<seg>
</tuv>", "<seg></seg></tuv>", RegexOptions.Multiline);
// Write the transformed text to the path built above.
File.WriteAllText(foldername, text);
Visual studio highlights the string text = File.ReadAllText(textBox1.Text);
section.
I thought that maybe using File.ReadAllLines would work better, but I was not able to make it work with regex.
Can anybody help me with this? I am a newbie to C# and my code is probably not the best.
I'm afraid you have to implement the replacement yourself. The following sample code uses a state machine to replace <seg\b[^>]*>
with <seg>
. Its only known limitation is that if the file ends with an unterminated tag such as <seg attr=""
, then only <seg
will be written to the output.
/// <summary>
/// Scanner states used by <c>TruncateSeg</c>. Each value records how much of an opening
/// <c>&lt;seg</c> tag has been recognized so far.
/// </summary>
enum TruncateSegState
{
    /// <summary>Not inside a candidate tag; characters are copied through.</summary>
    Idle,
    /// <summary>The last character read was <c>&lt;</c>.</summary>
    TagStart,
    /// <summary>The last characters read were <c>&lt;s</c>.</summary>
    TagStartS,
    /// <summary>The last characters read were <c>&lt;se</c>.</summary>
    TagStartSE,
    /// <summary>The last characters read were <c>&lt;seg</c>.</summary>
    TagStartSEG,
    /// <summary>Inside the attribute part of a <c>&lt;seg ...</c> tag; characters are withheld until <c>&gt;</c>.</summary>
    TagSEG
}

/// <summary>
/// Copies <paramref name="input"/> to <paramref name="output"/> one character at a time,
/// rewriting every opening tag of the form <c>&lt;seg</c> + whitespace + anything + <c>&gt;</c>
/// to a bare <c>&lt;seg&gt;</c>. All other text is copied unchanged.
/// </summary>
/// <param name="input">Reader positioned at the start of the text to process.</param>
/// <param name="output">Writer that receives the processed text. It is not flushed or closed here.</param>
/// <remarks>
/// If the input ends inside an unterminated <c>&lt;seg ...</c> tag, the withheld characters are
/// written out unchanged instead of being dropped.
/// </remarks>
static void TruncateSeg(StreamReader input, StreamWriter output)
{
    TruncateSegState state = TruncateSegState.Idle;

    // Characters withheld while inside "<seg ...". They are discarded once the closing '>'
    // is seen, or written back if the input ends first. This stays small for well-formed
    // input because it only ever holds one tag's attributes.
    StringBuilder skipped = new StringBuilder();

    int next;
    // Read() returns -1 at end of input, so one call both fetches and tests.
    while ((next = input.Read()) >= 0)
    {
        char ch = (char)next;

        if (state == TruncateSegState.TagSEG)
        {
            if (ch == '>')
            {
                // End of the tag: drop the attributes and emit only the '>'.
                skipped.Clear();
                state = TruncateSegState.Idle;
                output.Write(ch);
            }
            else
            {
                skipped.Append(ch);
            }
            continue;
        }

        if (state == TruncateSegState.TagStartSEG && char.IsWhiteSpace(ch))
        {
            // "<seg" followed by whitespace: start withholding the attribute text.
            skipped.Append(ch);
            state = TruncateSegState.TagSEG;
            continue;
        }

        // Everything else, including the "<seg" prefix itself, is copied through.
        output.Write(ch);

        if (ch == '<')
        {
            // A '<' always starts a new candidate, even in the middle of a failed one
            // (e.g. the second '<' in "<<seg a>").
            state = TruncateSegState.TagStart;
        }
        else if (state == TruncateSegState.TagStart && ch == 's')
        {
            state = TruncateSegState.TagStartS;
        }
        else if (state == TruncateSegState.TagStartS && ch == 'e')
        {
            state = TruncateSegState.TagStartSE;
        }
        else if (state == TruncateSegState.TagStartSE && ch == 'g')
        {
            state = TruncateSegState.TagStartSEG;
        }
        else
        {
            state = TruncateSegState.Idle;
        }
    }

    if (state == TruncateSegState.TagSEG)
    {
        // Input ended inside an unterminated tag: nothing was replaced, so restore the text.
        output.Write(skipped.ToString());
    }
}
Usage:
// First pass: stream "input.txt" through TruncateSeg into "temp.txt".
// Both files are closed when their using blocks end (the writer first, then the reader).
using (var source = new StreamReader("input.txt"))
{
    using (var target = new StreamWriter("temp.txt"))
    {
        TruncateSeg(source, target);
    }
}
After you have generated temp.txt
, use it as the input for the next method, which adds the missing </seg>
tag.
/// <summary>
/// Scanner states used by <c>ReplaceSegTuv</c>.
/// </summary>
enum ReplaceSegTuvState
{
    /// <summary>Copying text through while looking for <c>&lt;seg&gt;</c>.</summary>
    Idle,
    /// <summary>Just after <c>&lt;seg&gt;</c>: holding whitespace and a possible <c>&lt;/tuv&gt;</c>.</summary>
    InsideSEG
}

/// <summary>
/// Copies <paramref name="input"/> to <paramref name="output"/> one character at a time,
/// replacing every <c>&lt;seg&gt;</c> that is followed only by optional whitespace and then
/// <c>&lt;/tuv&gt;</c> with <c>&lt;seg&gt;&lt;/seg&gt;&lt;/tuv&gt;</c>. The whitespace between the
/// two tags is dropped in that case. All other text is copied unchanged.
/// </summary>
/// <param name="input">Reader positioned at the start of the text to process.</param>
/// <param name="output">Writer that receives the processed text. It is not flushed or closed here.</param>
/// <remarks>
/// Tags are matched character by character without reading ahead, so a tag that starts right
/// after a short tag (for example <c>&lt;a&gt;&lt;seg&gt;</c>) is still recognized. Text held
/// back when the input ends is written out unchanged.
/// </remarks>
static void ReplaceSegTuv(StreamReader input, StreamWriter output)
{
    const string openTag = "<seg>";
    const string closeTag = "</tuv>";

    ReplaceSegTuvState state = ReplaceSegTuvState.Idle;

    // Whitespace seen after "<seg>" that is withheld until we know whether "</tuv>" follows.
    StringBuilder segBuffer = new StringBuilder();

    // Number of leading characters of openTag / closeTag matched so far.
    int openMatched = 0;
    int closeMatched = 0;

    int next;
    // Read() returns -1 at end of input, so one call both fetches and tests.
    while ((next = input.Read()) >= 0)
    {
        char ch = (char)next;

        if (state == ReplaceSegTuvState.InsideSEG)
        {
            if (closeMatched == 0)
            {
                if (char.IsWhiteSpace(ch))
                {
                    segBuffer.Append(ch);
                    continue;
                }

                if (ch == '<')
                {
                    // Possible start of "</tuv>"; hold it back as well.
                    closeMatched = 1;
                    continue;
                }
            }
            else if (ch == closeTag[closeMatched])
            {
                closeMatched++;
                if (closeMatched == closeTag.Length)
                {
                    // Full "<seg>" + whitespace + "</tuv>": insert the missing "</seg>"
                    // and drop the whitespace that was held back.
                    output.Write("</seg>");
                    output.Write(closeTag);
                    segBuffer.Clear();
                    closeMatched = 0;
                    state = ReplaceSegTuvState.Idle;
                }
                continue;
            }

            // Not the pattern after all: emit everything that was held back, unchanged.
            output.Write(segBuffer.ToString());
            output.Write(closeTag.Substring(0, closeMatched));
            segBuffer.Clear();

            // A lone held-back '<' may itself begin a new "<seg>"; a longer prefix ("</...")
            // cannot, because its second character is '/'.
            openMatched = closeMatched == 1 ? 1 : 0;
            closeMatched = 0;
            state = ReplaceSegTuvState.Idle;
            // Fall through so the current character is handled by the Idle logic below.
        }

        // Idle: copy the character and track progress through "<seg>".
        output.Write(ch);
        if (ch == openTag[openMatched])
        {
            openMatched++;
        }
        else
        {
            // '<' occurs only at the start of openTag, so it is the only possible restart point.
            openMatched = ch == '<' ? 1 : 0;
        }

        if (openMatched == openTag.Length)
        {
            openMatched = 0;
            state = ReplaceSegTuvState.InsideSEG;
        }
    }

    if (state == ReplaceSegTuvState.InsideSEG)
    {
        // Input ended before the pattern completed: restore the held-back text.
        output.Write(segBuffer.ToString());
        output.Write(closeTag.Substring(0, closeMatched));
    }
}
Usage:
// Second pass: stream "temp.txt" through ReplaceSegTuv into "output.txt".
// Both files are closed when their using blocks end (the writer first, then the reader).
using (var source = new StreamReader("temp.txt"))
{
    using (var target = new StreamWriter("output.txt"))
    {
        ReplaceSegTuv(source, target);
    }
}
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With