I have documents scanned as .jpg pictures in a folder, and I would like to run OCR in C# on each document in that folder, one after another. So far I have done this:
/// <summary>
/// Runs OCR on every JPG image in a folder and writes the recognized text
/// to a .txt file with the same base name next to each image.
/// </summary>
/// <param name="directoryPath">
/// Folder to scan. When null or empty, the "OCRTempPictures" folder inside
/// the current user's My Pictures folder is used.
/// </param>
/// <returns>
/// Text recognized from the last image whose OCR succeeded, or an empty
/// string when no image was recognized.
/// </returns>
public string CheckFilesAndDoOCR(string directoryPath)
{
    // Environment.SpecialFolder.MyPictures is an enum member: concatenating it
    // with a string yields the literal text "MyPictures", not a path. The real
    // folder has to be resolved through Environment.GetFolderPath.
    string defaultDirectory = Path.Combine(
        Environment.GetFolderPath(Environment.SpecialFolder.MyPictures),
        "OCRTempPictures");

    // Honor the caller's folder; fall back to the default only when none is given.
    if (string.IsNullOrEmpty(directoryPath))
    {
        directoryPath = defaultDirectory;
    }

    string TheTxt = "";

    foreach (string path in Directory.GetFiles(directoryPath))
    {
        FileInfo nfo = new FileInfo(path);

        // AlltoJPG is defined elsewhere; it returns the file name to process
        // (presumably after converting the file to JPG - confirm against its definition).
        string fileName = AlltoJPG(nfo);
        FileInfo foo = new FileInfo(fileName);

        // Only JPG files are processed; compare case-insensitively so ".Jpg" etc. also match.
        if (!string.Equals(foo.Extension, ".jpg", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        try
        {
            // OCR operations
            MODI.Document md = new MODI.Document();
            md.Create(foo.FullName);
            try
            {
                md.OCR(MODI.MiLANGUAGES.miLANG_ENGLISH, false, false);
                MODI.Image image = (MODI.Image)md.Images[0];
                TheTxt = image.Layout.Text;
            }
            finally
            {
                // Always close the document, even when OCR fails; otherwise the
                // image file stays open and later access reports "file in use".
                md.Close(false);
            }

            // Create a text file with the same base name as the image.
            // Path.GetFileNameWithoutExtension removes only the trailing extension,
            // whereas Name.Replace(Extension, "") would also strip it from the middle of a name.
            string txtFileName = Path.Combine(
                foo.DirectoryName,
                Path.GetFileNameWithoutExtension(foo.Name) + ".txt");

            // FileMode.CreateNew is kept on purpose: an existing .txt file is not
            // overwritten; the resulting IOException is logged below.
            using (FileStream createFile = new FileStream(txtFileName, FileMode.CreateNew))
            using (StreamWriter writeFile = new StreamWriter(createFile))
            {
                writeFile.Write(TheTxt);
            }
        }
        catch (Exception ex)
        {
            // Expected errors: log them and continue with the next file.
            string LogPath = Path.Combine(defaultDirectory, "OCRInfo.txt");
            Logger(LogPath, "| Exception: Source[" + ex.Source + "] Message[" + ex.Message + "] InnerException[" + ex.InnerException + "] StackTrace[" + ex.StackTrace + "] | ");
        }
    }

    return TheTxt;
}
But MODI throws errors such as "OCR running!" or "Can't reach file. File is in use."
Depending on the situation:
How can I avoid these errors?
Is there any way to stop the OCR action and release all objects in use?
If anyone can answer any of the questions above, it would be appreciated.
Here is the fully working code, thanks to @Ramhound.
The code below takes a folder full of pictures and runs an OCR scan on them one by one.
/// <summary>
/// Gets all JPG images inside a folder, triggers OCR on each one and
/// saves the recognized text to a .txt file next to the image.
/// Each processed image is deleted afterwards.
/// </summary>
/// <param name="directoryPath"> Path to Folder </param>
/// <returns>
/// Text recognized from the last image processed, or an empty string
/// when the folder contains no JPG image.
/// </returns>
public string CheckFileAndDoOCR(string directoryPath)
{
    string TheTxt = "";

    foreach (string filePath in Directory.GetFiles(directoryPath))
    {
        FileInfo foo = new FileInfo(filePath);

        // Only JPG files are processed; compare case-insensitively so ".Jpg" etc. also match.
        if (!string.Equals(foo.Extension, ".jpg", StringComparison.OrdinalIgnoreCase))
        {
            continue;
        }

        // Start OCR procedure
        TheTxt = DoOCR(foo.FullName);

        // Create the TXT file next to the image file.
        // Path.GetFileNameWithoutExtension removes only the trailing extension,
        // whereas Name.Replace(Extension, "") would also strip it from the middle of a name.
        string txtFileName = Path.Combine(
            foo.DirectoryName,
            Path.GetFileNameWithoutExtension(foo.Name) + ".txt");

        // File.WriteAllText truncates an existing file and always releases the handle.
        // FileMode.OpenOrCreate left stale bytes behind when the previous file was longer.
        File.WriteAllText(txtFileName, TheTxt);

        // Delete used pictures (Optional)
        // Kept inside the JPG branch so that only pictures that were actually
        // processed are removed, never unrelated files in the same folder.
        try
        {
            foo.Delete();
        }
        catch (Exception ex)
        {
            // LogPath is defined outside this method.
            Logger(LogPath, "| Exception: Source[" + ex.Source + "] Message[" + ex.Message +
                "] InnerException[" + ex.InnerException + "] StackTrace[" + ex.StackTrace + "] | ");
        }
    }

    return TheTxt;
}
// DoOCR
//
/// <summary>
/// Start an OCR scan on given ImageFile
/// </summary>
/// <param name="FullPath"> Path to ImageFile </param>
/// <returns> Text recognized on the first image of the document </returns>
public string DoOCR(string FullPath)
{
    string txt;
    MODI.Image image = null;

    // Handler kept in a local so the very same delegate can be unsubscribed later.
    MODI._IDocumentEvents_OnOCRProgressEventHandler progressHandler =
        new MODI._IDocumentEvents_OnOCRProgressEventHandler(this.ShowProgress);

    // OCR operations
    MODI.Document md = new MODI.Document(); // Create MODI.Document
    try
    {
        md.Create(FullPath); // Fill MODI.Document with my file

        // Show progress of OCR
        md.OnOCRProgress += progressHandler;
        try
        {
            // Begin OCR
            md.OCR(MODI.MiLANGUAGES.miLANG_ENGLISH, false, false);

            // Image from file
            image = (MODI.Image)md.Images[0];
            txt = image.Layout.Text;

            // Optionally you can get only the first word:
            //   MODI.Word word = image.Layout.Words[0];
            //   txt = word.Text;
        }
        finally
        {
            // Detach the handler and close the document even when OCR throws,
            // so the image file is not left open ("file in use").
            md.OnOCRProgress -= progressHandler;
            md.Close(false);
        }
    }
    finally
    {
        // Drop the references, then force a collection and run finalizers.
        // This is the original cleanup sequence; it now also runs when OCR fails.
        image = null;
        md = null;
        GC.Collect();
        GC.WaitForPendingFinalizers();
    }

    // Return Text
    return txt;
}
If you love us, you can donate via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With