Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Multithreading within a loop c#

I am making a tool in C# that iterates through a large file directory and extracts certain information. The directory is organised by language (LCID), so I want to use multithreading to go through the directory, with one thread per language folder.

My code currently scans through a small number of the files and extracts the required data without multithreading, but on a large scale it will take too long.

I set up a thread within my loop that gets the LCID folders, but got the following error: "No overload for 'HBscan' matches delegate 'System.Threading.ThreadStart'". From what I read online, I then put my method within a class so I could have parameters, and now there are no errors, but the code is not iterating through the files properly. It is leaving files out of its scan.

I was wondering if anyone could see where I was going wrong with my code that's making it not perform properly? Thanks.

/// <summary>
/// Starts one scanning thread for every folder found directly under the root directory.
/// Each of those folders is expected to be named after a language id (LCID).
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
public static void Main(string[] args)
    {
        //change rootDirectory variable to point to directory which you wish to scan through
        string rootDirectory = @"C:\sample";

        //every immediate sub-folder of the root gets its own scanner and its own thread
        foreach (string languageDirectory in Directory.GetDirectories(rootDirectory))
        {
            //the LCID is the last segment of the folder path
            string lcid = Path.GetFileName(languageDirectory);
            Console.WriteLine(lcid);

            //the scanner instance carries the folder, so the parameterless HBscan() fits ThreadStart
            HBScanner scanner = new HBScanner(new DirectoryInfo(languageDirectory));
            Thread worker = new Thread(new ThreadStart(scanner.HBscan));
            worker.Start();
        }

        Console.WriteLine("Scanning through files...");

    }
    /// <summary>
    /// Recursively walks a directory tree and writes one <c>Item</c> per file to the console.
    /// For every file it records the asset id, the LCID, the last write time, the full path
    /// and the file name.
    /// </summary>
    public class HBScanner
    {
        //asset id in a file name: two capital letters followed by 8-10 digits, directly before a dot.
        //Built once and shared; Regex instances are documented as immutable and thread safe.
        private static readonly Regex AssetIdRegex = new Regex(@"([A-Z][A-Z][0-9]{8,10})\.");

        //LCID in a path: the 4-5 digit folder name directly below the "sample" folder
        private static readonly Regex LcidRegex = new Regex(@"sample\\(\d{4,5})");

        private DirectoryInfo DirectoryToScan { get; set; }

        /// <summary>
        /// Creates a scanner bound to the directory at which the scan starts.
        /// </summary>
        /// <param name="startDir">Directory whose files and sub-folders are scanned.</param>
        public HBScanner(DirectoryInfo startDir)
        {
            DirectoryToScan = startDir;
        }

        /// <summary>
        /// Scans the directory given to the constructor. Being parameterless, this overload
        /// can be used as a <c>ThreadStart</c> target.
        /// </summary>
        public void HBscan()
        {
            HBscan(DirectoryToScan);
        }

        /// <summary>
        /// Scans every file in <paramref name="directoryToScan"/>, then recurses into each of its sub-folders.
        /// </summary>
        /// <param name="directoryToScan">Directory to scan.</param>
        public static void HBscan(DirectoryInfo directoryToScan)
        {
            //iterate through the directory and get file details
            foreach (FileInfo file in directoryToScan.GetFiles("*.*"))
            {
                //first check the file name for asset id using regular expression
                string asset = AssetIdRegex.Match(file.Name).Groups[1].Value;

                //get LCID from the file path using regular expression
                string lcid = LcidRegex.Match(file.FullName).Groups[1].Value;

                //if it can't find it from filename, it looks into xml
                //(extension compared case-insensitively so that ".XML" files are not skipped)
                if (asset == "" && string.Equals(file.Extension, ".xml", StringComparison.OrdinalIgnoreCase))
                {
                    System.Diagnostics.Debug.WriteLine("File is an .XML");
                    System.Diagnostics.Debug.WriteLine("file.FullName is: " + file.FullName);
                    asset = ReadAssetIdFromXml(file.FullName);
                }

                Item newFile = new Item
                {
                    AssetID = asset,
                    LCID = lcid,
                    LastModifiedDate = file.LastWriteTime,
                    Path = file.FullName,
                    FileName = file.Name
                };

                Console.WriteLine(newFile);
            }

            //get sub-folders for the current directory
            foreach (DirectoryInfo subDirectory in directoryToScan.GetDirectories("*.*"))
            {
                HBscan(subDirectory);
            }
        }

        /// <summary>
        /// Looks for an asset id inside an XML file: first the text of the first
        /// <c>assetid</c> element, otherwise the <c>AssetId</c> attribute of the first
        /// <c>Asset</c> element.
        /// </summary>
        /// <param name="path">Full path of the XML file to load.</param>
        /// <returns>The asset id, or an empty string when neither location provides one.</returns>
        private static string ReadAssetIdFromXml(string path)
        {
            XmlDocument xmlDoc = new XmlDocument();
            xmlDoc.Load(path);

            //check for <assetid> element
            XmlNode assetIdNode = xmlDoc.GetElementsByTagName("assetid")[0];
            if (assetIdNode != null)
            {
                return assetIdNode.InnerText;
            }

            //check for <Asset> element and its AssetId attribute
            XmlNode assetNode = xmlDoc.GetElementsByTagName("Asset")[0];
            if (assetNode != null && assetNode.Attributes != null)
            {
                //the indexer yields null when the attribute is absent, so test before reading Value
                XmlAttribute assetIdAttribute = assetNode.Attributes["AssetId"];
                if (assetIdAttribute != null)
                {
                    return assetIdAttribute.Value;
                }
            }

            return "";
        }
    }
like image 568
RebeccaD Avatar asked Apr 06 '26 17:04

RebeccaD


2 Answers

I haven't tested this, but I think it should work.

The code will create one scanner per thread and perform the HBscan method.

/// <summary>
/// Starts one scanning thread for every folder found directly under the root directory.
/// Each thread creates its own <c>HBScanner</c> for its folder and runs <c>HBscan()</c>.
/// </summary>
/// <param name="args">Command-line arguments (not used).</param>
public static void Main(string[] args)
        {
            //change rootDirectory variable to point to directory which you wish to scan through
            string rootDirectory = @"C:\sample";

            //get the LCIDs from the folders
            string[] filePaths = Directory.GetDirectories(rootDirectory);
            for (int i = 0; i < filePaths.Length; i++)
            {
                //Copy the path into a local declared inside the loop body. The lambda below runs
                //later on another thread; capturing i directly would make it read filePaths[i]
                //after the loop has already advanced (wrong folder, or index out of range).
                string languageDirectory = filePaths[i];

                string LCID = Path.GetFileName(languageDirectory);
                Console.WriteLine(LCID);

                Thread t1 = new Thread(() => new HBScanner(new DirectoryInfo(languageDirectory)).HBscan());
                t1.Start();
            }

            Console.WriteLine("Scanning through files...");

        }
        /// <summary>
        /// Recursively walks a directory tree and writes one <c>Item</c> per file to the console.
        /// For every file it records the asset id, the LCID, the last write time, the full path
        /// and the file name.
        /// </summary>
        public class HBScanner
        {
            //asset id in a file name: two capital letters followed by 8-10 digits, directly before a dot.
            //Built once and shared; Regex instances are documented as immutable and thread safe.
            private static readonly Regex AssetIdRegex = new Regex(@"([A-Z][A-Z][0-9]{8,10})\.");

            //LCID in a path: the 4-5 digit folder name directly below the "sample" folder
            private static readonly Regex LcidRegex = new Regex(@"sample\\(\d{4,5})");

            private DirectoryInfo DirectoryToScan { get; set; }

            /// <summary>
            /// Creates a scanner bound to the directory at which the scan starts.
            /// </summary>
            /// <param name="startDir">Directory whose files and sub-folders are scanned.</param>
            public HBScanner(DirectoryInfo startDir)
            {
                DirectoryToScan = startDir;
            }

            /// <summary>
            /// Scans the directory given to the constructor.
            /// </summary>
            public void HBscan()
            {
                HBscan(DirectoryToScan);
            }

            /// <summary>
            /// Scans every file in <paramref name="directoryToScan"/>, then recurses into each of its sub-folders.
            /// </summary>
            /// <param name="directoryToScan">Directory to scan.</param>
            public static void HBscan(DirectoryInfo directoryToScan)
            {
                //iterate through the directory and get file details
                foreach (FileInfo file in directoryToScan.GetFiles("*.*"))
                {
                    //first check the file name for asset id using regular expression
                    string asset = AssetIdRegex.Match(file.Name).Groups[1].Value;

                    //get LCID from the file path using regular expression
                    string lcid = LcidRegex.Match(file.FullName).Groups[1].Value;

                    //if it can't find it from filename, it looks into xml
                    //(extension compared case-insensitively so that ".XML" files are not skipped)
                    if (asset == "" && string.Equals(file.Extension, ".xml", StringComparison.OrdinalIgnoreCase))
                    {
                        System.Diagnostics.Debug.WriteLine("File is an .XML");
                        System.Diagnostics.Debug.WriteLine("file.FullName is: " + file.FullName);
                        asset = ReadAssetIdFromXml(file.FullName);
                    }

                    Item newFile = new Item
                    {
                        AssetID = asset,
                        LCID = lcid,
                        LastModifiedDate = file.LastWriteTime,
                        Path = file.FullName,
                        FileName = file.Name
                    };

                    Console.WriteLine(newFile);
                }

                //get sub-folders for the current directory
                foreach (DirectoryInfo subDirectory in directoryToScan.GetDirectories("*.*"))
                {
                    HBscan(subDirectory);
                }
            }

            /// <summary>
            /// Looks for an asset id inside an XML file: first the text of the first
            /// <c>assetid</c> element, otherwise the <c>AssetId</c> attribute of the first
            /// <c>Asset</c> element.
            /// </summary>
            /// <param name="path">Full path of the XML file to load.</param>
            /// <returns>The asset id, or an empty string when neither location provides one.</returns>
            private static string ReadAssetIdFromXml(string path)
            {
                XmlDocument xmlDoc = new XmlDocument();
                xmlDoc.Load(path);

                //check for <assetid> element
                XmlNode assetIdNode = xmlDoc.GetElementsByTagName("assetid")[0];
                if (assetIdNode != null)
                {
                    return assetIdNode.InnerText;
                }

                //check for <Asset> element and its AssetId attribute
                XmlNode assetNode = xmlDoc.GetElementsByTagName("Asset")[0];
                if (assetNode != null && assetNode.Attributes != null)
                {
                    //the indexer yields null when the attribute is absent, so test before reading Value
                    XmlAttribute assetIdAttribute = assetNode.Attributes["AssetId"];
                    if (assetIdAttribute != null)
                    {
                        return assetIdAttribute.Value;
                    }
                }

                return "";
            }
        }
like image 188
Stian Standahl Avatar answered Apr 10 '26 16:04

Stian Standahl


If you are using .NET 4.0, you could use the TPL (Task Parallel Library) and Parallel.For/Parallel.ForEach to work on multiple items at the same time fairly easily.

I only came across it a few days ago and it's very interesting. It can give you great performance by using multiple threads on different cores to speed up your work. Of course, the gain might be limited in your case due to excessive I/O access.

But it may be worth a try! (And altering your current source just to check it out is fairly easy.)

like image 33
Dennis Alexander Avatar answered Apr 10 '26 18:04

Dennis Alexander



Donate For Us

If you love us, you can donate to us via PayPal or buy me a coffee so we can maintain and grow! Thank you!