I wrote a personal web scraper that scrapes artist information. The code works, but when I press the button and it starts processing the while loop, the GUI freezes. I got the text boxes to update by calling .Refresh(), but I can't move the form, and the only way to cancel the program is to force quit. I'm in the process of rewriting this so I don't have this problem. I also heard about threading and wanted to see if that would work, and whether it would also make things a little faster. The program is scraping 15,000+ pages, and each page has another 10 or so pages it needs to scrape, so the program could run for hours before it finally finishes.
Here is my code.
/// <summary>
/// Click handler for the "Get" button. For every album id from 0 through
/// <c>maxCount</c> it downloads the album detail page, extracts the artist,
/// album, genre, release date and picture URL by plain string searching,
/// shows the values in the form's controls and inserts one row into MySQL.
/// </summary>
/// <param name="sender">The control that raised the event (unused).</param>
/// <param name="e">Event data (unused).</param>
/// <remarks>
/// The whole loop runs synchronously inside the click handler, so the form
/// cannot process input until it finishes; the explicit <c>Refresh()</c>
/// calls only force the controls to repaint.
/// NOTE(review): none of the <c>IndexOf</c> results are checked for -1. A page
/// that lacks one of the expected markers will either throw from
/// <c>Substring</c> or silently extract the wrong text — decide how such
/// pages should be handled.
/// </remarks>
private void btnGet_Click(object sender, EventArgs e)
{
    // One connection string shared by both insert statements.
    const string connectionString = "SERVER=asdf.net;" +
                                    "DATABASE=music;" +
                                    "UID=root;" +
                                    "PASSWORD=asdf;";

    int maxCount = 15000; //11234 was last value
    progressBar.Maximum = maxCount;

    for (int i = 0; i <= maxCount; i++)
    {
        txbURL.Text = "http://www.newreleasetuesday.com/albumdetail.php?album_id=" + i;
        label.Text = i.ToString() + " out of " + maxCount.ToString() + " Done.";
        progressBar.Value = i;

        string url = txbURL.Text;
        string sourceCode = WorkerClass.getSourceCode(url);

        // Drop everything before this marker so later searches only see the
        // part of the page that follows it.
        int startIndex = sourceCode.IndexOf("//alert(document.getElementById(\"remcheck\").value)");
        sourceCode = sourceCode.Substring(startIndex, sourceCode.Length - startIndex);

        // --- Artist ---
        // The numeric offsets below are hard-coded skips past each marker;
        // NOTE(review): they are tied to the site's exact markup — verify.
        int idCountIndex = sourceCode.IndexOf(" by <a href=\"artistdetail.php?artist_id=") + 41;
        int idCountEndIndex = sourceCode.IndexOf("\">", idCountIndex);
        string artistID = sourceCode.Substring(idCountIndex, idCountEndIndex - idCountIndex);
        txbArtistID.Text = artistID;

        // The artist name starts after the id and the closing of the tag.
        startIndex = sourceCode.IndexOf(" by <a href=\"artistdetail.php?artist_id=") + 43 + artistID.Length;
        int endIndex = sourceCode.IndexOf("</a> | Genre", startIndex);
        string artistName = sourceCode.Substring(startIndex, endIndex - startIndex);
        txbArtist.Text = artistName;

        // --- Album ---
        // The album id is whatever follows the first '=' in the URL.
        string albumID = url.Substring(url.IndexOf("=") + 1);
        txbAlbumID.Text = albumID;

        startIndex = sourceCode.IndexOf("absbottom\"></span></strong> ") + 28;
        endIndex = sourceCode.IndexOf("</span></td>", startIndex);
        string albumName = sourceCode.Substring(startIndex, endIndex - startIndex);
        txbAlbum.Text = albumName;

        // --- Genre ---
        startIndex = sourceCode.IndexOf("</a> | Genre: ") + 14;
        endIndex = sourceCode.IndexOf(" | ", startIndex);
        string genre = sourceCode.Substring(startIndex, endIndex - startIndex);
        txbGenre.Text = genre;

        // --- Release date ---
        startIndex = sourceCode.IndexOf("<a href=\"releasedate.php?release_date=") + 50;
        endIndex = sourceCode.IndexOf(" </a></td>", startIndex);
        string releaseDate = sourceCode.Substring(startIndex, endIndex - startIndex);
        txbReleaseDate.Text = releaseDate;

        // --- Picture URL ---
        startIndex = sourceCode.IndexOf("<img src=\"") + 11;
        endIndex = sourceCode.IndexOf("\" alt=", startIndex);
        string picUrl = sourceCode.Substring(startIndex, endIndex - startIndex);
        picUrl = picUrl.Replace("amp;", "");
        string fullLink = "http://www.newreleasetuesday.com/" + picUrl;
        txbPicURL.Text = fullLink;

        // Force an immediate repaint of the controls with the new values.
        txbURL.Refresh();
        txbArtist.Refresh();
        txbAlbum.Refresh();
        txbReleaseDate.Refresh();
        txbGenre.Refresh();
        txbPicURL.Refresh();
        txbArtistID.Refresh();
        txbAlbumID.Refresh();
        label.Refresh();
        progressBar.Refresh();

        // The using blocks dispose the connection and command even when
        // Open/Prepare/ExecuteNonQuery throws, so a failed page cannot leak
        // a database connection.
        using (MySqlConnection conn = new MySqlConnection(connectionString))
        {
            conn.Open();
            using (MySqlCommand cmd = new MySqlCommand())
            {
                cmd.Connection = conn;
                if (artistName == "")
                {
                    // No artist name: only record the album id.
                    cmd.CommandText = "INSERT INTO `emptyalbums` (id, albumid) VALUES('',@albumid)";
                    cmd.Prepare();
                    cmd.Parameters.AddWithValue("@albumid", albumID);
                }
                else
                {
                    // Artist name present: store the full record.
                    cmd.CommandText = "INSERT INTO `database` (id, artist, album, releasedate, genre, pictureurl, artistid, albumid) VALUES('',@artist, @album, @releasedate, @genre, @pictureurl, @artistid, @albumid)";
                    cmd.Prepare();
                    cmd.Parameters.AddWithValue("@artist", artistName);
                    cmd.Parameters.AddWithValue("@album", albumName);
                    cmd.Parameters.AddWithValue("@releasedate", releaseDate);
                    cmd.Parameters.AddWithValue("@genre", genre);
                    cmd.Parameters.AddWithValue("@pictureurl", fullLink);
                    cmd.Parameters.AddWithValue("@artistid", artistID);
                    cmd.Parameters.AddWithValue("@albumid", albumID);
                }
                cmd.ExecuteNonQuery();
            }
        }
    }
}
Any info would go a long way. Thanks, Throdne
Multi-threading is indeed the solution to your problem. What happens here is that the processing runs on your GUI thread, so everything freezes until your loop has finished.
The implementation of multi-threading will depend on your framework and your needs, but if you use .NET 4.0 you might want to check out the Task Parallel Library (TPL).
http://msdn.microsoft.com/en-us/library/dd460717.aspx
Other than that, a simple Google search about multi-threading will get you where you want to be in no time.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With