I am trying to use tqdm to report the progress of each file downloads from three links, I wanted to use multithreading to download simultaneously from each link at the same time update the progress bar. But when I execute my script, there are multiple lines of progress bar it seems the thread are updating the tqdm progress bar the same time. I am asking how should I run multithreading for downloading the files while maintaining progress bar for each download without duplicated bars filling the entire screen? Here is my code.
import os
import sys
import requests
from pathlib import Path
from tqdm import tqdm
from concurrent.futures import ThreadPoolExecutor as PE
def get_filename(url):
filename = os.path.basename(url)
fname, extension = os.path.splitext(filename)
if extension:
return filename
header = requests.head(url).headers
if "Location" in header:
return os.path.basename(header["Location"])
return fname
def get_file_size(url):
header = requests.head(url).headers
if "Content-Length" in header and header["Content-Length"] != 0:
return int(header["Content-Length"])
elif "Location" in header and "status" not in header:
redirect_link = header["Location"]
r = requests.head(redirect_link).headers
return int(r["Content-Length"])
def download_file(url, filename=None):
# Download to the Downloads folder in user's home folder.
download_dir = os.path.join(Path.home(), "Downloads")
if not os.path.exists(download_dir):
os.makedirs(download_dir, exist_ok=True)
if not filename:
filename = get_filename(url)
file_size = get_file_size(url)
abs_path = os.path.join(download_dir, filename)
chunk_size = 1024
with open(abs_path, "wb") as f, requests.get(url, stream=True) as r, tqdm(
unit="B",
unit_scale=True,
unit_divisor=chunk_size,
desc=filename,
total=file_size,
file=sys.stdout
) as progress:
for chunk in r.iter_content(chunk_size=chunk_size):
data = f.write(chunk)
progress.update(data)
if __name__ == "__main__":
urls = ["http://mirrors.evowise.com/linuxmint/stable/20/linuxmint-20-xfce-64bit.iso",
"https://www.vmware.com/go/getworkstation-win",
"https://download.geany.org/geany-1.36_setup.exe"]
with PE(max_workers=len(urls)) as ex:
ex.map(download_file, urls)
I modified my code a bit, which i took from Use tqdm with concurrent.futures?.
def download_file(url, filename=None):
# Download to the Downloads folder in user's home folder.
download_dir = os.path.join(Path.home(), "Downloads")
if not os.path.exists(download_dir):
os.makedirs(download_dir, exist_ok=True)
if not filename:
filename = get_filename(url)
# file_size = get_file_size(url)
abs_path = os.path.join(download_dir, filename)
chunk_size = 1024
with open(abs_path, "wb") as f, requests.get(url, stream=True) as r:
for chunk in r.iter_content(chunk_size=chunk_size):
f.write(chunk)
if __name__ == "__main__":
urls = ["http://mirrors.evowise.com/linuxmint/stable/20/linuxmint-20-xfce-64bit.iso",
"https://www.vmware.com/go/getworkstation-win",
"https://download.geany.org/geany-1.36_setup.exe"]
with PE() as ex:
for url in urls:
tqdm(ex.submit(download_file, url),
total=get_file_size(url),
unit="B",
unit_scale=True,
unit_divisor=1024,
desc=get_filename(url),
file=sys.stdout)
But the bar is not updating after i modified my code...
My problem:
I have no problem with concurrent download, but has problem implementing tqdm to update individual progress for each link, below is what I want to achieve:
I used one of the solution:
if __name__ == "__main__":
urls = ["http://mirrors.evowise.com/linuxmint/stable/20/linuxmint-20-xfce-64bit.iso",
"https://www.vmware.com/go/getworkstation-win",
"https://download.geany.org/geany-1.36_setup.exe"]
with tqdm(total=len(urls)) as pbar:
with ThreadPoolExecutor() as ex:
futures = [ex.submit(download_file, url) for url in urls]
for future in as_completed(futures):
result = future.result()
pbar.update(1)
But this is the result:
This would be the general idea (format it as you wish):
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import requests
def download_file(url):
with requests.get(url, stream=True) as r:
for chunk in r.iter_content(chunk_size=50000):
pass
return url
if __name__ == "__main__":
urls = ["http://mirrors.evowise.com/linuxmint/stable/20/linuxmint-20-xfce-64bit.iso",
"https://www.vmware.com/go/getworkstation-win",
"https://download.geany.org/geany-1.36_setup.exe"]
with tqdm(total=len(urls)) as pbar:
with ThreadPoolExecutor(max_workers=len(urls)) as ex:
futures = [ex.submit(download_file, url) for url in urls]
for future in as_completed(futures):
result = future.result()
pbar.update(1)
Simulation If You Knew Lengths of Each Download
from concurrent.futures import ThreadPoolExecutor, as_completed
from tqdm import tqdm
import requests
import time
import random
def download_file(url, pbar):
for _ in range(30):
time.sleep(.50 * random.random())
pbar.update(1)
return url
if __name__ == "__main__":
urls = ["http://mirrors.evowise.com/linuxmint/stable/20/linuxmint-20-xfce-64bit.iso",
"https://www.vmware.com/go/getworkstation-win",
"https://download.geany.org/geany-1.36_setup.exe"]
with tqdm(total=90) as pbar:
with ThreadPoolExecutor(max_workers=3) as ex:
futures = [ex.submit(download_file, url, pbar) for url in urls]
for future in as_completed(futures):
result = future.result()
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With