Substitute dir /A:D. /B /S > FolderList. txt to produce a list of all folders and all subfolders of the directory.
PHP using scandir() to find folders in a directory The scandir function is an inbuilt function that returns an array of files and directories of a specific directory. It lists the files and directories present inside the path specified by the user.
The dir command displays a list of files and subdirectories in a directory. With the /S option, it recurses subdirectories and lists their contents as well. Options listed below may be preset in the DIRCMD environment variable.
Linux recursive directory listing using ls -R command. The -R option passed to the ls command to list subdirectories recursively.
You should be using the dirpath
which you call root
. The dirnames
are supplied so you can prune it if there are folders that you don't wish os.walk
to recurse into.
import os
result = [os.path.join(dp, f) for dp, dn, filenames in os.walk(PATH) for f in filenames if os.path.splitext(f)[1] == '.txt']
Edit:
After the latest downvote, it occurred to me that glob
is a better tool for selecting by extension.
import os
from glob import glob
result = [y for x in os.walk(PATH) for y in glob(os.path.join(x[0], '*.txt'))]
Also a generator version
from itertools import chain
result = (chain.from_iterable(glob(os.path.join(x[0], '*.txt')) for x in os.walk('.')))
Edit2 for Python 3.4+
from pathlib import Path
result = list(Path(".").rglob("*.[tT][xX][tT]"))
Changed in Python 3.5: Support for recursive globs using “**”.
glob.glob()
got a new recursive parameter.
If you want to get every .txt
file under my_path
(recursively including subdirs):
import glob
files = glob.glob(my_path + '/**/*.txt', recursive=True)
# my_path/ the dir
# **/ every file and dir under my_path
# *.txt every file that ends with '.txt'
If you need an iterator you can use iglob as an alternative:
for file in glob.iglob(my_path, recursive=True):
# ...
This seems to be the fastest solution I could come up with, and is faster than os.walk
and a lot faster than any glob
solution.
f.path
to f.name
(do not change it for subfolders!).Args: dir: str, ext: list
.
Function returns two lists: subfolders, files
.
See below for a detailed speed anaylsis.
def run_fast_scandir(dir, ext): # dir: str, ext: list
subfolders, files = [], []
for f in os.scandir(dir):
if f.is_dir():
subfolders.append(f.path)
if f.is_file():
if os.path.splitext(f.name)[1].lower() in ext:
files.append(f.path)
for dir in list(subfolders):
sf, f = run_fast_scandir(dir, ext)
subfolders.extend(sf)
files.extend(f)
return subfolders, files
subfolders, files = run_fast_scandir(folder, [".jpg"])
In case you need the file size, you can also create a sizes
list and add f.stat().st_size
like this for a display of MiB:
sizes.append(f"{f.stat().st_size/1024/1024:.0f} MiB")
Speed analysis
for various methods to get all files with a specific file extension inside all subfolders and the main folder.
tl;dr:
fast_scandir
clearly wins and is twice as fast as all other solutions, except os.walk.os.walk
is second place slighly slower.glob
will greatly slow down the process.fast_scandir took 499 ms. Found files: 16596. Found subfolders: 439
os.walk took 589 ms. Found files: 16596
find_files took 919 ms. Found files: 16596
glob.iglob took 998 ms. Found files: 16596
glob.glob took 1002 ms. Found files: 16596
pathlib.rglob took 1041 ms. Found files: 16596
os.walk-glob took 1043 ms. Found files: 16596
Tests were done with W7x64, Python 3.8.1, 20 runs. 16596 files in 439 (partially nested) subfolders.find_files
is from https://stackoverflow.com/a/45646357/2441026 and lets you search for several extensions.fast_scandir
was written by myself and will also return a list of subfolders. You can give it a list of extensions to search for (I tested a list with one entry to a simple if ... == ".jpg"
and there was no significant difference).
# -*- coding: utf-8 -*-
# Python 3
import time
import os
from glob import glob, iglob
from pathlib import Path
directory = r"<folder>"
RUNS = 20
def run_os_walk():
a = time.time_ns()
for i in range(RUNS):
fu = [os.path.join(dp, f) for dp, dn, filenames in os.walk(directory) for f in filenames if
os.path.splitext(f)[1].lower() == '.jpg']
print(f"os.walk\t\t\ttook {(time.time_ns() - a) / 1000 / 1000 / RUNS:.0f} ms. Found files: {len(fu)}")
def run_os_walk_glob():
a = time.time_ns()
for i in range(RUNS):
fu = [y for x in os.walk(directory) for y in glob(os.path.join(x[0], '*.jpg'))]
print(f"os.walk-glob\ttook {(time.time_ns() - a) / 1000 / 1000 / RUNS:.0f} ms. Found files: {len(fu)}")
def run_glob():
a = time.time_ns()
for i in range(RUNS):
fu = glob(os.path.join(directory, '**', '*.jpg'), recursive=True)
print(f"glob.glob\t\ttook {(time.time_ns() - a) / 1000 / 1000 / RUNS:.0f} ms. Found files: {len(fu)}")
def run_iglob():
a = time.time_ns()
for i in range(RUNS):
fu = list(iglob(os.path.join(directory, '**', '*.jpg'), recursive=True))
print(f"glob.iglob\t\ttook {(time.time_ns() - a) / 1000 / 1000 / RUNS:.0f} ms. Found files: {len(fu)}")
def run_pathlib_rglob():
a = time.time_ns()
for i in range(RUNS):
fu = list(Path(directory).rglob("*.jpg"))
print(f"pathlib.rglob\ttook {(time.time_ns() - a) / 1000 / 1000 / RUNS:.0f} ms. Found files: {len(fu)}")
def find_files(files, dirs=[], extensions=[]):
# https://stackoverflow.com/a/45646357/2441026
new_dirs = []
for d in dirs:
try:
new_dirs += [ os.path.join(d, f) for f in os.listdir(d) ]
except OSError:
if os.path.splitext(d)[1].lower() in extensions:
files.append(d)
if new_dirs:
find_files(files, new_dirs, extensions )
else:
return
def run_fast_scandir(dir, ext): # dir: str, ext: list
# https://stackoverflow.com/a/59803793/2441026
subfolders, files = [], []
for f in os.scandir(dir):
if f.is_dir():
subfolders.append(f.path)
if f.is_file():
if os.path.splitext(f.name)[1].lower() in ext:
files.append(f.path)
for dir in list(subfolders):
sf, f = run_fast_scandir(dir, ext)
subfolders.extend(sf)
files.extend(f)
return subfolders, files
if __name__ == '__main__':
run_os_walk()
run_os_walk_glob()
run_glob()
run_iglob()
run_pathlib_rglob()
a = time.time_ns()
for i in range(RUNS):
files = []
find_files(files, dirs=[directory], extensions=[".jpg"])
print(f"find_files\t\ttook {(time.time_ns() - a) / 1000 / 1000 / RUNS:.0f} ms. Found files: {len(files)}")
a = time.time_ns()
for i in range(RUNS):
subf, files = run_fast_scandir(directory, [".jpg"])
print(f"fast_scandir\ttook {(time.time_ns() - a) / 1000 / 1000 / RUNS:.0f} ms. Found files: {len(files)}. Found subfolders: {len(subf)}")
I will translate John La Rooy's list comprehension to nested for's, just in case anyone else has trouble understanding it.
result = [y for x in os.walk(PATH) for y in glob(os.path.join(x[0], '*.txt'))]
Should be equivalent to:
import glob
import os
result = []
for x in os.walk(PATH):
for y in glob.glob(os.path.join(x[0], '*.txt')):
result.append(y)
Here's the documentation for list comprehension and the functions os.walk and glob.glob.
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With