Is it possible to use yield inside the map function?
For proof-of-concept purposes, I have created a sample snippet.
# Python 3  (Win10)
from concurrent.futures import ThreadPoolExecutor
import os
def read_sample(sample):
    """Yield the result of ten successive reads of one sample file.

    ``sample`` is a file name inside the ``samples`` directory. The file is
    opened once and ``read()`` is called on the same handle for every item,
    so the handle stays open until the generator is exhausted or closed.
    """
    location = os.path.join('samples', sample)
    with open(location) as handle:
        reads_left = 10
        while reads_left > 0:
            yield str(handle.read())
            reads_left -= 1
def main():
    """Map ``read_sample`` over the ``samples`` directory and print the count.

    ``read_sample`` is a generator function, so every mapped result is a
    generator object; the printed number is therefore the number of
    directory entries, not the number of items the generators would yield.
    """
    with ThreadPoolExecutor(10) as pool:
        names = os.listdir('samples')
        results = [item for item in pool.map(read_sample, names)]
        print(str(len(results)), end="\r")


# Run the demo only when executed as a script.
if __name__ == "__main__":
    main()
I have 100 files in the samples folder. As per the snippet, 100*10=1000 should be printed. However, it prints only 100. When I checked, the list contains just generator objects.
What change is needed so that 1000 is printed?
You can use map() with a generator function, but it will just return the generator objects; it will not iterate over the generators themselves.
A possible approach is to have a generator do the looping the way you want and have a function operate on the objects. This has the added advantage of separating more neatly the looping from the computation. So, something like this should work:
# Python 3  (Win10)
from concurrent.futures import ThreadPoolExecutor
import os
def read_samples(samples):
    """Yield the path of every sample file ten times.

    Paths are yielded rather than open file objects. ``Executor.map``
    collects its iterables immediately in the submitting thread, so a handle
    opened in a ``with`` block here would already be closed -- and would be
    shared by ten tasks -- by the time a worker thread tried to read it.

    ``samples`` is an iterable of file names inside the ``samples`` directory.
    """
    for sample in samples:
        path = os.path.join('samples', sample)
        for _ in range(10):
            yield path


def _read_path(path):
    """Open ``path``, read it completely and return the text."""
    # Each task owns its handle, so nothing is shared between threads.
    with open(path) as fff:
        return str(fff.read())


def main():
    """Read every file in ``samples`` ten times on a pool and print the count."""
    with ThreadPoolExecutor(10) as exc:
        files = os.listdir('samples')
        # The generator does the looping; the mapped function does the work.
        files = list(exc.map(_read_path, read_samples(files)))
        print(str(len(files)), end="\r")


if __name__ == "__main__":
    main()
Another approach is to nest an extra map call to consume the generators:
# Python 3  (Win10)
from concurrent.futures import ThreadPoolExecutor
import os
def read_samples(samples):
    """Yield the open handle of each sample file ten times in a row.

    ``samples`` is an iterable of file names inside the ``samples``
    directory. The same file object is yielded for all ten items of a
    sample and is closed as soon as the consumer asks for the item after
    the tenth, so each handle must be used before advancing the generator.
    """
    for name in samples:
        location = os.path.join('samples', name)
        with open(location) as handle:
            remaining = 10
            while remaining > 0:
                yield handle
                remaining -= 1
def _read_sample(sample):
    """Yield the result of ten successive reads of one sample file.

    Being a generator function, calling it only creates the generator; the
    file is opened, read and closed by whichever thread iterates it.
    """
    with open(os.path.join('samples', sample)) as fff:
        for _ in range(10):
            yield str(fff.read())


def main():
    """Read every file in ``samples`` on a thread pool and print the item count.

    The inner ``map`` produces one generator per file; the outer ``map``
    runs ``list`` on each generator inside the pool, which is what actually
    consumes it. The per-file lists are then flattened before counting.
    """
    with ThreadPoolExecutor(10) as exc:
        files = os.listdir('samples')
        # The inner map needs the iterable as its own second argument;
        # passing it to the outer map instead yields no results at all.
        files = exc.map(list, exc.map(_read_sample, files))
        files = [f for fs in files for f in fs]  # flattening the results
        print(str(len(files)), end="\r")


if __name__ == "__main__":
    main()
To get a more reproducible example, the essential traits of your code can be written in a more minimal form (one that does not rely on files lying around on your system):
from concurrent.futures import ThreadPoolExecutor
def foo(n):
    """Yield the integers ``0`` up to ``n - 1`` in increasing order."""
    yield from range(n)
with ThreadPoolExecutor(10) as exc:
    # k was used without ever being assigned, which raised NameError.
    # 8 matches the eight generator objects in the sample output below.
    k = 8
    # Mapping a generator function only calls it, so each result is an
    # unconsumed generator object rather than the values it would yield.
    x = list(exc.map(foo, range(k)))
    print(x)
# [<generator object foo at 0x7f1a853d4518>, <generator object foo at 0x7f1a852e9990>, <generator object foo at 0x7f1a852e9db0>, <generator object foo at 0x7f1a852e9a40>, <generator object foo at 0x7f1a852e9830>, <generator object foo at 0x7f1a852e98e0>, <generator object foo at 0x7f1a852e9fc0>, <generator object foo at 0x7f1a852e9e60>]
from concurrent.futures import ThreadPoolExecutor
def foos(ns):
    """Yield ``0 .. n - 1`` for every ``n`` in ``range(ns)``, back to back."""
    for upper in range(ns):
        yield from range(upper)
with ThreadPoolExecutor(10) as exc:
    k = 8
    # One flat generator feeds the pool; the mapped function squares each
    # individual value, so the results are plain numbers.
    x = [squared for squared in exc.map(lambda value: value ** 2, foos(k))]
    print(x)
# [0, 0, 1, 0, 1, 4, 0, 1, 4, 9, 0, 1, 4, 9, 16, 0, 1, 4, 9, 16, 25, 0, 1, 4, 9, 16, 25, 36]
from concurrent.futures import ThreadPoolExecutor
def foo(n):
    """Yield the squares of the integers ``0`` up to ``n - 1``."""
    yield from (value ** 2 for value in range(n))
with ThreadPoolExecutor(10) as exc:
    k = 8
    # The inner map creates one generator per input; the outer map runs
    # list() on each of them in the pool to consume it.
    x = exc.map(list, exc.map(foo, range(k)))
    # Flatten the per-input lists into a single list before printing.
    print([item for chunk in x for item in chunk])
# [0, 0, 1, 0, 1, 4, 0, 1, 4, 9, 0, 1, 4, 9, 16, 0, 1, 4, 9, 16, 25, 0, 1, 4, 9, 16, 25, 36]
                        If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With