I am running a Python 3 script that uses Selenium, and it produces the output I expect. Now I want to save that output to a text, CSV, or JSON file. When I run the script and try to save the result to a file, I get an error at the line with open('bangkok_vendor.txt','a') as wt:
TypeError: 'NoneType' object is not callable
This means the loop in the program runs only once and does not store any data in the file bangkok_vendor.txt. In my ordinary Python scraper programs I have never had a problem storing data, but this is the first time I am using Selenium. Can you please help me with a solution? Thanks.
I am running this script from the terminal, and I want to save its output to a file (any format is fine):
from selenium import webdriver
from bs4 import BeautifulSoup as bs
import csv
import requests
# Collects the first-column value of every row read from the CSV file named below.
contents =[]
# Input CSV file; the script reads row[0] of each row from it as a link.
filename = 'link_business_filter.csv'
# Intended to write the scraped data to a file.
# NOTE(review): indentation was lost in this listing; the lines below are
# presumably the function body.
# NOTE(review): nothing in the visible script calls copy_json(), and it reads
# the module-level name `script2`, which is only assigned further down.
def copy_json():
# NOTE(review): 'w' mode truncates the file each time it is opened, and the
# name here is "bangkok_vendor.text" while the error report mentions ".txt".
with open("bangkok_vendor.text",'w') as wt:
for x in script2:
# NOTE(review): file objects provide `write`, not `writer` - presumably a typo.
wt.writer(x)
# NOTE(review): redundant if this sits inside the `with` block, which already
# closes the file on exit.
wt.close()
# Read the link list: the first column of every CSV row is appended to `contents`.
# NOTE(review): indentation was lost in this listing; nesting is presumed.
with open(filename,'rt') as f:
data = csv.reader(f)
for row in data:
links = row[0]
contents.append(links)
# Visit every collected link in Chrome and print the text of the element
# matched by x_path.
# NOTE(review): the statements below are presumably the body of this loop.
for link in contents:
# NOTE(review): url_html is not read anywhere in the visible script.
url_html = requests.get(link)
print(link)
browser = webdriver.Chrome('chromedriver')
# NOTE(review): this rebinds the builtin name `open` to the return value of
# browser.get(). The reported "TypeError: 'NoneType' object is not callable"
# at a later `with open(...)` is consistent with that value being None.
open = browser.get(link)
source = browser.page_source
# `data` is rebound here from the csv reader to the parsed page.
data = bs(source,"html.parser")
body = data.find('body')
# NOTE(review): `script` is not read again in the visible script.
script = body
x_path = '//*[@id="react-root"]/section/main/div'
script2 = browser.find_element_by_xpath(x_path)
script3 = script2.text
#script2.send_keys(keys.COMMAND + 't')
browser.close()
print(script3)
Pass script2 as a parameter to the copy_json function and call it when you extract the data from the page. Change the write mode to append; otherwise the file will be reset every time you call the copy_json function. Don't reassign the name open (as in open = browser.get(link)); otherwise you won't be able to open a file to write data once you move on to the second iteration. I refactored your code a bit:
# CSV file whose first column holds the links to visit.
LINK_CSV = 'link_business_filter.csv'
# Text file that the scraped text is appended to.
SAVE_PATH = 'bangkok_vendor.txt'
def read_links(path=None):
    """Return the links stored in the first column of a CSV file.

    Args:
        path: CSV file to read. When omitted, the module-level ``LINK_CSV``
            is used.

    Returns:
        A list holding the whitespace-stripped first cell of every row, in
        file order. Blank lines and rows whose first cell is empty are
        skipped instead of raising ``IndexError``.
    """
    if path is None:
        path = LINK_CSV
    # The csv module asks for newline='' so that line breaks inside quoted
    # fields are handled by the reader rather than by the file object.
    with open(path, newline='') as f:
        return [
            row[0].strip()
            for row in csv.reader(f)
            if row and row[0].strip()
        ]
def write_data(data, path=None):
    """Append *data* plus a trailing newline to the output file.

    Args:
        data: Text to append.
        path: File to append to. When omitted, the module-level
            ``SAVE_PATH`` is used.
    """
    if path is None:
        path = SAVE_PATH
    # Write UTF-8 explicitly: scraped page text may contain non-ASCII
    # characters, and the platform default encoding may not cover them.
    with open(path, mode='a', encoding='utf-8') as f:
        f.write(data + "\n")
if __name__ == '__main__':
    # Read the links first so a bad CSV fails before a browser is started.
    links = read_links()
    x_path = '//*[@id="react-root"]/section/main/div'

    # With no arguments Selenium locates chromedriver on PATH. Passing the
    # driver path positionally is no longer accepted by recent Selenium 4.
    browser = webdriver.Chrome()
    try:
        # Let element lookups poll for up to 10 seconds, because the page
        # may not be fully rendered right after get() returns.
        browser.implicitly_wait(10)
        for link in links:
            browser.get(link)
            # browser.page_source can be passed to BeautifulSoup if the raw
            # HTML is needed; the element text is enough here.
            # "xpath" is the locator string behind By.XPATH. The
            # find_element(by, value) form works on Selenium 3 and 4, while
            # find_element_by_xpath was removed in Selenium 4.3.
            main_div = browser.find_element("xpath", x_path)
            write_data(main_div.text)
    finally:
        # Always shut the browser down, even if a page fails to load or the
        # element is missing.
        browser.quit()
If you love us, you can donate via PayPal or buy us a coffee so we can maintain and grow the site. Thank you!
Donate Us With