My project consists of making a competitive watch table for hotel rates for an agency. It is a painful action that I wanted to automate, the code extract correctly the name of hotels and the prices I want to extract but it's working correctly only for the first hotel and I don't know where is the problem. I provide you with the code and the output, if any of you can help me and thank you in advance.
NB : the code 2 works correctly but when i've added more operations the problem appeared
code 1
#!/usr/bin/env python
# coding: utf-8
import time
from time import sleep
import ast
import pandas as pd
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.common.exceptions import StaleElementReferenceException, NoSuchElementException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
driver = webdriver.Chrome("C:\\Users\\marketing2\\Documents\\chromedriver.exe")
driver.get('https://tn.tunisiebooking.com/')
# params to select
params = {
'destination': 'Tozeur',
'date_from': '11/09/2021',
'date_to': '12/09/2021',
'bedroom': '1'
}
# select destination
destination_select = Select(WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, 'ville_des'))))
destination_select.select_by_value(params['destination'])
# select bedroom
bedroom_select = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'select_ch'))))
bedroom_select.select_by_value(params['bedroom'])
# select dates
script = f"document.getElementById('checkin').value ='{params['date_from']}';"
script += f"document.getElementById('checkout').value ='{params['date_to']}';"
script += f"document.getElementById('depart').value ='{params['date_from']}';"
script += f"document.getElementById('arrivee').value ='{params['date_to']}';"
driver.execute_script(script)
# submit form
btn_rechercher = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="boutonr"]')))
btn_rechercher.click()
urls = []
hotels = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//div[starts-with(@id,'produit_affair')]")))
for hotel in hotels:
link = hotel.find_element_by_xpath(".//span[@class='tittre_hotel']/a").get_attribute("href")
urls.append(link)
for url in urls:
driver.get(url)
def existsElement(xpath):
try:
driver.find_element_by_id(xpath);
except NoSuchElementException:
return "false"
else:
return "true"
if (existsElement('result_par_arrangement')=="false"):
btn_t = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="moteur_rech"]/form/div/div[3]/div')))
btn_t.click()
sleep(10)
else :
pass
try:
name = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//div[@class='bloc_titre_hotels']/h2"))).text
arropt = driver.find_element_by_xpath("//div[contains(@class,'line_result')][1]")
opt = arropt.find_element_by_tag_name("b").text
num = len(arropt.find_elements_by_tag_name("option"))
optiondata = {}
achats = {}
marges= {}
selection = Select(driver.find_element_by_id("arrangement"))
for i in range(num):
try:
selection = Select(driver.find_element_by_id("arrangement"))
selection.select_by_index(i)
time.sleep(2)
arr = driver.find_element_by_xpath("//select[@id='arrangement']/option[@selected='selected']").text
prize = driver.find_element_by_id("prix_total").text
optiondata[arr] = (int(prize))
btn_passe = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="resultat"]/div/form/div/div[2]/div[1]/div[2]/div[2]/div')))
btn_passe.click()
# params to select
params = {
'civilite_acheteur': 'Mlle',
'prenom_acheteur': 'test',
'nom_acheteur': 'test',
'e_mail_acheteur': '[email protected]',
'portable_acheteur': '22222222',
'ville_acheteur': 'Test',
}
# select civilite
civilite_acheteur = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.NAME, 'civilite_acheteur'))))
civilite_acheteur.select_by_value(params['civilite_acheteur'])
# saisir prenom
script = f"document.getElementsByName('prenom_acheteur')[0].value ='{params['prenom_acheteur']}';"
script += f"document.getElementsByName('nom_acheteur')[0].value ='{params['nom_acheteur']}';"
script += f"document.getElementsByName('e_mail_acheteur')[0].value ='{params['e_mail_acheteur']}';"
script += f"document.getElementsByName('portable_acheteur')[0].value ='{params['portable_acheteur']}';"
script += f"document.getElementsByName('ville_acheteur')[0].value ='{params['ville_acheteur']}';"
driver.execute_script(script)
# submit form
btn_agence = driver.find_element_by_id('titre_Nabeul')
btn_agence.click()
btn_continuez = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'boutonr')))
btn_continuez.click()
achat = int(driver.find_element_by_xpath('/html/body/header/div[2]/div[1]/div[1]/div[4]/div[2]/div[2]').text.replace(' TND', ''))
achats[arr]=achat
marge =int(((float(prize) - float(achat)) / float(achat)) * 100);
marges[arr]=marge
optiondata[arr]=prize,achat,marge
driver.get(url)
btn_display = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="moteur_rech"]/form/div/div[3]/div')))
btn_display.click()
sleep(10)
except StaleElementReferenceException:
pass
except NoSuchElementException:
pass
s="- {} | {} : {}".format(name, opt, optiondata)
print(s)
ds = []
for l in s.splitlines():
d = l.split('-')
if len(d) > 1:
df = pd.DataFrame(ast.literal_eval(d[1].strip()))
ds.append(df)
for df in ds:
df.reset_index(drop=True, inplace=True)
df = pd.concat(ds, axis= 1)
cols = df.columns
cols = [((col.split('.')[0], col)) for col in df.columns]
df.columns=pd.MultiIndex.from_tuples(cols)
print(df.T)
#print("{} : {} - {}".format(name, opt, optiondata))
code 2
from selenium.webdriver.support.ui import Select
from selenium.common.exceptions import StaleElementReferenceException,NoSuchElementException
urls = []
hotels = driver.find_elements_by_xpath("//div[starts-with(@id,'produit_affair')]")
for hotel in hotels:
link = hotel.find_element_by_xpath(".//span[@class='tittre_hotel']/a").get_attribute("href")
urls.append(link)
for url in urls:
driver.get(url)
try:
name = driver.find_element_by_xpath("//div[@class='bloc_titre_hotels']/h2").text
arropt = driver.find_element_by_xpath("//div[contains(@class,'line_result')][1]")
opt = arropt.find_element_by_tag_name("b").text
num = len(arropt.find_elements_by_tag_name("option"))
optiondata = {}
selection = Select(driver.find_element_by_id("arrangement"))
for i in range(num):
try:
selection = Select(driver.find_element_by_id("arrangement"))
selection.select_by_index(i)
time.sleep(2)
arr = driver.find_element_by_xpath("//select[@id='arrangement']/option[@selected='selected']").text
prize = driver.find_element_by_id("prix_total").text
optiondata[arr]=prize
except StaleElementReferenceException:
pass
except NoSuchElementException:
pass
print("{} : {} - {} - {}".format(name,opt,num,optiondata))
boutonr
doesn't exist on the page anymore.time.sleep()
to an absolute minimum as it is a waste of time for your code execution. Use WebDriverWait(...)
insteadI don't speak French so I could not understand what you are after in your code, but this minimised example below should help you to understand the principle.
#!/usr/bin/env python
# coding: utf-8
import time
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.common.exceptions import StaleElementReferenceException, NoSuchElementException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
driver = webdriver.Chrome("C:\chromedriver.exe")
driver.get('https://tn.tunisiebooking.com/')
# params to select
params = { 'destination': 'Nabeul',
'date_from': '25/08/2021',
'date_to': '26/08/2021',
'bedroom': '1' }
# select destination
destination_select = Select(WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, 'ville_des'))))
destination_select.select_by_value(params['destination'])
# select bedroom
bedroom_select = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'select_ch'))))
bedroom_select.select_by_value(params['bedroom'])
# select dates
script = f"document.getElementById('checkin').value ='{params['date_from']}';"
script += f"document.getElementById('checkout').value ='{params['date_to']}';"
script += f"document.getElementById('depart').value ='{params['date_from']}';"
script += f"document.getElementById('arrivee').value ='{params['date_to']}';"
driver.execute_script(script)
# submit form
btn_rechercher = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//div[@onclick="return submit_hotel_recherche()"]')))
btn_rechercher.click()
urls = []
hotels = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//div[starts-with(@id,'produit_affair')]")))
for hotel in hotels:
link = hotel.find_element_by_xpath(".//span[@class='tittre_hotel']/a").get_attribute("href")
urls.append(link)
for url in urls:
driver.get(url)
try:
name = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//div[@class='bloc_titre_hotels']/h2"))).text
arropt = driver.find_element_by_xpath("//div[contains(@class,'line_result')][1]")
opt = arropt.find_element_by_tag_name("b").text
num = len(arropt.find_elements_by_tag_name("option"))
optiondata = {}
achats = {}
marges= {}
for i in range(num):
try:
selection = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'arrangement')))).select_by_index(i)
time.sleep(0.5)
arr = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//select[@id='arrangement']/option[@selected='selected']"))).text
prize = driver.find_element_by_id("prix_total").text
optiondata[arr] = int(prize)
except StaleElementReferenceException:
pass
print("{} : {} - {}".format(name, opt, optiondata))
except NoSuchElementException:
pass
driver.quit()
Result:
Byzance Nabeul : Chambre Double - {'All Inclusive soft': 93, 'Demi Pension': 38, 'Petit Dejeuner': 28, 'Pension Complete': 78}
Palmyra Club Nabeul Nabeul : Double Standard - {'All Inclusive soft': 92}
The following code goes to the payment page and extracts all the info there:
#!/usr/bin/env python
# coding: utf-8
import time
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait, Select
from selenium.common.exceptions import StaleElementReferenceException, NoSuchElementException
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.common.by import By
driver = webdriver.Chrome("/usr/local/bin/chromedriver")
driver.get('https://tn.tunisiebooking.com/')
# params to select
params = {
'destination': 'Nabeul',
'date_from': '29/08/2021',
'date_to': '30/08/2021',
'bedroom': '1'
}
# select destination
destination_select = Select(WebDriverWait(driver, 20).until(EC.element_to_be_clickable((By.ID, 'ville_des'))))
destination_select.select_by_value(params['destination'])
# select bedroom
bedroom_select = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'select_ch'))))
bedroom_select.select_by_value(params['bedroom'])
# select dates
script = f"document.getElementById('checkin').value ='{params['date_from']}';"
script += f"document.getElementById('checkout').value ='{params['date_to']}';"
script += f"document.getElementById('depart').value ='{params['date_from']}';"
script += f"document.getElementById('arrivee').value ='{params['date_to']}';"
driver.execute_script(script)
# submit form
btn_rechercher = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//div[@onclick="return submit_hotel_recherche()"]')))
btn_rechercher.click()
urls = []
hotels = WebDriverWait(driver, 10).until(EC.presence_of_all_elements_located((By.XPATH, "//div[starts-with(@id,'produit_affair')]")))
for hotel in hotels:
link = hotel.find_element_by_xpath(".//span[@class='tittre_hotel']/a").get_attribute("href")
urls.append(link)
for url in urls:
driver.get(url)
try:
name = WebDriverWait(driver, 10).until(EC.presence_of_element_located((By.XPATH, "//div[@class='bloc_titre_hotels']/h2"))).text
arropt = driver.find_element_by_xpath("//div[contains(@class,'line_result')][1]")
opt = arropt.find_element_by_tag_name("b").text
num = len(arropt.find_elements_by_tag_name("option"))
optiondata = {}
achats = {}
marges= {}
try:
selection = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'arrangement'))))
time.sleep(0.5)
arr = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, "//select[@id='arrangement']/option[@selected='selected']"))).text
prize = driver.find_element_by_id("prix_total").text
optiondata[arr] = (int(prize))
btn_passe = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'resa')))
btn_passe.click()
tot = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.ID, 'montant_total_apres_code')))
total = int(tot.text.replace(' €', ''))
# params to select
params = {
'civilite_acheteur': 'Mlle',
'prenom_acheteur': 'test',
'nom_acheteur': 'test',
'e_mail_acheteur': '[email protected]',
'portable_acheteur': '22222222',
'ville_acheteur': 'Test',
}
# select civilite
civilite_acheteur = Select(WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.NAME, 'civilite_acheteur'))))
civilite_acheteur.select_by_value(params['civilite_acheteur'])
# saisir prenom
script = f"document.getElementsByName('prenom_acheteur')[0].value ='{params['prenom_acheteur']}';"
script += f"document.getElementsByName('nom_acheteur')[0].value ='{params['nom_acheteur']}';"
script += f"document.getElementsByName('e_mail_acheteur')[0].value ='{params['e_mail_acheteur']}';"
script += f"document.getElementsByName('portable_acheteur')[0].value ='{params['portable_acheteur']}';"
script += f"document.getElementsByName('ville_acheteur')[0].value ='{params['ville_acheteur']}';"
driver.execute_script(script)
# submit form
btn_agence = driver.find_element_by_class_name('continuez_resa')
btn_agence.click()
achat1 = int(driver.find_element_by_id('montant_a_payer').text.replace(' €', ''))
achat = int(driver.find_element_by_id('montant_restant').text.replace(' €', ''))
achat3 = float(driver.find_element_by_xpath('//div[@class="ligne_interne_total"]/div[3]/div[@class="prix_total1 text_shadow"]').text.replace(' TND', ''))
achats[arr]=achat
marge =int(((float(prize) - float(achat)) / float(achat)) * 100);
marges[arr]=marge
optiondata[arr]=prize,total,achat1,achat,achat3,marge
except StaleElementReferenceException:
pass
print("{} : {} - {}".format(name, opt, optiondata))
except NoSuchElementException:
pass
driver.quit()
Output:
Byzance Nabeul : Chambre Double - {'Petit Dejeuner': (36, 41, 12, 29, 4.0, 24)}
Where:
36 = Prix Total
41 = Montant Total
12 = Montant de l'acompte
29 = Vous payerez le reste à votre arrivée à l'hôtel
4.0 = Total taxe de séjour à payer sur place à l'hôtel est
24 = Marges
Hotel page:
You are using sleeps to load the pages in your first example but not in your second one (the one that you state works just fine).
This is typically not the way you want to actually use selenium and leads me to believe that your timing is off.
This SO answer shows you how to use "Explicit Waits" on "expected_conditions" to not have "specific timings" which can/will fail.
You even create a wait
object but never use it.
Use it in conjunction with expected_conditions
and remove the specific timed sleeps and things will get better.
expected_conditions docs are here
The problem was that it can't access to the element listing arrangements for the rest of the hotels in the list i've added a function that tests the presence of the data and it workod
for url in urls:
driver.get(url)
def existsElement(xpath):
try:
driver.find_element_by_id(xpath);
except NoSuchElementException:
return "false"
else:
return "true"
if (existsElement('result_par_arrangement')=="false"):
btn_t = WebDriverWait(driver, 10).until(EC.element_to_be_clickable((By.XPATH, '//*[@id="moteur_rech"]/form/div/div[3]/div')))
btn_t.click()
else :
pass
If you love us? You can donate to us via Paypal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With