I want to scrape all the match links from this page 'https://m.aiscore.com/basketball/20210610', but I can only get a limited number of matches. I tried this code:
import requests
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
options = Options()
options.add_argument("--headless")
driver = webdriver.Chrome(executable_path=r"C:/chromedriver.exe", options=options)
url = 'https://m.aiscore.com/basketball/20210610'
driver.get(url)
driver.maximize_window()
driver.implicitly_wait(60)
driver.execute_script("window.scrollTo(0, document.body.scrollHeight)")
soup = BeautifulSoup(driver.page_source, 'html.parser')
links = [i['href'] for i in soup.select('.w100.flex a')]
links_length = len(links)  # always returns 16
driver.quit()
When I run the code, I always get only 16 match links, but the page has 35 matches. I need to get all the match links on the page.
As the site loads more matches while being scrolled, I have scrolled one screen at a time until the height we need to scroll to is larger than the total scroll height of the page.
I have used a set for storing the match links, to avoid adding links that are already present.
At the time of running this, I was able to find all the links. Hope this works for you as well.
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

options = Options()
options.add_argument("--headless")
driver = webdriver.Chrome(executable_path=r"C:\Users\User\Downloads\chromedriver.exe", options=options)

url = 'https://m.aiscore.com/basketball/20210610'
driver.get(url)
# Wait till the webpage is loaded
time.sleep(2)

# Wait for 1 sec after each scroll
scroll_wait = 1
# Get the screen height
screen_height = driver.execute_script("return window.screen.height;")
# Number of scrolls, initially 1
ScrollNumber = 1
# Set to store all the match links
ans = set()

while True:
    # Scroll one screen at a time
    driver.execute_script(f"window.scrollTo(0, {screen_height * ScrollNumber})")
    ScrollNumber += 1
    # Wait for some time after the scroll so new matches can load
    time.sleep(scroll_wait)
    # Update the scroll height after each scroll
    scroll_height = driver.execute_script("return document.body.scrollHeight;")
    # Fetch the data we need - links to matches; the set skips duplicates
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    for j in soup.select('.w100 .flex a'):
        ans.add(j['href'])
    # Break when the height we would scroll to is larger than the scroll height
    if screen_height * ScrollNumber > scroll_height:
        break

driver.quit()
print(f'Links found: {len(ans)}')
Output:
Links found: 61
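As a side note (a sketch, not part of the answer above): window.screen.height in headless Chrome does not necessarily match the actual viewport, so the screen-sized steps can over- or undershoot the lazy-loading trigger. A minimal variant, assuming the same page, the same '.w100 .flex a' selector, and the same chromedriver path as above, uses window.innerHeight as the scroll step and stops once the scroll position has passed a scroll height that is no longer growing:
import time
from bs4 import BeautifulSoup
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

options = Options()
options.add_argument("--headless")
driver = webdriver.Chrome(executable_path=r"C:\Users\User\Downloads\chromedriver.exe", options=options)
driver.get('https://m.aiscore.com/basketball/20210610')
time.sleep(2)  # let the initial page render

links = set()
pos = 0
# Use the real viewport height as the scroll step; in headless mode
# window.screen.height can differ from what is actually visible.
step = driver.execute_script("return window.innerHeight;")
while True:
    pos += step
    driver.execute_script(f"window.scrollTo(0, {pos});")
    time.sleep(1)  # give the lazy-loaded matches time to arrive
    # Collect whatever is in the DOM right now; the set drops duplicates
    soup = BeautifulSoup(driver.page_source, 'html.parser')
    links.update(a['href'] for a in soup.select('.w100 .flex a'))
    # Stop once we have scrolled past the end of a page that has stopped growing
    if pos >= driver.execute_script("return document.body.scrollHeight;"):
        break

driver.quit()
print(f'Links found: {len(links)}')
Note that newer Selenium 4 releases removed the executable_path argument; there you would pass a Service object instead (from selenium.webdriver.chrome.service import Service, then webdriver.Chrome(service=Service(r"C:\Users\User\Downloads\chromedriver.exe"), options=options)). The fixed one-second sleep is only a heuristic: if the site takes longer to load a batch of matches, increase the wait.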