I have the code below, which runs a Twitter search and scrolls down through the infinitely scrolling results. However, the line `print data` is not working for me. Any ideas?
# Import Selenium stuff
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.support.ui import WebDriverWait
from selenium.common.exceptions import TimeoutException
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import NoSuchElementException
from selenium.common.exceptions import NoAlertPresentException
# Import other needed packages
import sys
import unittest, time, re
# Call up Firefox, do the Twitter search, click the "All" link and start paging
class Sel(unittest.TestCase):
    """Load a Twitter search in Firefox, scroll the page, and print its HTML."""

    def setUp(self):
        """Start Firefox and initialise the per-test state."""
        self.driver = webdriver.Firefox()
        # Registered straight away so the browser is closed even when the
        # rest of setUp or the test itself raises.
        self.addCleanup(self.driver.quit)
        self.driver.implicitly_wait(30)
        self.base_url = "https://twitter.com"
        self.verificationErrors = []
        self.accept_next_alert = True

    def test_sel(self):
        """Open the search, click "All", scroll 99 times, then print the source."""
        driver = self.driver
        driver.get(self.base_url + "/search?q=storstrut&src=typd")
        # find_element(By.<strategy>, value) is the locator call that current
        # Selenium releases support; the find_element_by_* helpers were
        # deprecated and later removed in Selenium 4.
        driver.find_element(By.LINK_TEXT, "All").click()
        for _ in range(1, 100):
            # Jump to the bottom of the document, then pause before the next
            # scroll.
            driver.execute_script(
                "window.scrollTo(0, document.body.scrollHeight);"
            )
            time.sleep(4)
        data = driver.page_source.encode("utf-8")
        # Emit the UTF-8 bytes unchanged: Python 3 exposes the binary stream
        # as sys.stdout.buffer, while Python 2's sys.stdout takes bytes
        # directly. This replaces the Python 2-only `print data` statement.
        stream = getattr(sys.stdout, "buffer", sys.stdout)
        stream.write(data + b"\n")
        stream.flush()


if __name__ == "__main__":
    unittest.main()
Firefox: CTRL + U (Meaning press the CTRL key on your keyboard and hold it down. While holding down the CTRL key, press the “u” key.) Alternatively, you can go to the “Firefox” menu and then click on “Web Developer,” and then “Page Source.” Edge/Internet Explorer: CTRL + U.
To get the source of a single web element, read its innerHTML attribute. innerHTML is the HTML markup between the element's opening and closing tags. In Selenium, call the element's get_attribute method and pass "innerHTML" as the argument.
You have a lot of unused code and unnecessary imports, but you are on the right track.
Here is a simplified version, with comments explaining.
import sys
import time

from selenium import webdriver

# How many times to scroll to the bottom of the page.
SCROLL_COUNT = 100
# Seconds to wait after each scroll so newly requested content can be added
# to the DOM before the next scroll; tune this for your connection.
SCROLL_PAUSE_SECONDS = 1

# launch Firefox
driver = webdriver.Firefox()
try:
    # load Twitter page
    driver.get("https://twitter.com/search?q=storstrut&src=typd")
    # The following JavaScript scrolls down the entire page body. Since
    # Twitter uses "infinite scrolling", more content will be added to the
    # bottom of the DOM as you scroll. Because it runs in a loop, it scrolls
    # down up to SCROLL_COUNT times.
    for _ in range(SCROLL_COUNT):
        driver.execute_script(
            "window.scrollTo(0, document.body.scrollHeight);"
        )
        time.sleep(SCROLL_PAUSE_SECONDS)
    # grab all of the page source that was loaded
    page_bytes = driver.page_source.encode("utf-8")
finally:
    # quit and close the browser, even if one of the steps above failed
    driver.quit()

# Print the UTF-8 bytes unchanged: Python 3 exposes the binary stream as
# sys.stdout.buffer, while Python 2's sys.stdout takes bytes directly.
stream = getattr(sys.stdout, "buffer", sys.stdout)
stream.write(page_bytes + b"\n")
stream.flush()
If you love us, you can donate via PayPal or buy us a coffee so we can maintain and grow the site. Thank you!
Donate Us With