Please help me fix this script.
import pprint
import requests
import bs4
def get_catalog(url, timeout=10):
    """Download *url* and return its top-level catalog menu element.

    Args:
        url: Address of the page to fetch.
        timeout: Seconds to wait for the server before ``requests`` gives
            up; without it a stalled connection would block forever.

    Returns:
        The ``<ul class="topnav">`` tag located inside
        ``<section class="catalog">``, or ``None`` when the response status
        is not OK or either element is missing from the page.
    """
    req = requests.get(url, timeout=timeout)
    if req.status_code != requests.codes.ok:
        print('Error: ', req.status_code)
        return None

    # Name the parser explicitly so the result does not depend on which
    # optional parsers happen to be installed (and to silence bs4's warning).
    soup = bs4.BeautifulSoup(req.text, 'html.parser')

    catalogMenu = soup.find('section', {'class': 'catalog'})
    if catalogMenu is None:
        # find() returns None when nothing matches; bail out instead of
        # raising AttributeError on the chained lookup below.
        return None
    return catalogMenu.find('ul', {'class': 'topnav'})
def parse_catalog_categories(catalogMenuList):
    """Print and return the first-level ``<li>`` items of the catalog menu.

    Args:
        catalogMenuList: The menu element to inspect (any object exposing a
            Beautiful Soup style ``find_all``), or ``None``.

    Returns:
        A list with the ``<li>`` elements that are direct children of
        *catalogMenuList*; an empty list when *catalogMenuList* is ``None``.
    """
    if catalogMenuList is None:
        return []

    # recursive=False limits the search to direct children, so the nested
    # sub-category <li> elements are not picked up.
    items = list(catalogMenuList.find_all('li', recursive=False))
    pprint.pprint(items)
    return items
if __name__ == "__main__":
    # Script entry point: fetch the store's front page, then list the
    # catalog categories if the menu element could be retrieved.
    url = 'http://first-store.ru/'
    catalogMenuList = get_catalog(url)
    if catalogMenuList:
        parse_catalog_categories(catalogMenuList)
    else:
        print('Get catalog error')
The problem is that I can't select only the li elements at the
first level of nesting, i.e.:
iphone, ipad, ipod, imac, etc...
But not:
iphone, iphone 5s, iphone 5s VIP, iphone 5c, .....
Try setting recursive=False
to search only among the direct children of the tag:
items = catalogMenuList.find_all('li', recursive=False)
If you love us, you can donate to us via PayPal or buy me a coffee so we can maintain and grow! Thank you!
Donate Us With