import requests
from bs4 import BeautifulSoup
import pandas as pd
# Scrape the investor list page, follow every profile link found in it, and
# collect the address and website text shown on each profile page.

# Site root; the hrefs taken from the list page are joined onto it.
baseurl = 'https://signal.nfx.com/'
# Browser-like User-Agent sent with every request made by this script.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.114 Safari/537.36'
}

# Fetch the list page with the same headers used for the profile pages
# (the original omitted them on this first request only).
r = requests.get(
    'https://signal.nfx.com/investor-lists/top-who-invested-in-female-founders-investors',
    headers=headers,
)
soup = BeautifulSoup(r.content, 'html.parser')
tra = soup.find_all('div', class_='pr3')

productlinks = []  # profile URLs discovered on the list page
p = []             # one address string per profile ('' when not found)
u = []             # one website string per profile ('' when not found)

for links in tra:
    for link in links.find_all('a', href=True):
        # Join with exactly one slash: baseurl ends in '/' and the href may
        # start with one, which would otherwise produce '//' in the URL.
        comp = baseurl.rstrip('/') + '/' + link['href'].lstrip('/')
        productlinks.append(comp)

for profile_url in productlinks:
    r = requests.get(profile_url, headers=headers)
    soup = BeautifulSoup(r.content, 'html.parser')

    # find() returns None when the element is absent from the returned HTML;
    # record an empty string in that case instead of hiding it behind a bare
    # except.
    address_tag = soup.find('span', class_='ml1')
    address = address_tag.text if address_tag is not None else ''
    p.append(address)

    # Kept in its own variable so the loop variable is not overwritten.
    website_tag = soup.find('a', class_='ml1 subheader lower-subheader')
    website = website_tag.text if website_tag is not None else ''
    u.append(website)

df = pd.DataFrame(
    {"address": p, "link": u}
)
print(df)
This is my output: it gives me only one address and link, then prints empty values for the remaining rows and finishes. Can you help me get all the addresses and links? I am trying to scrape data from each page, but the pages do not return the complete information.
This is one of the links I am scraping information from: https://signal.nfx.com/investors/aaleen-anjum
address link
0 Toronto, Ontario twosmallfish.vc
1
2
3
4
5
6
7
8
9
10
11
You can get the data through the API. For example, here are the investors:
import requests
import pandas as pd
# GraphQL endpoint that the request body below is posted to.
url = "https://signal-api.nfx.com/graphql"

# Browser-like User-Agent header sent with the request.
headers = {
    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/95.0.4638.69 Safari/537.36',
}

# Request body for the "vclInvestors" operation. `variables.after` carries the
# pagination cursor (empty string for the first page); the query asks for
# investors eight at a time via `scored_investors(first: 8, after: $after)`.
payload = dict(
    operationName="vclInvestors",
    variables=dict(
        slug="who-invested-in-female-founders",
        order=[{}],
        after="",
    ),
    query="query vclInvestors($slug: String!, $after: String) {\n list(slug: $slug) {\n id\n slug\n investor_count\n vertical {\n id\n display_name\n kind\n __typename\n }\n location {\n id\n display_name\n __typename\n }\n stage\n firms {\n id\n name\n slug\n __typename\n }\n scored_investors(first: 8, after: $after) {\n pageInfo {\n hasNextPage\n hasPreviousPage\n endCursor\n __typename\n }\n record_count\n edges {\n node {\n ...investorListInvestorProfileFields\n __typename\n }\n __typename\n }\n __typename\n }\n __typename\n }\n}\n\nfragment investorListInvestorProfileFields on InvestorProfile {\n id\n person {\n id\n first_name\n last_name\n name\n slug\n is_me\n is_on_target_list\n __typename\n }\n image_urls\n position\n min_investment\n max_investment\n target_investment\n is_preferred_coinvestor\n firm {\n id\n name\n slug\n __typename\n }\n investment_locations {\n id\n display_name\n location_investor_list {\n id\n slug\n __typename\n }\n __typename\n }\n investor_lists {\n id\n stage_name\n slug\n vertical {\n id\n display_name\n __typename\n }\n __typename\n }\n __typename\n}\n",
)
# Page through the investor list, one POST per page, and gather every page's
# edges into a single DataFrame named `results`.
results = pd.DataFrame()
frames = []        # one normalized DataFrame per fetched page
hasNextPage = True
after = ''         # pagination cursor; empty string requests the first page
count = 0          # running number of rows fetched so far

while hasNextPage:
    # Assign (not compare) the cursor so the next page is actually requested.
    # The original used `==` here, which left the payload unchanged and made
    # the loop fetch the first page forever.
    payload['variables']['after'] = after

    response = requests.post(url, headers=headers, json=payload)
    response.raise_for_status()  # surface HTTP errors before parsing JSON
    jsonData = response.json()

    scored = jsonData['data']['list']['scored_investors']
    data = scored['edges']
    if not data:
        # An empty page would never advance the row count; stop rather than
        # risk looping on a cursor that yields nothing.
        break

    df = pd.json_normalize(data)
    frames.append(df)

    count += len(df)
    tot = jsonData['data']['list']['investor_count']
    print(f'{count} of {tot}')

    hasNextPage = scored['pageInfo']['hasNextPage']
    after = scored['pageInfo']['endCursor']

if frames:
    # DataFrame.append was removed in pandas 2.0; concatenate all pages once.
    results = pd.concat(frames, sort=False, ignore_index=True)
Output:
print(results.head(2).to_string())
__typename node.__typename node.id node.person.id node.person.first_name node.person.last_name node.person.name node.person.slug node.person.is_me node.person.is_on_target_list node.person.__typename node.image_urls node.position node.min_investment node.max_investment node.target_investment node.is_preferred_coinvestor node.firm.id node.firm.name node.firm.slug node.firm.__typename node.investment_locations node.investor_lists node.firm
0 InvestorProfileEdge InvestorProfile 19676 87099 Aaleen Anjum Aaleen Anjum aaleen-anjum False False Person [https://signal-api.nfx.com/rails/active_storage/representations/redirect/eyJfcmFpbHMiOnsibWVzc2FnZSI6IkJBaHBBMWp2QVE9PSIsImV4cCI6bnVsbCwicHVyIjoiYmxvYl9pZCJ9fQ==--1dc8054880c588f1fd59361ebd5d8526f841049d/eyJfcmFpbHMiOnsibWVzc2FnZSI6IkJBaDdCem9MWm05eWJXRjBPZ2hxY0djNkUzSmxjMmw2WlY5MGIxOW1hV3hzV3dkcEFsZ0NhUUpZQWc9PSIsImV4cCI6bnVsbCwicHVyIjoidmFyaWF0aW9uIn19--f8e22238db523e6e5e5a8ae643921849c4b207bd/0, https://signal-api.nfx.com/rails/active_storage/representations/redirect/eyJfcmFpbHMiOnsibWVzc2FnZSI6IkJBaHBBMW52QVE9PSIsImV4cCI6bnVsbCwicHVyIjoiYmxvYl9pZCJ9fQ==--df77fc9ad679d550ce8e2472e47150cb9fc610e6/eyJfcmFpbHMiOnsibWVzc2FnZSI6IkJBaDdCem9MWm05eWJXRjBPZ2hxY0djNkUzSmxjMmw2WlY5MGIxOW1hV3hzV3dkcEFsZ0NhUUpZQWc9PSIsImV4cCI6bnVsbCwicHVyIjoidmFyaWF0aW9uIn19--f8e22238db523e6e5e5a8ae643921849c4b207bd/1, https://signal-api.nfx.com/rails/active_storage/representations/redirect/eyJfcmFpbHMiOnsibWVzc2FnZSI6IkJBaHBBMXJ2QVE9PSIsImV4cCI6bnVsbCwicHVyIjoiYmxvYl9pZCJ9fQ==--1f58605b9a843b9ee1e820d63d154aea24936f84/eyJfcmFpbHMiOnsibWVzc2FnZSI6IkJBaDdCem9MWm05eWJXRjBPZ2hxY0djNkUzSmxjMmw2WlY5MGIxOW1hV3hzV3dkcEFsZ0NhUUpZQWc9PSIsImV4cCI6bnVsbCwicHVyIjoidmFyaWF0aW9uIn19--f8e22238db523e6e5e5a8ae643921849c4b207bd/2, https://signal-api.nfx.com/rails/active_storage/representations/redirect/eyJfcmFpbHMiOnsibWVzc2FnZSI6IkJBaHBBMXZ2QVE9PSIsImV4cCI6bnVsbCwicHVyIjoiYmxvYl9pZCJ9fQ==--2a200a001411bbff92bd9deb68b4a54215ee0863/eyJfcmFpbHMiOnsibWVzc2FnZSI6IkJBaDdCem9MWm05eWJXRjBPZ2hxY0djNkUzSmxjMmw2WlY5MGIxOW1hV3hzV3dkcEFsZ0NhUUpZQWc9PSIsImV4cCI6bnVsbCwicHVyIjoidmFyaWF0aW9uIn19--f8e22238db523e6e5e5a8ae643921849c4b207bd/3] analyst 150000 1000000 250000 False 4445 Two Small Fish Ventures two-small-fish-ventures Firm [] [{'id': '6141', 'stage_name': 'Pre-Seed', 'slug': 'ai-pre-seed', 'vertical': {'id': '3', 'display_name': 'AI', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '3', 
'stage_name': 'Seed', 'slug': 'ai-seed', 'vertical': {'id': '3', 'display_name': 'AI', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '6144', 'stage_name': 'Pre-Seed', 'slug': 'blockchain-pre-seed', 'vertical': {'id': '7', 'display_name': 'Blockchain', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '7', 'stage_name': 'Seed', 'slug': 'blockchain-seed', 'vertical': {'id': '7', 'display_name': 'Blockchain', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '5406', 'stage_name': 'Other Lists', 'slug': 'british-columbia', 'vertical': {'id': '9678', 'display_name': 'British Columbia', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '6169', 'stage_name': 'Pre-Seed', 'slug': 'consumer-health-pre-seed', 'vertical': {'id': '11', 'display_name': 'Consumer Health', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '11', 'stage_name': 'Seed', 'slug': 'consumer-health-seed', 'vertical': {'id': '11', 'display_name': 'Consumer Health', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '6145', 'stage_name': 'Pre-Seed', 'slug': 'cryptocurrency-pre-seed', 'vertical': {'id': '13', 'display_name': 'Cryptocurrency', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '13', 'stage_name': 'Seed', 'slug': 'cryptocurrency-seed', 'vertical': {'id': '13', 'display_name': 'Cryptocurrency', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '6203', 'stage_name': 'Pre-Seed', 'slug': 'cybersecurity-pre-seed', 'vertical': {'id': '57799', 'display_name': 'Cybersecurity', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '5554', 'stage_name': 'Seed', 'slug': 'cybersecurity-seed', 'vertical': {'id': '57799', 'display_name': 'Cybersecurity', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '6172', 'stage_name': 'Pre-Seed', 'slug': 'developer-tools-pre-seed', 'vertical': {'id': '15', 'display_name': 'Developer Tools', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '15', 'stage_name': 
'Seed', 'slug': 'developer-tools-seed', 'vertical': {'id': '15', 'display_name': 'Developer Tools', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '6205', 'stage_name': 'Pre-Seed', 'slug': 'digital-health-pre-seed', 'vertical': {'id': '57801', 'display_name': 'Digital Health', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '5644', 'stage_name': 'Seed', 'slug': 'digital-health-seed', 'vertical': {'id': '57801', 'display_name': 'Digital Health', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '6207', 'stage_name': 'Pre-Seed', 'slug': 'direct-to-consumer-dtc-pre-seed', 'vertical': {'id': '57803', 'display_name': 'Direct-to-Consumer (DTC)', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '5734', 'stage_name': 'Seed', 'slug': 'direct-to-consumer-dtc-seed', 'vertical': {'id': '57803', 'display_name': 'Direct-to-Consumer (DTC)', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '171', 'stage_name': 'Other Lists', 'slug': 'diverse', 'vertical': {'id': '24242', 'display_name': 'Diverse Investors', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '6148', 'stage_name': 'Pre-Seed', 'slug': 'enterprise-pre-seed', 'vertical': {'id': '20', 'display_name': 'Enterprise', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '20', 'stage_name': 'Seed', 'slug': 'enterprise-seed', 'vertical': {'id': '20', 'display_name': 'Enterprise', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '172', 'stage_name': 'Other Lists', 'slug': 'female', 'vertical': {'id': '24241', 'display_name': 'Female Investors', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '6162', 'stage_name': 'Pre-Seed', 'slug': 'saas-pre-seed', 'vertical': {'id': '48', 'display_name': 'SaaS', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '47', 'stage_name': 'Seed', 'slug': 'saas-seed', 'vertical': {'id': '48', 'display_name': 'SaaS', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '169', 'stage_name': 
'Other Lists', 'slug': 'who-invested-in-diverse-founders', 'vertical': {'id': '24244', 'display_name': 'Investors who invested in diverse founders', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '170', 'stage_name': 'Other Lists', 'slug': 'who-invested-in-female-founders', 'vertical': {'id': '24243', 'display_name': 'Investors who invested in female founders', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '187', 'stage_name': 'Other Lists', 'slug': 'who-were-founders', 'vertical': {'id': '24387', 'display_name': 'Investors who were founders', '__typename': 'Tag'}, '__typename': 'InvestorList'}] NaN
1 InvestorProfileEdge InvestorProfile 13187 29548 Aamir Virani Aamir Virani aamir-virani False False Person [https://signal-api.nfx.com/rails/active_storage/representations/redirect/eyJfcmFpbHMiOnsibWVzc2FnZSI6IkJBaHBBeTJHQVE9PSIsImV4cCI6bnVsbCwicHVyIjoiYmxvYl9pZCJ9fQ==--a7cd75f799cb3eb96a06cbd6b67d287971185953/eyJfcmFpbHMiOnsibWVzc2FnZSI6IkJBaDdCem9MWm05eWJXRjBPZ2hxY0djNkUzSmxjMmw2WlY5MGIxOW1hV3hzV3dkcEFsZ0NhUUpZQWc9PSIsImV4cCI6bnVsbCwicHVyIjoidmFyaWF0aW9uIn19--f8e22238db523e6e5e5a8ae643921849c4b207bd/0] angel 1 100000 25000 False NaN NaN NaN NaN [{'id': '7500', 'display_name': 'California', 'location_investor_list': None, '__typename': 'Tag'}, {'id': '7502', 'display_name': 'Texas', 'location_investor_list': None, '__typename': 'Tag'}, {'id': '7498', 'display_name': 'United States', 'location_investor_list': None, '__typename': 'Tag'}] [{'id': '6141', 'stage_name': 'Pre-Seed', 'slug': 'ai-pre-seed', 'vertical': {'id': '3', 'display_name': 'AI', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '3', 'stage_name': 'Seed', 'slug': 'ai-seed', 'vertical': {'id': '3', 'display_name': 'AI', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '6170', 'stage_name': 'Pre-Seed', 'slug': 'consumer-internet-pre-seed', 'vertical': {'id': '12', 'display_name': 'Consumer Internet', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '12', 'stage_name': 'Seed', 'slug': 'consumer-internet-seed', 'vertical': {'id': '12', 'display_name': 'Consumer Internet', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '6152', 'stage_name': 'Pre-Seed', 'slug': 'hardware-pre-seed', 'vertical': {'id': '28', 'display_name': 'Hardware', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '28', 'stage_name': 'Seed', 'slug': 'hardware-seed', 'vertical': {'id': '28', 'display_name': 'Hardware', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '6156', 'stage_name': 'Pre-Seed', 'slug': 'iot-pre-seed', 'vertical': {'id': '34', 'display_name': 'IoT', 
'__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '33', 'stage_name': 'Seed', 'slug': 'iot-seed', 'vertical': {'id': '34', 'display_name': 'IoT', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '6179', 'stage_name': 'Pre-Seed', 'slug': 'local-services-pre-seed', 'vertical': {'id': '35', 'display_name': 'Local Services', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '34', 'stage_name': 'Seed', 'slug': 'local-services-seed', 'vertical': {'id': '35', 'display_name': 'Local Services', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '6185', 'stage_name': 'Pre-Seed', 'slug': 'parenting-families-pre-seed', 'vertical': {'id': '43', 'display_name': 'Parenting/Families', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '42', 'stage_name': 'Seed', 'slug': 'parenting-families-seed', 'vertical': {'id': '43', 'display_name': 'Parenting/Families', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '6199', 'stage_name': 'Pre-Seed', 'slug': 'real-estate-proptech-pre-seed', 'vertical': {'id': '45', 'display_name': 'Real Estate/PropTech', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '44', 'stage_name': 'Seed', 'slug': 'real-estate-proptech-seed', 'vertical': {'id': '45', 'display_name': 'Real Estate/PropTech', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '6161', 'stage_name': 'Pre-Seed', 'slug': 'robotics-pre-seed', 'vertical': {'id': '47', 'display_name': 'Robotics', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '46', 'stage_name': 'Seed', 'slug': 'robotics-seed', 'vertical': {'id': '47', 'display_name': 'Robotics', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '6162', 'stage_name': 'Pre-Seed', 'slug': 'saas-pre-seed', 'vertical': {'id': '48', 'display_name': 'SaaS', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '47', 'stage_name': 'Seed', 'slug': 'saas-seed', 'vertical': {'id': '48', 'display_name': 'SaaS', '__typename': 'Tag'}, '__typename': 
'InvestorList'}, {'id': '188', 'stage_name': 'Other Lists', 'slug': 'san-francisco-bay-area', 'vertical': {'id': '22992', 'display_name': 'San Francisco Bay Area', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '6187', 'stage_name': 'Pre-Seed', 'slug': 'smb-software-pre-seed', 'vertical': {'id': '51', 'display_name': 'SMB Software', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '50', 'stage_name': 'Seed', 'slug': 'smb-software-seed', 'vertical': {'id': '51', 'display_name': 'SMB Software', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '169', 'stage_name': 'Other Lists', 'slug': 'who-invested-in-diverse-founders', 'vertical': {'id': '24244', 'display_name': 'Investors who invested in diverse founders', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '170', 'stage_name': 'Other Lists', 'slug': 'who-invested-in-female-founders', 'vertical': {'id': '24243', 'display_name': 'Investors who invested in female founders', '__typename': 'Tag'}, '__typename': 'InvestorList'}, {'id': '187', 'stage_name': 'Other Lists', 'slug': 'who-were-founders', 'vertical': {'id': '24387', 'display_name': 'Investors who were founders', '__typename': 'Tag'}, '__typename': 'InvestorList'}] NaN
If you love us, you can donate via PayPal or buy us a coffee so we can maintain and grow the site. Thank you!
Donate Us With