Logo Questions Linux Laravel Mysql Ubuntu Git Menu
 

Web scraping with Python that requires login to view output

I am trying to output each job's salary, but the scraped value just says that a login is needed to view it. I can successfully output the other job details such as the job title, company and location. I have tried running the script both logged in to my account and logged out, but it still says "Login to view salary". My question is: how do I get the salary, which requires a login to view? Any help would be appreciated.

import requests
from bs4 import BeautifulSoup
from mechanize import Browser
import http.cookiejar as cookielib

# Script from the question: log in through a mechanize Browser, then fetch a
# job listing page with requests and print the salary field of each row.

# Create the mechanize browser that performs the login.
br = Browser()
# Browser options.
br.set_handle_robots(False)  # ignore robots
br.set_handle_refresh(False) # can sometimes hang without this
br.addheaders = [('User-Agent', 'Firefox')]
# NOTE: login_url is assigned here but the same literal is repeated in
# br.open() below, so this variable is not used in the visible snippet.
login_url = "https://myjobstreet.jobstreet.com.my/home/login.php"
# Cookie jar attached to the mechanize browser; cookies set during login
# are collected in cj.
cj = cookielib.CookieJar()
br.set_cookiejar(cj)
response = br.open('https://myjobstreet.jobstreet.com.my/home/login.php')
# Print the available forms on the login page (debugging aid).
for f in br.forms():
    print(f)
br.select_form('login')
br.set_all_readonly(False)   # allows everything to be written to
br.form['login_id'] = 'my_id'
br.form['password'] = 'my_password'
# Submit the currently selected (login) form.
br.submit()

# NOTE(review): `url` and `headers` are not defined anywhere in this snippet;
# unless they are defined elsewhere, this line raises NameError.
# NOTE(review): this request is made with requests, not with `br`, and the
# cookie jar `cj` is not passed to it, so the login performed above is not
# carried over to this call. The `auth` tuple is a separate credential
# mechanism from the form login submitted through mechanize.
r = requests.get(url, headers=headers, auth=('user', 'pass'))
soup = BeautifulSoup(r.text, 'lxml')
# Each job listing is expected in a <div class="rRow"> element.
jobs = soup.find_all("div", {"class": "rRow"})
for job in jobs:
    try:
        # Take the text of the first <div class="rRowLoc"> inside the row.
        salary = job.find_all("div", {"class": "rRowLoc"})
        job_salary = salary[0].text.strip()
    except IndexError:
        # NOTE(review): when no matching div exists, job_salary keeps the
        # value from the previous row, or is unbound on the first row, in
        # which case the print below raises NameError.
        pass

    print("Salary: ", job_salary)

This is the output:

Job:  Sales Executive
Company:  Company
Location:  Earth
Salary:  Login to view salary

Expected output:

Job:  Sales Executive
Company:  Company
Location:  Earth
Salary:  1000 
like image 516
A.nonymous Avatar asked Nov 06 '22 15:11

A.nonymous


1 Answer

Your code is not working as posted, but your goal is to scrape the company name, position, location and salary from the page.

You can do your login process using requests.

The salary is not available in the page's HTML because it is loaded through a separate Ajax request, so whenever you look for the salary in the HTML it will be blank.

import requests
import bs4 as bs

# Log in with a requests session, scrape the company name, position and
# location from the job page HTML, then read the salary from the JSON
# "panels" endpoint of the same job.

# Browser-like request headers. 'Host' is switched further down when the
# requests move from the login domain to the job-listing domain.
headers = {
    'Host': 'myjobstreet.jobstreet.com.my',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.2; WOW64) AppleWebKit/537.31 (KHTML, like Gecko) Chrome/26.0.1410.64 Safari/537.31',
}

login_url = 'https://myjobstreet.jobstreet.com.my/home/login.php?site=&language_code=3'
# Form fields posted to the login endpoint.
post_data_for_login = {
    "referer_url":"",
    "mobile_referer":"",
    "login_id":"**YOUR EMAIL ID**",
    "password":"**YOUR PASSWORD**",
    "remember":"on",
    "btn_login":"",
    "login":"1"
}

job_page_url = 'https://www.jobstreet.com.my/en/job/fb-service-team-4126557'
job_page_json_url = job_page_url + '/panels'

# Seconds to wait for each request; without a timeout requests can block
# indefinitely on an unresponsive server.
REQUEST_TIMEOUT = 30


def _node_text(node, default='N/A'):
    """Return the text of a parsed element, or *default* if the lookup found nothing."""
    return default if node is None else node.text


# Create Session. The context manager closes its connections on exit.
with requests.Session() as session:
    # Login request to get cookies; the session reuses them on later requests.
    response = session.post(login_url, data=post_data_for_login,
                            headers=headers, timeout=REQUEST_TIMEOUT)

    # NOTE: this is only the HTTP status of the login request; it does not by
    # itself confirm that the credentials were accepted.
    print('login_response:', response.status_code)

    # Update Host in headers.
    headers['Host'] = 'www.jobstreet.com.my'

    # Get Job details.
    response = session.get(job_page_url, headers=headers, timeout=REQUEST_TIMEOUT)

    # Fetch Company Name, Position and Location details from HTML.
    # find() returns None for a missing element, so go through _node_text
    # instead of calling .text on the result directly.
    soup = bs.BeautifulSoup(response.text, 'lxml')
    company_name = _node_text(soup.find("div", {"id": "company_name"})).strip()
    position_title = _node_text(soup.find("h1", {"id": "position_title"})).strip()
    work_location = _node_text(soup.find("span", {"id": "single_work_location"})).strip()
    print('Company:', company_name)
    print('Position:', position_title)
    print('Location:', work_location)

    # Get Salary data From JSON.
    response = session.get(job_page_json_url, headers=headers, timeout=REQUEST_TIMEOUT)

    # Fetch Salary details from JSON.
    if response.status_code == 200:
        try:
            json_data = response.json()
        except ValueError:
            # The body was not valid JSON (for example an HTML error page).
            json_data = None

        salary_tag = json_data.get('job_salary') if isinstance(json_data, dict) else None

        if salary_tag:
            # The 'job_salary' value is an HTML fragment, so parse it again.
            soup = bs.BeautifulSoup(salary_tag, 'lxml')
            salary_range = _node_text(soup.find("span", {"id": "salary_range"}))
            print('Salary:', salary_range)
        else:
            print('Salary: not found in panels response')
    else:
        print('Salary request failed with status:', response.status_code)

Output:

login_response: 200
Company: Copper Bar and Restaurant (88 Armenian Sdn Bhd)
Position: F&B Service Team
Location: Malaysia - Penang
Salary:  MYR 2,000 - MYR 2,500
like image 198
Mrugesh Kadia Avatar answered Nov 14 '22 21:11

Mrugesh Kadia