# 51jobSpider
from fake_useragent import UserAgent
from urllib import parse
import requests, re, json, csv
class Job51Spider(object):
    """Scrape 51job search-result pages and store the listings in ``51job.csv``.

    Creating an instance truncates the CSV file and writes the header row;
    :meth:`get_page_info` then appends one row per job listing found in the
    JSON payload embedded in each search-result page.
    """

    # Output file: truncated in __init__, appended to by get_page_info.
    _CSV_PATH = '51job.csv'

    # Header row written to the CSV; column order matches _JOB_FIELDS.
    _CSV_HEADER = ['职位名称', '公司名称', '薪资', '工作地点', '职位发布日期', '公司类型', '公司福利', '职位技能', '公司业务', '公司规模']

    # Keys read from every entry of the payload's 'engine_jds' list, in
    # CSV column order.
    _JOB_FIELDS = (
        'job_name',            # job title
        'company_name',        # company name
        'providesalary_text',  # salary
        'workarea_text',       # work location
        'issuedate',           # posting date
        'companytype_text',    # company type
        'jobwelf',             # company benefits
        'attribute_text',      # job skills / attributes
        'companyind_text',     # company business
        'companysize_text',    # company size
    )

    # Search URL template: first placeholder is the (already encoded)
    # keyword, second is the 1-based page number. The original text had
    # "cotype=99°reefrom=99": "&deg" had been decoded as an HTML entity,
    # corrupting the "&degreefrom" query parameter.
    # NOTE(review): 020000 is presumably the job-area code - confirm.
    _SEARCH_URL = (
        'https://search.51job.com/list/020000,000000,0000,00,9,99,{},2,{}.html'
        '?lang=c&postchannel=0000&workyear=99&cotype=99&degreefrom=99'
        '&jobterm=99&companysize=99&ord_field=0&dibiaoid=0&line=&welfare='
    )

    # Fixed Referer: the same search URL for one specific keyword, page 9.
    _REFERER = _SEARCH_URL.format('%25E6%2591%2584%25E5%25BD%25B1%25E5%25B8%2588', 9)

    # NOTE(review): this is a session cookie captured from a browser and
    # hard-coded here; it will expire and should come from configuration
    # rather than source control. The value is kept as-is, only split into
    # one literal per cookie (the original literal was broken across two
    # lines, which is not valid Python and not a valid header value).
    _COOKIE = (
        "_uab_collina=166011585344663273017034; "
        "nsearch=jobarea%3D%26%7C%26ord_field%3D%26%7C%26recentSearch0%3D%26%7C%26recentSearch1%3D%26%7C%26recentSearch2%3D%26%7C%26recentSearch3%3D%26%7C%26recentSearch4%3D%26%7C%26collapse_expansion%3D; "
        "search=jobarea%7E%60020000%7C%21ord_field%7E%600%7C%21recentSearch0%7E%60020000%A1%FB%A1%FA000000%A1%FB%A1%FA0000%A1%FB%A1%FA00%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA0%A1%FB%A1%FA%C9%E3%D3%B0%CA%A6%A1%FB%A1%FA2%A1%FB%A1%FA1%7C%21; "
        "slife=lastvisit%3D020000%26%7C%26; "
        "partner=cn_bing_com; "
        "privacy=1660290871; "
        "guid=6360a8ae5a33b6fde8aa3a55c6a79fd4; "
        "acw_sc__v2=62f6073ce84e990f8d0a360f4118be99301d0ac7; "
        "SL_G_WPT_TO=zh-CN; "
        "sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%226360a8ae5a33b6fde8aa3a55c6a79fd4%22%2C%22first_id%22%3A%22182869d9d3e16c-07d5ca338d1937-45647f50-1327104-182869d9d3fb12%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_referrer%22%3A%22%22%7D%2C%22identities%22%3A%22eyIkaWRlbnRpdHlfY29va2llX2lkIjoiMTgyODY5ZDlkM2UxNmMtMDdkNWNhMzM4ZDE5MzctNDU2NDdmNTAtMTMyNzEwNC0xODI4NjlkOWQzZmIxMiIsIiRpZGVudGl0eV9sb2dpbl9pZCI6IjYzNjBhOGFlNWEzM2I2ZmRlOGFhM2E1NWM2YTc5ZmQ0In0%3D%22%2C%22history_login_id%22%3A%7B%22name%22%3A%22%24identity_login_id%22%2C%22value%22%3A%226360a8ae5a33b6fde8aa3a55c6a79fd4%22%7D%2C%22%24device_id%22%3A%22182869d9d3e16c-07d5ca338d1937-45647f50-1327104-182869d9d3fb12%22%7D; "
        "SL_GWPT_Show_Hide_tmp=1; "
        "SL_wptGlobTipTmp=1; "
        "acw_tc=ac11000116602929812648254e00e1d11f6240b675be3361abad3b7b7d4f96; "
        "ssxmod_itna=iqmxBD0DcDg037Ke0LxYIEP7wKr7ieqDyG07QEAmx0vc3GzDAxn40iDt=rNmQrBrx4qw7oebIY87OiuDH3Ox2RTejAl84GLDmKDy4=xPGG0xBYDQxAYDGDDPDogPD1D3qDkD7h6CMy1qGWDm4kDWPDYxDrjOKDRxi7DDvQCx07DQ5k8DekoRawcagCimAPF0KD91oDsE0fY0FmjS34MpYEI3YdIx0kl40Oya5szaoDUlFsBoBNNoReWC0ei0Rxi7Dxw7DY8m2D8mG9qADxO70Dvlxq8vxn6wDDioWPOYD===; "
        "ssxmod_itna2=iqmxBD0DcDg037Ke0LxYIEP7wKr7ieqDyG07QEADn93uYqDsqq3DLA18V3N7qgLiQYF9E2Kx=i+aeh=dRG7ugGpwiFqgIO9DnR=4YFFj0cNw+gCwP4Vn7o6y+RC96=M75LbMgdB6146kSfYq7bUvYYtMjEfqaTh52giKyfEviGR+iMbdRjU+GYRdoi2d=lbtre=iTWTkWa=SFaTqlEn2I8bESb3OU+01B3ioru5cGLcrcawPvqamz3bcG=tR/lEvF+8n4zUBUTKXp98CFxG24gh74LrKtboYtbZYoGjy2r/+jDavWA37rDPCPAYiIWmVoprY0LQ+xDGcDG7YiDD="
    )

    # The listings are embedded in the page as a JSON literal assigned to
    # window.__SEARCH_RESULT__ inside a <script> tag. Compiled once; the
    # dot is escaped so it only matches a literal '.'.
    _RESULT_RE = re.compile(r'window\.__SEARCH_RESULT__ = (.*?)</script>', re.S)

    # Seconds to wait for the server; without a timeout a stalled
    # connection would block the scraper forever.
    _TIMEOUT = 10

    def __init__(self):
        """Reset ``51job.csv`` to just the header row and build the request headers."""
        # Opening in 'w' mode clears any previous content before the header
        # is written.
        with open(self._CSV_PATH, 'w', encoding='utf-8', newline='') as f:
            csv.writer(f).writerow(self._CSV_HEADER)
        self.headers = {
            'User-Agent': UserAgent().random,
            "Host": "search.51job.com",
            # The original value had spaces inside the media types
            # ("application / json", "* / *"), which is not valid syntax.
            "Accept": "application/json, text/javascript, */*; q=0.01",
            'Referer': self._REFERER,
            'Cookie': self._COOKIE,
        }

    @classmethod
    def _parse_rows(cls, html):
        """Return one CSV row (list of field values) per listing in *html*.

        Raises:
            ValueError: if the page has no ``window.__SEARCH_RESULT__``
                payload, or the payload is not valid JSON
                (``json.JSONDecodeError`` is a ``ValueError`` subclass).
            KeyError: if the payload lacks ``engine_jds`` or a listing
                lacks one of the expected fields.
        """
        match = cls._RESULT_RE.search(html)
        if match is None:
            raise ValueError(
                'window.__SEARCH_RESULT__ not found in the response; '
                'the request may have been blocked or the page layout changed'
            )
        payload = json.loads(match.group(1))
        rows = []
        for job in payload['engine_jds']:
            rows.append([job[field] for field in cls._JOB_FIELDS])
        return rows

    def get_page_info(self, position, pages=1):
        """Fetch search-result pages for *position* and append them to the CSV.

        Args:
            position: search keyword, already encoded for use in the URL
                path; it is inserted into the URL verbatim.
            pages: number of result pages to fetch, starting at page 1.
                Defaults to 1, i.e. only the first page.

        Raises:
            ValueError: if a fetched page does not contain the embedded
                search-result payload.
        """
        for page in range(1, pages + 1):
            # self.url keeps the most recently requested URL.
            self.url = self._SEARCH_URL.format(position, page)
            res = requests.get(self.url, headers=self.headers, timeout=self._TIMEOUT)
            rows = self._parse_rows(res.text)
            # One open per page instead of one per row.
            with open(self._CSV_PATH, 'a', encoding='utf-8', newline='') as f:
                csv.writer(f).writerows(rows)
# Script entry point: creating the spider resets 51job.csv to its header row,
# then the user is prompted for the job keyword to search for.
s = Job51Spider()
name = input('请输入要搜索的职业:')
# Percent-encode the keyword, then escape every '%' once more as '%25',
# which yields a double-encoded keyword for the URL path.
position = parse.quote(name).replace('%', '%25')
s.get_page_info(position)