Django 用户认证 用户 邮箱登录 邮箱注册 ORM or,and,not form.py FORM ModelForm Paginator 分页 HTML JQuery 定位元素 ajax django切片 restfulapi 跨域 Ubuntu Python Mysql Scrapy 爬虫 导出 Python读写 Pycharm 破解 session re sqlite3 生成式 其他 Prism 富文本 CSS Nginx 部署 请求头 抓包 协议 selenium Ubuntu 宝塔 AI Comfy-ui ollama dify open-webui Git docker
51jobSpider
张建行 2022年6月20日 10:26 90 文章标签: Scrapy 爬虫 导出 re 请求头 抓包

51jobSpider

from fake_useragent import UserAgent
from urllib import parse
import requests, re, json, csv


class Job51Spider(object):
    """Spider for job listings on search.51job.com.

    The constructor truncates ``51job.csv`` and writes a header row;
    :meth:`get_page_info` fetches result pages and appends one CSV row
    per listing.
    """

    CSV_PATH = '51job.csv'
    # Column order must match the key order used in get_page_info().
    CSV_HEADER = ['职位名称', '公司名称', '薪资', '工作地点', '职位发布日期',
                  '公司类型', '公司福利', '职位技能', '公司业务', '公司规模']
    # The listing page embeds its results as a JSON blob assigned to
    # window.__SEARCH_RESULT__.  Compile once (not per request) and escape
    # the dots so they match literally.
    RESULT_RE = re.compile(r'window\.__SEARCH_RESULT__ = (.*?)</script>', re.S)
    URL_TMPL = ('https://search.51job.com/list/020000,000000,0000,00,9,99,'
                '{},2,{}.html?lang=c&postchannel=0000&workyear=99&cotype=99'
                '&degreefrom=99&jobterm=99&companysize=99&ord_field=0'
                '&dibiaoid=0&line=&welfare=')

    def __init__(self):
        # Truncate any previous run's output and write the header row.
        with open(self.CSV_PATH, 'w', encoding='utf-8', newline='') as f:
            csv.writer(f).writerow(self.CSV_HEADER)
        self.headers = {
            'User-Agent': UserAgent().random,
            'Host': 'search.51job.com',
            # BUG FIX: the original value contained stray spaces
            # ("application / json, ...") which is not a valid Accept header.
            'Accept': 'application/json, text/javascript, */*; q=0.01',
            'Referer': 'https://search.51job.com/list/020000,000000,0000,00,9,99,%25E6%2591%2584%25E5%25BD%25B1%25E5%25B8%2588,2,9.html?lang=c&postchannel=0000&workyear=99&cotype=99&degreefrom=99&jobterm=99&companysize=99&ord_field=0&dibiaoid=0&line=&welfare=',
            # NOTE(review): hard-coded session cookie copied from a browser;
            # it will expire — refresh it if requests start returning the
            # anti-bot page.
            'Cookie': "_uab_collina=166011585344663273017034; nsearch=jobarea%3D%26%7C%26ord_field%3D%26%7C%26recentSearch0%3D%26%7C%26recentSearch1%3D%26%7C%26recentSearch2%3D%26%7C%26recentSearch3%3D%26%7C%26recentSearch4%3D%26%7C%26collapse_expansion%3D; search=jobarea%7E%60020000%7C%21ord_field%7E%600%7C%21recentSearch0%7E%60020000%A1%FB%A1%FA000000%A1%FB%A1%FA0000%A1%FB%A1%FA00%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA99%A1%FB%A1%FA9%A1%FB%A1%FA99%A1%FB%A1%FA%A1%FB%A1%FA0%A1%FB%A1%FA%C9%E3%D3%B0%CA%A6%A1%FB%A1%FA2%A1%FB%A1%FA1%7C%21; slife=lastvisit%3D020000%26%7C%26; partner=cn_bing_com; privacy=1660290871; guid=6360a8ae5a33b6fde8aa3a55c6a79fd4; acw_sc__v2=62f6073ce84e990f8d0a360f4118be99301d0ac7; SL_G_WPT_TO=zh-CN; sensorsdata2015jssdkcross=%7B%22distinct_id%22%3A%226360a8ae5a33b6fde8aa3a55c6a79fd4%22%2C%22first_id%22%3A%22182869d9d3e16c-07d5ca338d1937-45647f50-1327104-182869d9d3fb12%22%2C%22props%22%3A%7B%22%24latest_traffic_source_type%22%3A%22%E7%9B%B4%E6%8E%A5%E6%B5%81%E9%87%8F%22%2C%22%24latest_search_keyword%22%3A%22%E6%9C%AA%E5%8F%96%E5%88%B0%E5%80%BC_%E7%9B%B4%E6%8E%A5%E6%89%93%E5%BC%80%22%2C%22%24latest_referrer%22%3A%22%22%7D%2C%22identities%22%3A%22eyIkaWRlbnRpdHlfY29va2llX2lkIjoiMTgyODY5ZDlkM2UxNmMtMDdkNWNhMzM4ZDE5MzctNDU2NDdmNTAtMTMyNzEwNC0xODI4NjlkOWQzZmIxMiIsIiRpZGVudGl0eV9sb2dpbl9pZCI6IjYzNjBhOGFlNWEzM2I2ZmRlOGFhM2E1NWM2YTc5ZmQ0In0%3D%22%2C%22history_login_id%22%3A%7B%22name%22%3A%22%24identity_login_id%22%2C%22value%22%3A%226360a8ae5a33b6fde8aa3a55c6a79fd4%22%7D%2C%22%24device_id%22%3A%22182869d9d3e16c-07d5ca338d1937-45647f50-1327104-182869d9d3fb12%22%7D; SL_GWPT_Show_Hide_tmp=1; SL_wptGlobTipTmp=1; acw_tc=ac11000116602929812648254e00e1d11f6240b675be3361abad3b7b7d4f96; ssxmod_itna=iqmxBD0DcDg037Ke0LxYIEP7wKr7ieqDyG07QEAmx0vc3GzDAxn40iDt=rNmQrBrx4qw7oebIY87OiuDH3Ox2RTejAl84GLDmKDy4=xPGG0xBYDQxAYDGDDPDogPD1D3qDkD7h6CMy1qGWDm4kDWPDYxDrjOKDRxi7DDvQCx07DQ5k8DekoRawcagCimAPF0KD91oDsE0fY0FmjS34MpYEI3YdIx0kl40Oya5szaoDUlFsBoBNNoReWC0ei0Rxi7Dxw7DY8m2D8mG9qADxO70Dvlxq8vxn6wDDioWPOYD===; ssxmod_itna2=iqmxBD0DcDg037Ke0LxYIEP7wKr7ieqDyG07QEADn93uYqDsqq3DLA18V3N7qgLiQYF9E2Kx=i+aeh=dRG7ugGpwiFqgIO9DnR=4YFFj0cNw+gCwP4Vn7o6y+RC96=M75LbMgdB6146kSfYq7bUvYYtMjEfqaTh52giKyfEviGR+iMbdRjU+GYRdoi2d=lbtre=iTWTkWa=SFaTqlEn2I8bESb3OU+01B3ioru5cGLcrcawPvqamz3bcG=tR/lEvF+8n4zUBUTKXp98CFxG24gh74LrKtboYtbZYoGjy2r/+jDavWA37rDPCPAYiIWmVoprY0LQ+xDGcDG7YiDD="
        }

    def get_page_info(self, position, pages=1):
        """Fetch listing pages for *position* and append rows to the CSV.

        position: search keyword, already double-percent-encoded as 51job
                  expects in its URL path.
        pages:    number of result pages to scrape (default 1, matching the
                  original hard-coded range).
        """
        # Keys extracted from each job dict, in CSV_HEADER column order.
        keys = ['job_name', 'company_name', 'providesalary_text',
                'workarea_text', 'issuedate', 'companytype_text',
                'jobwelf', 'attribute_text', 'companyind_text',
                'companysize_text']
        for page in range(1, pages + 1):
            self.url = self.URL_TMPL.format(position, page)
            res = requests.get(self.url, headers=self.headers)
            match = self.RESULT_RE.search(res.text)
            if match is None:
                # BUG FIX: the original called .group(1) unconditionally and
                # crashed with AttributeError when the anti-bot/changed page
                # carried no embedded JSON.  Skip the page instead.
                print('未能解析第 {} 页的数据'.format(page))
                continue
            data = json.loads(match.group(1))
            # Open the output file once per page, not once per row.
            with open(self.CSV_PATH, 'a', encoding='utf-8', newline='') as f:
                writer = csv.writer(f)
                # BUG FIX: the original reused loop variable `i` for both
                # the page number and the job dict, shadowing the outer loop.
                for job in data['engine_jds']:
                    # .get(..., '') keeps one malformed listing from
                    # aborting the whole scrape with a KeyError.
                    writer.writerow([job.get(k, '') for k in keys])


s = Job51Spider()
name = input('请输入要搜索的职业:')
position = '%25'.join(parse.quote(name).split('%'))
s.get_page_info(position)