Python
import requests
from fake_useragent import UserAgent
from bs4 import BeautifulSoup

# Use a random Firefox user agent so Baidu is less likely to block the requests
ua = UserAgent()
useragent = ua.firefox
headers = {'user-agent': useragent}

pm = True  # flag: keep paging until the target site has been found


def fanye():
    """Ask for the query parameters and walk through the result pages."""
    guanjianci = input('Keyword to check: ')
    pages = int(input('Number of pages to check: '))
    wangzhi = input('Target site (domain): ')
    for i in range(pages):
        if pm:
            num = i * 10  # Baidu paginates with &pn= in steps of 10
            url = 'https://www.baidu.com/s?wd=' + guanjianci + '&pn=' + str(num)
            chaxun(wangzhi, url, i)
        else:
            break


def chaxun(wangzhi, url, num):
    """Scan one result page and print page/position when the target site is found."""
    global pm
    r = requests.get(url, headers=headers)
    soup = BeautifulSoup(r.text, 'lxml')
    tits = soup.find_all('h3', class_='t')  # result titles sit in <h3 class="t">
    paiming = 1  # position within the current page
    for biaoti in tits:
        lianjie = biaoti.a['href']  # Baidu redirect link
        try:
            # Follow the redirect to get the real landing URL
            r = requests.get(lianjie, headers=headers, allow_redirects=True)
        except requests.RequestException:
            paiming += 1
            continue
        kwww = r.url
        if kwww.find('www.baidu.com') != -1:
            # Still a Baidu URL (ad or internal result): skip it without counting
            continue
        title = biaoti.text.strip()
        if kwww.find(wangzhi) != -1:
            # Found the target site: print page number, position and title
            print(num + 1, paiming, title)
            pm = False
            break
        else:
            paiming += 1


if __name__ == '__main__':
    fanye()
For example, entering the keyword 深圳SEO, checking the first 10 pages, with target site www.liaojinhua.com, the output is: 4 8 深圳SEO优化技术交流分享-启明SEO博客, which means the site appears on page 4 at position 8, i.e. the 38th result overall.
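As a quick sanity check on that interpretation, here is a minimal sketch that converts a page/position pair into an overall rank. It assumes 10 results per page, matching the &pn= step of 10 used in the script above; the function name overall_rank is only illustrative and is not part of the original code.

Python
def overall_rank(page, position, per_page=10):
    """Convert a (page, position) pair into an overall ranking number."""
    return (page - 1) * per_page + position

print(overall_rank(4, 8))  # 38: page 4, position 8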