import requests
from fake_useragent import UserAgent
from bs4 import BeautifulSoup

# Randomized Firefox user-agent so Baidu is less likely to block the scraper.
ua = UserAgent()
useragent = ua.firefox
headers = {'user-agent': useragent}
# Global stop flag: set to False by chaxun() once the target site is found,
# which makes fanye() stop fetching further result pages.
pm = True


def fanye():
    """Prompt for a keyword, page count and target domain, then scan
    Baidu result pages one by one until the target domain is found
    (or the page limit is reached).
    """
    guanjianci = input('需要查询的关键词:')
    pages = int(input('需要查询前几页:'))
    wangzhi = input('目标网址:')
    for i in range(pages):
        if not pm:  # chaxun() found the target on an earlier page
            break
        # Baidu paginates with &pn=0,10,20,... (10 results per page).
        num = i * 10
        url = 'https://www.baidu.com/s?wd=' + guanjianci + '&pn=' + str(num)
        chaxun(wangzhi, url, i)


def chaxun(wangzhi, url, num):
    """Fetch one Baidu results page and look for the target domain.

    Args:
        wangzhi: target domain substring to look for in result URLs.
        url: Baidu search-results page URL to fetch.
        num: zero-based page index (printed as num+1 on a hit).

    Side effects: on a hit, prints "page rank title" and sets the
    module-level flag ``pm`` to False to stop the outer loop.
    """
    global pm
    # timeout prevents a dead connection from hanging the whole scan
    r = requests.get(url, headers=headers, timeout=10)
    soup = BeautifulSoup(r.text, 'lxml')
    # Each organic result title is an <h3 class="t"> wrapping an <a>.
    tits = soup.find_all('h3', class_='t')
    paiming = 1  # rank within this page
    for biaoti in tits:
        lianjie = biaoti.a['href']
        # Baidu links are redirects; follow them to get the real URL.
        try:
            r = requests.get(lianjie, headers=headers,
                             allow_redirects=True, timeout=10)
        except requests.RequestException:
            # Unreachable result: count it and move on rather than abort.
            paiming += 1
            continue
        kwww = r.url
        if 'www.baidu.com' in kwww:
            # Redirect did not resolve off Baidu (ad/blocked); skip it
            # without advancing the rank, matching the original logic.
            continue
        title = biaoti.text.strip()
        if wangzhi in kwww:
            # Hit: report 1-based page number, in-page rank, and title.
            print(num + 1, paiming, title)
            pm = False
            break
        paiming += 1


if __name__ == '__main__':
    fanye()
比如输入关键词:深圳SEO,查询前10页,目标网址www.liaojinhua.com,输出结果为:4 8 深圳SEO优化技术交流分享-启明SEO博客,表示第四页,第8位,也就是第38名(每页10条,(4-1)×10+8=38)。
下一篇: 如何提高网站用户体验和转化率?
上一篇:Python 列表(List)使用详解
评论