WAP网站是啥?wap是移动端还是手机端?

WAP网站是啥?wap是移动端还是手机端?

Python 百度下拉框关键词采集源码:

# Baidu search drop-down (suggestion) keyword scraper
import json
import urllib
import urllib.parse

import requests

def get_keywords(word):
    """Fetch drop-down suggestions for ``word`` and return them as a list.

    The response body is parsed as JSON; every element of its ``"g"`` array
    contributes its ``"q"`` field. Each suggestion is also printed as it is
    collected.

    Args:
        word: Seed keyword to look up.

    Returns:
        list: Suggested phrases in response order. Empty when the response
        carries no ``"g"`` array.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
        ValueError: If the response body is not valid JSON.
    """
    # NOTE(review): the statement that assigned ``url`` was missing from the
    # published listing, so the function raised NameError. The endpoint and
    # query parameters below are a reconstruction -- Baidu's ``sugrec``
    # interface is assumed because the parsing code expects a ``g`` list of
    # objects keyed by ``q``. Confirm against the live API before relying on it.
    url = 'https://www.baidu.com/sugrec'
    params = {
        'pre': 1,
        'p': 3,
        'ie': 'utf-8',
        'json': 1,
        'prod': 'pc',
        'from': 'pc_web',
        'wd': word,
    }
    # ``params=`` lets requests percent-encode the keyword; the timeout keeps
    # a stalled connection from hanging the caller forever.
    response = requests.get(url, params=params, timeout=10)
    payload = response.json()

    key_words = []
    # ``g`` may be absent (or null) when there are no suggestions.
    for entry in payload.get('g') or []:
        print(entry['q'])
        key_words.append(entry['q'])
    return key_words

def get_sug(word):
    """Fetch drop-down suggestions for ``word`` from the JSONP suggestion API.

    The raw response bytes are decoded as GBK, the JSONP wrapper is removed,
    and the ``"s"`` array of the remaining JSON object is returned.

    Args:
        word: Seed keyword to look up.

    Returns:
        list: The suggestion list stored under ``"s"`` in the response.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
        UnicodeDecodeError: If the body is not valid GBK.
        ValueError: If the unwrapped body is not valid JSON.
        KeyError: If the JSON object has no ``"s"`` key.
    """
    # NOTE(review): the statement that assigned ``url`` was missing from the
    # published listing (NameError), and three calls were missing their
    # closing parenthesis (SyntaxError). The endpoint used here is the one
    # ``get_word`` in this file queries; confirm it is the intended one.
    url = 'http://suggestion.baidu.com/su'
    params = {'wd': word, 'sugmode': 3, 'json': 1}
    # ``verify=False`` is carried over from the original, which deliberately
    # disabled HTTPS certificate verification.
    # NOTE(review): drop it if the endpoint is ever switched to https.
    r = requests.get(url, params=params, verify=False, timeout=10)
    text = r.content.decode('gbk').strip()

    # The original sliced ``[41:-2]``, which only works for one specific
    # callback-name length. Locate the wrapper's parentheses instead.
    if not text.startswith('{'):
        text = text[text.find('(') + 1:text.rfind(')')]
    res_json = json.loads(text)
    return res_json['s']

def get_word(word):
    """Fetch drop-down suggestions for ``word`` from suggestion.baidu.com.

    The endpoint answers with JSON wrapped in a ``window.baidu.sug(...)``
    call. The wrapper is removed and the ``"s"`` array of the JSON object is
    returned.

    Args:
        word: Seed keyword to look up.

    Returns:
        list: The suggestion list stored under ``"s"`` in the response.

    Raises:
        requests.RequestException: If the HTTP request fails or times out.
        ValueError: If the unwrapped body is not valid JSON.
        KeyError: If the JSON object has no ``"s"`` key.
    """
    url = 'http://suggestion.baidu.com/su'
    # ``params=`` percent-encodes the keyword, so a seed containing ``&``,
    # ``#`` or spaces no longer corrupts the query string.
    params = {'wd': word, 'sugmode': 3, 'json': 1}
    text = requests.get(url, params=params, timeout=10).text.strip()

    # Strip only the outer ``callback( ... );`` wrapper. The original removed
    # every ")" and ";" in the body, which also mangled suggestions that
    # contain those characters.
    if not text.startswith('{'):
        text = text[text.find('(') + 1:text.rfind(')')]
    data = json.loads(text)
    return data['s']

def get_word_scv():
    """Append suggestions for every keyword in ``gjc.txt`` to ``word.csv``.

    ``gjc.txt`` is read as UTF-8 with one seed keyword per line. For each
    non-blank keyword the suggestion endpoint is queried, and every entry of
    the response's ``"s"`` array is printed and appended to ``word.csv`` on
    its own line.

    Returns:
        None

    Raises:
        OSError: If ``gjc.txt`` cannot be read or ``word.csv`` cannot be opened.
        requests.RequestException: If an HTTP request fails or times out.
        ValueError: If a response body is not valid JSON after unwrapping.
        KeyError: If a response has no ``"s"`` key.
    """
    # NOTE(review): the statement that assigned ``url`` was missing from the
    # published listing (NameError). It is rebuilt from the endpoint that
    # ``get_word`` in this file uses, whose ``window.baidu.sug(`` wrapper is
    # the one this function strips.
    url_template = 'http://suggestion.baidu.com/su?wd=%s&sugmode=3&json=1'

    # Context managers close both files even if a request fails part-way.
    # The input is opened first so a missing gjc.txt does not leave behind a
    # freshly created, empty word.csv.
    # word.csv keeps the platform default encoding, as in the original.
    with open('gjc.txt', encoding='utf-8') as seeds, \
            open('word.csv', 'a+') as opencsv:
        for line in seeds:
            # Lines from file iteration end in "\n"; strip it so it is not
            # sent as part of the keyword, and skip blank lines.
            word = line.strip()
            if not word:
                continue
            quoted = urllib.parse.quote_plus(word)
            print(quoted)

            text = requests.get(url_template % quoted, timeout=10).text.strip()
            # Remove only the outer ``callback( ... );`` wrapper.
            if not text.startswith('{'):
                text = text[text.find('(') + 1:text.rfind(')')]
            data = json.loads(text)

            for suggestion in data['s']:
                print(suggestion)
                opencsv.write('%s\n' % suggestion)

def get_more_word(word):
    """Collect suggestions for ``word`` followed by each letter a-z, de-duplicated.

    Calls ``get_keywords`` once per letter with ``word + letter``. Prints the
    combined list, its length, and the number of distinct suggestions.

    Args:
        word: Seed keyword; each lowercase ASCII letter is appended in turn.

    Returns:
        list: Distinct suggestions in first-seen order.
    """
    more_word = []
    for letter in 'abcdefghijklmnopqrstuvwxyz':
        more_word.extend(get_keywords('%s%s' % (word, letter)))
    print(more_word)
    print(len(more_word))

    # dict.fromkeys drops duplicates while keeping first-seen order, so the
    # result is deterministic (list(set(...)) was not) and is built only once.
    unique_words = list(dict.fromkeys(more_word))
    print(len(unique_words))
    return unique_words


def get_more_sug(word):
    """Collect suggestions for ``word`` followed by each letter a-z, de-duplicated.

    Calls ``get_sug`` once per letter with ``word + letter`` and prints the
    number of distinct suggestions found.

    Args:
        word: Seed keyword; each lowercase ASCII letter is appended in turn.

    Returns:
        list: Distinct suggestions in first-seen order.
    """
    all_words = []
    for letter in 'abcdefghijklmnopqrstuvwxyz':
        all_words.extend(get_sug(word + letter))

    # dict.fromkeys drops duplicates while keeping first-seen order, so the
    # result is deterministic (list(set(...)) was not) and is built only once.
    unique_words = list(dict.fromkeys(all_words))
    print(len(unique_words))
    return unique_words

本文提供多种 Python 百度下拉框关键词采集方式,均基于百度 API 接口实现,采集结果可导出为 CSV 文件(可用 Excel 打开)。文中共提供 4 个采集函数和 2 个汇总函数,可根据自己的需求灵活使用。

原创文章,作者:凌哥SEO,如若转载,请注明出处:https://www.seoxyg.com/6927.html

发表评论

登录后才能评论