diff --git a/python/微博热搜.py b/python/微博热搜.py
new file mode 100644
index 0000000..a7c14da
--- /dev/null
+++ b/python/微博热搜.py
@@ -0,0 +1,39 @@
+"""Fetch the Weibo real-time hot-search list and save it as an Excel file."""
+import datetime
+import os
+
+import pandas as pd
+import requests
+
+# Browser-like User-Agent header sent with the request.
+browse_header = {
+    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36'
+}
+
+url = "https://weibo.com/ajax/side/hotSearch"
+output_dir = '../数据汇总/微博'
+
+# The timeout keeps the script from hanging on a stalled connection, and
+# raise_for_status() reports an HTTP error before any JSON parsing happens.
+response = requests.get(url, headers=browse_header, timeout=10)
+response.raise_for_status()
+res = response.json()
+
+# Real-time rising hot topics.
+content_list = res['data']['realtime']
+
+df = pd.DataFrame({
+    '热搜标题': [content['word'] for content in content_list],
+    '热搜排名': [content['rank'] for content in content_list],
+    '热搜热度': [content['num'] for content in content_list],
+    '描述': [content['note'] for content in content_list],
+})
+
+# Read the clock once so the printed time matches the file name.
+current_datetime = datetime.datetime.now()
+print(current_datetime)
+xdate = current_datetime.strftime("%Y-%m-%d-%H-%M-%S")
+
+# to_excel() does not create missing parent directories.
+os.makedirs(output_dir, exist_ok=True)
+df.to_excel(f'{output_dir}/{xdate}微博热搜榜.xlsx', index=False)  # save the result data
diff --git a/python/抖音热搜.py b/python/抖音热搜.py
new file mode 100644
index 0000000..638c4a1
--- /dev/null
+++ b/python/抖音热搜.py
@@ -0,0 +1,49 @@
+"""Fetch the Douyin hot-search list and save it as an Excel file."""
+import datetime
+import os
+
+import pandas as pd
+import requests
+
+# Browser-like request headers.
+# NOTE(review): the Cookie value is hard-coded; confirm it is still valid.
+browse_header = {
+    "Accept": "application/json, text/plain, */*",
+    "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/100.0.4896.60 Safari/537.36",
+    "Host": "www.douyin.com",
+    "Referer": "https://www.douyin.com/discover",
+    "Cookie": "_xsrf=Pd0NpG6J8kZdHtzBVnNyQP1g0rO7NKeg; _zap=d7f27b9f-4fe3-4ef4-9376-df278af16940;"
+}
+
+url = "https://www.douyin.com/aweme/v1/web/hot/search/list/?device_platform=webapp&aid=6383&channel=channel_pc_web"
+output_dir = '../数据汇总/抖音'
+
+# The timeout keeps the script from hanging on a stalled connection, and
+# raise_for_status() reports an HTTP error before any JSON parsing happens.
+response = requests.get(url, headers=browse_header, timeout=10)
+response.raise_for_status()
+res = response.json()
+
+# Real-time rising hot topics.
+content_list = res['data']['word_list']
+
+df = pd.DataFrame({
+    '热搜标题': [content['word'] for content in content_list],
+    '热搜排名': [content['position'] for content in content_list],
+    '热搜热度': [content['hot_value'] for content in content_list],
+    # The description column is filled with the same 'word' text as the title.
+    '描述': [content['word'] for content in content_list],
+    '链接地址': [
+        f"https://www.douyin.com/hot/{content['sentence_id']}"
+        for content in content_list
+    ],
+})
+
+# Read the clock once so the printed time matches the file name.
+current_datetime = datetime.datetime.now()
+print(current_datetime)
+xdate = current_datetime.strftime("%Y-%m-%d-%H-%M-%S")
+
+# to_excel() does not create missing parent directories.
+os.makedirs(output_dir, exist_ok=True)
+df.to_excel(f'{output_dir}/{xdate}抖音热搜榜.xlsx', index=False)  # save the result data
diff --git a/python/百度热搜.py b/python/百度热搜.py
new file mode 100644
index 0000000..216bf84
--- /dev/null
+++ b/python/百度热搜.py
@@ -0,0 +1,38 @@
+"""Fetch the Baidu hot-search list via tenapi.cn and save it as an Excel file."""
+import datetime
+import os
+
+import pandas as pd
+import requests
+
+# Browser-like User-Agent header sent with the request.
+browse_header = {
+    'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/116.0.0.0 Safari/537.36'
+}
+
+url = "https://tenapi.cn/v2/baiduhot"
+output_dir = '../数据汇总/百度'
+
+# The timeout keeps the script from hanging on a stalled connection, and
+# raise_for_status() reports an HTTP error before any JSON parsing happens.
+response = requests.get(url, headers=browse_header, timeout=10)
+response.raise_for_status()
+res = response.json()
+
+# Real-time rising hot topics.
+content_list = res['data']
+
+df = pd.DataFrame({
+    '热搜标题': [content['name'] for content in content_list],
+    '热搜热度': [content['hot'] for content in content_list],
+    '热搜链接': [content['url'] for content in content_list],
+})
+
+# Read the clock once so the printed time matches the file name.
+current_datetime = datetime.datetime.now()
+print(current_datetime)
+xdate = current_datetime.strftime("%Y-%m-%d-%H-%M-%S")
+
+# to_excel() does not create missing parent directories.
+os.makedirs(output_dir, exist_ok=True)
+df.to_excel(f'{output_dir}/{xdate}百度热搜榜.xlsx', index=False)  # save the result data