豆瓣爬某个贴子的所有刮刮乐答案python代码

请直接复制以下代码运行。

Cookie 可能会过期。如果过期，请在浏览器中打开豆瓣网页并打开开发者工具，在 Network（网络）面板的搜索栏中查找 question，随意点击其中一个请求（id），在 Headers 中找到 Cookie，复制后替换代码中的 Cookie 即可运行代码。

from selenium import webdriver

from selenium.webdriver.common.by import By

from selenium.webdriver.chrome.options import Options

import time

import requests

import json

import urllib.parse

# Configure Chrome to run headless (no visible browser window).
chrome_options = Options()
chrome_options.add_argument('--headless')

# Start the WebDriver session.
driver = webdriver.Chrome(options=chrome_options)

# Ask the user for the page to scrape; strip stray whitespace that often
# comes along with a pasted URL.
url = input("请输入目标网页的网址：").strip()

# Load the target page.
driver.get(url)

# Fixed pause to give the page time to finish loading.
time.sleep(5)

# Collect every element that carries a data-id attribute.
data_id_elements = driver.find_elements(By.XPATH, "//*[@data-id]")

# Read each attribute exactly once (every get_attribute call is a round trip
# to the browser), then keep only the values that are exactly 7 digits.
raw_data_ids = [element.get_attribute("data-id") for element in data_id_elements]
data_ids = [
    data_id for data_id in raw_data_ids
    if data_id and len(data_id) == 7 and data_id.isdigit()
]

# Show the ids that passed the filter.
print(f"Found valid 7-digit data-ids: {data_ids}")

# Request headers that make the API calls look like they come from a browser.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
    'Accept': 'application/json, text/plain, */*',
    # Only advertise encodings that requests always decodes by itself. "br"
    # (Brotli) is decoded only when an optional Brotli library is installed;
    # without it a Brotli-compressed body would reach response.json() undecoded.
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7',
    'Origin': 'https://www.douban.com',
    'Referer': url,  # the page being scraped
    # NOTE(review): hard-coded session cookie copied from a logged-in browser.
    # It can expire; replace it with your own, and avoid publishing a live one.
    'Cookie': 'll="108288"; bid=jP_4GbIX6kY; viewed="27074037_1064707_26286751_1075531_1274150_35812342_3260802_4088776_26883044_1227981"; __utmv=30149280.17175; __utmz=30149280.1718796391.6.4.utmcsr=caiyawang.xyz|utmccn=(referral)|utmcmd=referral|utmcct=/; __utma=30149280.2101830697.1717257027.1718796391.1719232678.7; douban-fav-remind=1; ap_v=0,6.0; ct=y; dbcl2="171753000:NVy4yxfFedE"; ck=1unP; push_noty_num=0; push_doumail_num=0; frodotk="1aecf9763222a8cc50c6ec629667fa64"',
}

# 定义获取correct_answer的函数

def _ck_from_cookie(cookie, default="1unP"):
    """Return the value of the ``ck`` entry in a Cookie header string, or *default*."""
    for part in cookie.split(";"):
        name, _, value = part.strip().partition("=")
        if name == "ck" and value:
            return value.strip('"')
    return default


def get_correct_answer(data_id):
    """Fetch and decode the ``correct_answer`` of one poll question.

    Sends a GET request to the Douban rexxar poll-question endpoint for
    *data_id*, using the module-level ``headers``.

    Args:
        data_id: Question id (the value of a ``data-id`` attribute).

    Returns:
        The percent-decoded ``correct_answer`` string on success. Otherwise a
        human-readable message: ``"No correct_answer available"`` when the
        field is missing or empty, or a failure description for a non-200
        status, a request error, or a body that is not valid JSON.
    """
    # In the original script the ck query value equals the `ck` entry of the
    # Cookie header; reading it from there keeps the URL in sync whenever the
    # cookie is refreshed. Falls back to the original literal if absent.
    ck = urllib.parse.quote(_ck_from_cookie(headers.get("Cookie", "")), safe="")
    api_url = f"https://m.douban.com/rexxar/api/v2/ceorl/poll/question/{data_id}?ck={ck}"

    try:
        # A timeout stops one stalled request from hanging the whole run.
        response = requests.get(api_url, headers=headers, timeout=10)
    except requests.exceptions.RequestException as e:
        return f"Request failed for ID {data_id}: {e}"

    if response.status_code != 200:
        return f"Failed to fetch data for ID {data_id}, Status Code: {response.status_code}"

    try:
        response_data = response.json()
    except ValueError as e:
        # Covers both the plain ValueError raised by older requests versions
        # and requests' own JSONDecodeError (a ValueError subclass).
        return f"Request failed for ID {data_id}: {e}"

    # Guard against a JSON body that is not an object (e.g. a list or null).
    if not isinstance(response_data, dict):
        return "No correct_answer available"

    correct_answer_encoded = response_data.get("correct_answer", "")
    if not correct_answer_encoded:
        return "No correct_answer available"

    # Undo percent-encoding in the answer text.
    return urllib.parse.unquote(correct_answer_encoded)

# Look up and print the answer for every collected data-id.
try:
    for data_id in data_ids:
        print(f"Fetching correct_answer for data-id: {data_id}")
        correct_answer = get_correct_answer(data_id)
        print(f"Correct Answer: {correct_answer}")
finally:
    # Always shut the browser down, even if the loop raises or is interrupted,
    # so no headless Chrome process is left behind.
    driver.quit()

夭柳

搜索此博客

豆瓣爬某个贴子的所有刮刮乐答案python代码

评论

发表评论

此博客中的热门博文

三无(无字幕无台标无水印)片源的获得

第三个剪辑计划(進度:刚找完素材)

一键清空微博教程