请直接复制以下代码。
Cookie 可能会过期。如果过期,请打开你的豆瓣网页,在搜索栏中搜索 question,然后随意点击一个 id,在 Header 中找到 Cookie,复制并更新到代码中,即可运行代码。
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.chrome.options import Options
import time
import requests
import json
import urllib.parse
# Configure Chrome to run headless (no visible browser window).
chrome_options = Options()
chrome_options.add_argument('--headless')

# Start the WebDriver session.
driver = webdriver.Chrome(options=chrome_options)

try:
    # Ask the user for the page to scan; stray whitespace from a pasted URL
    # would otherwise be passed straight to the browser.
    url = input("请输入目标网页的网址:").strip()

    # Load the page, then pause for a fixed five seconds so it can finish
    # loading before the DOM is queried.
    driver.get(url)
    time.sleep(5)

    # Every element that carries a data-id attribute is a candidate.
    data_id_elements = driver.find_elements(By.XPATH, "//*[@data-id]")

    # Read each attribute exactly once (every get_attribute call is a round
    # trip to the browser), then keep only the 7-character all-digit values.
    candidate_ids = [
        element.get_attribute("data-id") for element in data_id_elements
    ]
    data_ids = [
        candidate for candidate in candidate_ids
        if candidate and len(candidate) == 7 and candidate.isdigit()
    ]
except BaseException:
    # Setup failed or was interrupted: shut the browser down before
    # propagating so no headless Chrome process is left behind.
    driver.quit()
    raise

# Show every data-id that passed the filter.
print(f"Found valid 7-digit data-ids: {data_ids}")
# Request headers sent with every API call so the request resembles one made
# by a browser.
# NOTE(review): the Cookie value below is a hard-coded session string. It can
# expire and has to be replaced by hand; it also contains ck=1unP, the same
# token that is hard-coded in the request URL built by get_correct_answer —
# presumably both must be updated together (confirm). Consider loading it from
# an environment variable or config file instead of keeping it in source.
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/131.0.0.0 Safari/537.36',
'Accept': 'application/json, text/plain, */*',
'Accept-Encoding': 'gzip, deflate, br',
'Accept-Language': 'zh-TW,zh;q=0.9,en-US;q=0.8,en;q=0.7',
'Origin': 'https://www.douban.com',
'Referer': url, # Referer is set to the page the user entered
'Cookie': 'll="108288"; bid=jP_4GbIX6kY; viewed="27074037_1064707_26286751_1075531_1274150_35812342_3260802_4088776_26883044_1227981"; __utmv=30149280.17175; __utmz=30149280.1718796391.6.4.utmcsr=caiyawang.xyz|utmccn=(referral)|utmcmd=referral|utmcct=/; __utma=30149280.2101830697.1717257027.1718796391.1719232678.7; douban-fav-remind=1; ap_v=0,6.0; ct=y; dbcl2="171753000:NVy4yxfFedE"; ck=1unP; push_noty_num=0; push_doumail_num=0; frodotk="1aecf9763222a8cc50c6ec629667fa64"', # replace with a real, current cookie
}
# Look up the correct answer of a single poll question.
def get_correct_answer(data_id, timeout=10):
    """Fetch and decode the ``correct_answer`` field for one question id.

    Args:
        data_id: Question id, interpolated into the API URL.
        timeout: Seconds to wait for the server before giving up. Without a
            timeout a stalled connection would block the script indefinitely.

    Returns:
        The percent-decoded answer string on success. On any failure a
        human-readable message is returned instead of raising: a non-200
        status, a network error, an unparsable body, or a missing/empty
        ``correct_answer`` field.
    """
    # NOTE(review): ck=1unP is hard-coded here and also appears in the Cookie
    # header; presumably it must be kept in sync when the cookie is replaced.
    url = f"https://m.douban.com/rexxar/api/v2/ceorl/poll/question/{data_id}?ck=1unP"

    try:
        response = requests.get(url, headers=headers, timeout=timeout)
        if response.status_code != 200:
            return f"Failed to fetch data for ID {data_id}, Status Code: {response.status_code}"
        response_data = response.json()
    except (requests.exceptions.RequestException, ValueError) as e:
        # ValueError covers a body that is not valid JSON on requests
        # versions whose JSON error is not a RequestException subclass.
        return f"Request failed for ID {data_id}: {e}"

    # A JSON payload that is not an object cannot carry the field at all.
    if not isinstance(response_data, dict):
        return "No correct_answer available"

    correct_answer_encoded = response_data.get("correct_answer", "")
    if not correct_answer_encoded:
        return "No correct_answer available"

    # The field may contain percent-escapes; unquote leaves other text as is.
    return urllib.parse.unquote(correct_answer_encoded)
# Query the API for every collected data-id and print each answer.
try:
    for data_id in data_ids:
        print(f"Fetching correct_answer for data-id: {data_id}")
        correct_answer = get_correct_answer(data_id)
        print(f"Correct Answer: {correct_answer}")
finally:
    # Always close the browser, even if a lookup raises or the user
    # interrupts the run; otherwise the headless Chrome process lingers.
    driver.quit()
评论
发表评论