python模拟登录百度贴吧

科技一点鑫得 2024-03-09 00:44:06

上篇“百度扫码登录过程分析”一文分析了扫码登录百度的过程,但最终目的是登录百度贴吧。我起初以为模拟这些过程就够了,但用python实际测试后发现无法实现百度贴吧的登录。再次使用浏览器开发者工具监控Network页签后发现,从登录百度成功到成功登录百度贴吧,还要经历3次重定向授权,也就是状态码为302的请求。经过这3次重定向请求之后,STOKEN会被更新,此时拿到的cookie才能保持贴吧的登录状态。

1 重定向请求https://tieba.baidu.com/index.html

这个请求返回的响应头的Location是下一个请求的URL,不过这个URL并没有什么特殊参数,也可以直接构造这个URL

2 重定向请求https://passport.baidu.com/v3/login/api/auth/?tpl=tb&jump=&return_type=3&u=https%3A%2F%2Ftieba.baidu.com%2Findex.html

这个请求返回的响应头的Location是下一个请求的URL

3 重定向请求https://tieba.baidu.com/index.html?errmsg=Auth+Login+Sucess&errno=0&ssnerror=0&stoken=fae26741db67c20df5c57d2a1c57bcd5c883020970f43a208e41bde8b95948edaf23e1f7d31af1700131677982f771f43e557a300232686274fd01c377ace65a6b006dd56ca7

这个请求返回的响应头中可以拿到STOKEN,将这个cookie更新到登录百度拿到的cookie中,就可以实现贴吧保持登录状态。

python模拟登录百度贴吧示例代码

综合上一篇文章的过程和本次贴吧重定向授权的分析,给出python模拟登录贴吧的示例代码如下

import re
import json
import uuid
import datetime
import time

import requests

# The two imports below bring in the database model and the database object.
# They are used only to save and load cookies. That code is not listed in the
# article; implement it to suit your setup (e.g. persist to a text file).
from app.models import Platform
from app import db

requests.packages.urllib3.disable_warnings()

# Timeout (seconds) for ordinary passport requests.
REQUEST_TIMEOUT = 10
# Timeout (seconds) for one scan-state poll; the original code measures how
# long each poll takes, so it is given a more generous limit.
POLL_TIMEOUT = 60
# How long the command-line flow waits for the user to scan and confirm.
SCAN_WAIT_SECONDS = 120

# Captures the JSON object wrapped in a JSONP call such as `callback({...})`.
_JSONP_PAYLOAD = re.compile(r'\((\{.*?\})\)')


def timestamp(format="ms"):
    """Return the current Unix time as an integer.

    Args:
        format: ``"ms"`` for milliseconds (default) or ``"s"`` for seconds.

    Raises:
        ValueError: if ``format`` is neither ``"ms"`` nor ``"s"``.
    """
    if format == "ms":
        return int(time.time() * 1000)
    if format == "s":
        return int(time.time())
    raise ValueError(f"unsupported timestamp format: {format!r}")


def _parse_jsonp(text):
    """Decode the JSON object inside a JSONP body; return None if absent or invalid."""
    match = _JSONP_PAYLOAD.search(text)
    if match is None:
        return None
    try:
        return json.loads(match.group(1))
    except ValueError:
        return None


class Tieba(object):
    """Client that logs in to Baidu Tieba through the QR-code login flow.

    Typical sequence: ``getqrcode`` -> poll ``unicast`` until ``self.v`` is
    set -> ``qrbdusslogin`` -> ``login`` -> ``checkLogin``.
    """

    def __init__(self) -> None:
        # Load previously saved cookies (if any) from the database.
        tieba_cache = Platform.query.filter_by(domain="tieba.com").first()
        if tieba_cache and tieba_cache.cookies:
            self.cookies = json.loads(tieba_cache.cookies)
        else:
            self.cookies = None

        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/113.0.0.0 Safari/537.36 Edg/113.0.1774.57"
        }
        self.url_getqrcode = "https://passport.baidu.com/v2/api/getqrcode"
        self.url_unicast = "https://passport.baidu.com/channel/unicast"
        self.url_qrbdusslogin = "https://passport.baidu.com/v3/login/main/qrbdusslogin"
        self.imgurl = None      # URL of the QR-code image to scan
        self.sign = None        # QR-code signature, used as the polling channel id
        self.v = None           # value delivered by unicast once login is confirmed
        self.username = None
        self.is_login = False

    def getqrcode(self):
        r"""Request a login QR code.

        Sample request:
        https://passport.baidu.com/v2/api/getqrcode?lp=pc&qrloginfrom=pc&gid=DAF0412-DA10-404B-8AA5-7DDC4AD65510&oauthLog=&callback=tangram_guid_1685610692859&apiver=v3&tt=1685610693098&tpl=tb&logPage=traceId%3Apc_loginv4_1685610693%2ClogPage%3Aloginv4&_=1685610693101

        Sample response:
        tangram_guid_1685690962086({"imgurl":"passport.baidu.com\/v2\/api\/qrcode?sign=4923b183222a20bc7055eaf445b6c977&lp=pc&qrloginfrom=pc&logPage=traceId%3Apc_loginv4_1685690962%2ClogPage%3Aloginv4","errno":0,"sign":"4923b183222a20bc7055eaf445b6c977","prompt":"\u767b\u5f55\u540e\u5a01\u9a6c\u5c06\u83b7\u5f97\u767e\u5ea6\u5e10\u53f7\u7684\u516c\u5f00\u4fe1\u606f\uff08\u7528\u6237\u540d\u3001\u5934\u50cf\uff09"})

        Returns:
            ``(imgurl, sign)`` on success (also stored on the instance), or
            ``None`` if the request failed or the response could not be parsed.
        """
        params = {
            "lp": "pc",
            "qrloginfrom": "pc",
            "gid": str(uuid.uuid4()).upper(),
            "callback": f"tangram_guid_{timestamp()}",
            "apiver": "v3",
            "tt": timestamp(),
            "tpl": "tb",
            "logPage": f"traceId:pc_loginv4_{timestamp('s')},logPage:loginv4",
            "_": timestamp(),
        }
        try:
            res = requests.get(self.url_getqrcode, params=params,
                               headers=self.headers, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as e:
            # Without a response there is nothing to parse; bail out instead
            # of touching an unbound `res`.
            print(e)
            return None
        if res.status_code != 200:
            return None

        res_data = _parse_jsonp(res.text)
        if not res_data or "imgurl" not in res_data or "sign" not in res_data:
            return None
        self.imgurl = f'https://{res_data["imgurl"]}'
        self.sign = res_data["sign"]
        return (self.imgurl, self.sign)

    def unicast(self, sign):
        r"""Poll the scan state of a QR code: not scanned, scanned, or logged in.

        Sample request:
        https://passport.baidu.com/channel/unicast?channel_id=71f1345df4e3e31fed28d5e06f09fd38&gid=5C952FC-624D-4697-8D34-38223753E15C&tpl=tb&_sdkFrom=1&callback=tangram_guid_1685690962087&apiver=v3&tt=1685692040424&_=1685692040424

        Sample responses:
        not scanned: tangram_guid_1685690962087({"errno":1})
        scanned:     tangram_guid_1685690962086({"errno":0,"channel_id":"1e3ada4d00d0567d2fedb3c2e9bd5634","channel_v":"{\"status\":1}"})
        logged in:   tangram_guid_1685690962086({"errno":0,"channel_id":"1e3ada4d00d0567d2fedb3c2e9bd5634","channel_v":"{\"status\":0,\"v\":\"e96700331e2438cff748befcc8957969\",\"u\":\"\"}"})

        Args:
            sign: the ``sign`` value returned by ``getqrcode``.

        Returns:
            The decoded response dict, or ``None`` if the request failed or
            the response could not be parsed. When the response carries a
            non-empty ``v``, it is stored in ``self.v``.
        """
        self.sign = sign
        params = {
            "channel_id": self.sign,
            "gid": str(uuid.uuid4()).upper(),
            "tpl": "tb",
            "_sdkFrom": "1",
            "callback": f"tangram_guid_{timestamp()}",
            "apiver": "v3",
            "tt": timestamp(),
            "_": timestamp(),
        }
        start = time.time()
        try:
            res = requests.get(self.url_unicast, params=params,
                               headers=self.headers, timeout=POLL_TIMEOUT)
        except requests.RequestException as e:
            print(e)
            return None
        finally:
            # Report the poll duration whether or not the request succeeded.
            print(f"轮询请求耗时:{int(time.time() - start)}秒")
        if res.status_code != 200:
            return None

        res_data = _parse_jsonp(res.text)
        if res_data is None:
            return None
        print(res_data)

        # `channel_v` is itself a JSON document encoded as a string; decode it
        # and look up the key rather than substring-matching the raw text.
        raw_channel_v = res_data.get("channel_v")
        if isinstance(raw_channel_v, str):
            try:
                channel_v = json.loads(raw_channel_v)
            except ValueError:
                channel_v = None
            if isinstance(channel_v, dict) and channel_v.get("v"):
                self.v = channel_v["v"]
                print("扫码登录成功")
        return res_data

    def qrbdusslogin(self, v):
        """Log in to the Baidu account using the value obtained from ``unicast``.

        Sample request:
        https://passport.baidu.com/v3/login/main/qrbdusslogin?v=1685670178870&bduss=2eef858b34827ade3bffec6acbd5a0f9&u=&loginVersion=v4&qrcode=1&tpl=tb&apiver=v3&tt=1685670178870&traceid=&time=1685670179&alg=v3&sig=VkdBbVBUWVhZSXhtT1NzNVZTQ3ZOMGs0TDlDaWg4N1lRM0RVc0JaVzNkbEYrbW5RbkVVQTdJSWswV0tNNDB0Rg%3D%3D&elapsed=13&shaOne=00d5d7019720edf3cd7113a57273eec2451e8971&rinfo=%7B%22fuid%22%3A%22eb7e862fec8d30ff2f0d15c44eb2b7c7%22%7D&callback=bd__cbs__j8sojo

        On HTTP 200 the response cookies replace ``self.cookies``; if the body
        contains a ``displayName`` the username is stored and ``is_login`` is
        set to True.

        Args:
            v: the ``v`` value delivered by ``unicast`` (sent as ``bduss``).
        """
        self.v = v
        # NOTE(review): "v", "sig", "elapsed", "shaOne", "rinfo" and "callback"
        # are fixed values copied from a captured request (see the sample URL
        # above) - confirm whether the server validates them.
        params = {
            "v": "1685670178870",
            "bduss": self.v,
            "u": "",
            "loginVersion": "v4",
            "qrcode": "1",
            "tpl": "tb",
            "apiver": "v3",
            "tt": timestamp("ms"),
            "traceid": "",
            "time": timestamp("s"),
            "alg": "v3",
            "sig": "VkdBbVBUWVhZSXhtT1NzNVZTQ3ZOMGs0TDlDaWg4N1lRM0RVc0JaVzNkbEYrbW5RbkVVQTdJSWswV0tNNDB0Rg==",
            "elapsed": "13",
            "shaOne": "00d5d7019720edf3cd7113a57273eec2451e8971",
            "rinfo": '{"fuid":"eb7e862fec8d30ff2f0d15c44eb2b7c7"}',
            "callback": "bd__cbs__j8sojo",
        }
        try:
            res = requests.get(self.url_qrbdusslogin, params=params,
                               headers=self.headers, timeout=REQUEST_TIMEOUT)
        except requests.RequestException as e:
            print(e)
            return
        if res.status_code != 200:
            return

        # Keep the cookies issued by the login response.
        self.cookies = requests.utils.dict_from_cookiejar(res.cookies)
        # Extract the user name from the response body.
        rs = re.search(r'\"displayName\"\:\s*\"([^"]*)\"', res.text)
        if rs:
            self.username = rs.group(1)
            self.is_login = True

    def login(self):
        """Authorize the Baidu session for Tieba and persist the cookies.

        Going from a Baidu account login to a Tieba login requires following
        the redirect chain to obtain a fresh STOKEN. The first hop's
        ``Location`` header gives the URL whose response sets the STOKEN
        cookie. On success STOKEN is merged into ``self.cookies`` and the
        cookies are saved to the database. Failures are printed, not raised.
        """
        if not self.cookies:
            # Nothing to authorize with; qrbdusslogin() has to succeed first.
            print("尚未获取百度登录cookie,请先完成扫码登录")
            return

        # NOTE(review): verify=False disables TLS certificate checks while
        # sending session cookies - kept from the original, confirm it is
        # really required.
        url_jump = "https://passport.baidu.com/v3/login/api/auth/?tpl=tb&jump=&return_type=3&u=https%3A%2F%2Ftieba.baidu.com%2Findex.html"
        try:
            # Hop 1: read the redirect target from the Location header.
            res = requests.get(url_jump, headers=self.headers, cookies=self.cookies,
                               verify=False, allow_redirects=False,
                               timeout=REQUEST_TIMEOUT)
            url_stoken = res.headers.get("Location") if res.status_code == 302 else None
            if not url_stoken:
                print("未获取到重定向地址,贴吧授权失败")
                return

            # Hop 2: the STOKEN cookie arrives in this response's Set-Cookie.
            res = requests.get(url_stoken, headers=self.headers, cookies=self.cookies,
                               verify=False, allow_redirects=False,
                               timeout=REQUEST_TIMEOUT)
        except requests.RequestException as e:
            print(e)
            return
        if res.status_code != 302:
            return

        stoken = re.search(r"STOKEN=([^;]+);", res.headers.get("Set-Cookie", ""))
        if stoken is None:
            print("响应中未找到STOKEN,贴吧授权失败")
            return
        self.cookies["STOKEN"] = stoken.group(1)

        # Reaching this point means the login succeeded; store the cookies so
        # they can be loaded next time.
        try:
            self._save_cookies()
        except Exception as e:
            # Persistence is best-effort: the in-memory session is still valid.
            print(e)

    def _save_cookies(self):
        """Insert or update the database row that caches the Tieba cookies."""
        tieba_cache = Platform.query.filter_by(domain="tieba.com").first()
        str_cookies = json.dumps(self.cookies)
        if tieba_cache:
            # Update the existing row.
            tieba_cache.cookies = str_cookies
            tieba_cache.username = self.username
            tieba_cache.update_at = datetime.datetime.now()
        else:
            # Insert a new row.
            row = Platform(domain="tieba.com", username=self.username, cookies=str_cookies)
            db.session.add(row)
        db.session.commit()

    def checkLogin(self):
        """Check whether the current cookies are valid.

        Valid cookies allow the user-info endpoint to return the logged-in
        user name. Updates ``self.is_login`` and ``self.username``.

        Returns:
            The resulting value of ``self.is_login``.
        """
        url_userinfo = f"https://tieba.baidu.com/f/user/json_userinfo?_={timestamp()}"
        try:
            res = requests.get(url_userinfo, headers=self.headers, cookies=self.cookies,
                               verify=False, allow_redirects=False, timeout=1)
            if "is_login" in res.text:
                userinfo = json.loads(res.text)
                if "data" in userinfo and "user_name_show" in userinfo["data"]:
                    self.is_login = True
                    self.username = userinfo["data"]["user_name_show"]
                else:
                    self.is_login = False
                    self.username = None
        except (requests.RequestException, ValueError, TypeError, KeyError) as e:
            print(e)
            self.is_login = False
        return self.is_login


def main():
    """Run the QR-code login flow once, printing progress to stdout."""
    tb = Tieba()
    if tb.getqrcode() is None:
        print("获取二维码失败")
        return
    print(f"打开二维码URL并扫码: {tb.imgurl}")

    # Keep polling until the scan is confirmed (tb.v is set) or we give up.
    deadline = time.time() + SCAN_WAIT_SECONDS
    while not tb.v and time.time() < deadline:
        tb.unicast(tb.sign)
        if not tb.v:
            # Short pause so a fast-failing or fast-returning poll cannot spin.
            time.sleep(1)
    if not tb.v:
        print("等待扫码确认超时")
        return

    # Exchange the scan result for Baidu cookies, then authorize Tieba.
    tb.qrbdusslogin(tb.v)
    tb.login()
    tb.checkLogin()


if __name__ == "__main__":
    # Simulate the login process.
    main()
0 阅读:0

科技一点鑫得

简介:感谢大家的关注