import requests
import re
# Forum thread-list page to scrape. The page footer that was pasted along with
# the original script reads "Powered by Discuz! 7.2", so the patterns below
# presumably target that board software's markup -- re-check them if the site
# changes its templates.
url = 'http://sis001.com/forum/forum-25-1.html'

# Matches every <tbody> element whose id starts with "normalthread"; re.S lets
# the lazy ".*?" span the newlines inside the element.
_THREAD_ROW_RE = re.compile(r'<tbody id="normalthread.*?</tbody>', re.S)
# Within one such element, captures the href and link text of the anchor
# wrapped in <span id="thread...">. No re.S here: the link must sit on one line.
_THREAD_LINK_RE = re.compile(
    r'<span id="thread.*?"><a href="(.*?)">(.*?)</a></span>'
)


def extract_threads(html: str) -> list:
    """Return a list of ``(href, title)`` tuples, one per thread block in *html*.

    A thread block is a ``<tbody id="normalthread...">`` element. Only the
    first title link in each block is used. Blocks that contain no title link
    are skipped instead of raising ``IndexError``, so one malformed block does
    not abort the whole scrape.
    """
    threads = []
    for row in _THREAD_ROW_RE.findall(html):
        match = _THREAD_LINK_RE.search(row)
        if match is None:
            continue
        threads.append((match.group(1), match.group(2)))
    return threads


def main() -> None:
    """Download the page at ``url`` and print each ``(href, title)`` tuple.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-2xx HTTP status.
    """
    # Without a timeout, requests waits forever on an unresponsive server.
    response = requests.get(url, timeout=10)
    # Fail loudly on 4xx/5xx rather than parsing an error page.
    response.raise_for_status()
    # Force GBK decoding, overriding whatever charset requests inferred.
    response.encoding = 'GBK'
    for thread in extract_threads(response.text):
        print(thread)


if __name__ == '__main__':
    main()