from bs4 import BeautifulSoup
import urllib2
# Fetch the Douban home page with browser-like request headers, report the
# encoding BeautifulSoup detected, echo the parsed document to stdout encoded
# as GB18030, and save a copy of the document to title.txt.
url = 'https://www.douban.com'
# NOTE(review): this looks like a session cookie captured from a logged-in
# browser and committed in plain text. Confirm whether it is still valid and
# consider loading it from outside the source tree instead of hard-coding it.
cookie='ll="118234"; __yadk_uid=FZYkMR92OctgDfVQxh7rgOvKAfSaAcF1; gr_user_id=30-b429-d8ac2b39f39e; _vwo_uuid_v2=62C802065BA1FE1E49689EB42248C9B5|86bc597a128b6ebcf16129a36961cd49; bid=Ogc8aq4tIsk; _pk_ref.100001.8cb4=%5B%22%22%2C%22%22%2C1519356140%2C%22https%3A%2F%2Fwww.baidu.com%2Flink%3Furl%3DynTCvqw85IEmoWlag4b0hClM5qTjixjEN46Bbi_l7O1HuW1WreuRM_BxXp7M6Dyo%26wd%3D%26eqid%3Dd6c4a5f10001bb85000000025a8f88e8%22%5D; _pk_ses.100001.8cb4=*; __utma=30149280.832780041.1482799300.1517562754.1519356141.20; __utmc=30149280; __utmz=30149280.1519356141.20.19.utmcsr=baidu|utmccn=(organic)|utmcmd=organic; __utmt=1; _ga=GA1.2.832780041.1482799300; _gid=GA1.2.925059532.1519356149; _gat_UA-7019765-1=1; dbcl2="162182190:W4cfAVJjlD0"; ck=Pky-; _pk_id.100001.8cb4=599b9f4c8e87f346.1482799300.14.1519356150.1514105301.; push_noty_num=0; push_doumail_num=0; __utmv=30149280.16218; __utmb=30149280.3.10.1519356141'
# Headers sent with the request; the User-Agent and Accept values imitate a
# desktop Chrome browser.
send_headers = {
    'Host': 'www.douban.com',
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.84 Safari/537.36',
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Connection': 'keep-alive',
    'Cookie': cookie,
}
req = urllib2.Request(url, headers=send_headers)
page = urllib2.urlopen(req)
try:
    soup = BeautifulSoup(page, 'lxml')
finally:
    # Release the HTTP response even if parsing raises. Assumes the
    # constructor has consumed the response body by the time it returns --
    # TODO confirm against the bs4 version in use.
    page.close()

# Single-argument print(...) behaves exactly like the Python 2 print
# statement, so the output is unchanged.
print(soup.original_encoding)
print(soup.encode('gb18030'))

# The with-block closes the file even when write() raises, and avoids
# shadowing the builtin name `file`.
with open("title.txt", "w") as out_file:
    # str(soup) is presumably a UTF-8 byte string under Python 2 -- confirm
    # against the bs4 version in use.
    out_file.write(str(soup))
print('ok')
# Reposted from "阿飞的技术仓库" (A Fei's tech repository).