'''
Interface built for Baidu: generate an XML file that satisfies their
various requirements (not particularly difficult).
'''
import codecs
import datetime
import traceback
import xml
import xml.dom.minidom as minidom
def covert_to_unicode(msg):
    """Return ``msg`` as a unicode text string.

    Text strings are returned unchanged; byte strings are decoded as UTF-8.
    The function runs on both Python 2 (``unicode`` / ``str``) and Python 3
    (``str`` / ``bytes``).

    Args:
        msg: A text string, or a UTF-8 encoded byte string.

    Returns:
        The text (unicode) form of ``msg``.

    Raises:
        ValueError: If ``msg`` is a byte string that is not valid UTF-8.
        TypeError: If ``msg`` is neither a text string nor a byte string.
    """
    try:
        text_type = unicode  # Python 2
    except NameError:
        text_type = str  # Python 3: ``str`` is already unicode text

    if isinstance(msg, text_type):
        return msg

    # ``bytes`` is an alias of ``str`` on Python 2, so this single check
    # covers the byte-string type of both major versions.
    if isinstance(msg, bytes):
        try:
            return msg.decode('utf-8')
        except UnicodeDecodeError as errinfo:
            raise ValueError('%s,%s' % (errinfo, str(msg)))

    # Wrap ``msg`` in a 1-tuple: formatting a bare tuple with a single
    # ``%s`` would otherwise fail with "not all arguments converted".
    raise TypeError('%s 必须为str或unicode类型' % (msg,))
class CreateCovXml(object):
    """Build a ``<urlset>`` XML document in memory and write it to a file.

    Usage: call ``begin_cov()`` once to create the document, ``add_cov()``
    once per entry, then ``end_cov()`` to write the result to the path given
    to the constructor.
    """

    def __init__(self, cov_xml_path):
        """Store the output path; the document itself is created by ``begin_cov``.

        Args:
            cov_xml_path: Path of the XML file written by ``end_cov``.
        """
        self.__cov_path = cov_xml_path
        self.__dom = None
        self.__root = None

    def _covert_code(self, msg):
        """Return ``msg`` as unicode text by delegating to ``covert_to_unicode``."""
        return covert_to_unicode(msg)

    def __create_new_node(self, node_name, node_text=None):
        """Create a detached element, optionally holding one text node.

        Args:
            node_name: Tag name of the new element.
            node_text: Text content; ``None`` creates an empty element.

        Returns:
            The new element (not yet attached to the document tree).

        Raises:
            Exception: If the document has not been created yet.
        """
        if self.__dom is None:
            raise Exception('创建结点时,document对象还不存在')
        new_node = self.__dom.createElement(self._covert_code(node_name))
        if node_text is not None:
            new_text = self.__dom.createTextNode(self._covert_code(node_text))
            new_node.appendChild(new_text)
        return new_node

    def __append_children(self, parent, children):
        """Create one element per ``(tag, text)`` pair and append it to ``parent``."""
        for tag, text in children:
            parent.appendChild(self.__create_new_node(tag, text))

    def begin_cov(self):
        """Create the XML document together with its ``urlset`` root element.

        Raises:
            Exception: If the document cannot be created; the underlying
                traceback is printed first.
        """
        try:
            impl = minidom.getDOMImplementation()
            self.__dom = impl.createDocument(None, u'urlset', None)
            self.__root = self.__dom.documentElement
        except Exception:
            traceback.print_exc()
            raise Exception('创建coverage xml根结点失败')

    def add_cov(self, url, title, priority, Horizontalpicture1,
                dayRetrievalstatus, channel, Largeblock, smallblock,
                blocknumber):
        """Append one ``<url>`` entry to the root element.

        The entry is laid out as::

            <url>
              <loc/> <lastmod/> <changefreq/> <priority>...</priority>
              <data><display> ...display fields... </display></data>
            </url>

        Each argument becomes the text of the element with the same name
        inside ``<display>`` (``priority`` sits directly under the outer
        ``<url>``); every other element is emitted empty. All arguments are
        passed through ``_covert_code`` before being stored.

        Raises:
            Exception: If ``begin_cov`` has not been called yet.
        """
        if self.__root is None:
            raise Exception('创建结点时,root结点不存在对象不存不存在')

        url_entry = self.__create_new_node("url")
        self.__append_children(url_entry, (
            ("loc", None),
            ("lastmod", None),
            ("changefreq", None),
            ("priority", priority),
        ))

        display_node = self.__create_new_node("display")
        self.__append_children(display_node, (
            ("channel", channel),
            ("Largeblock", Largeblock),
            ("smallblock", smallblock),
            ("blocknumber", blocknumber),
            ("title", title),
            ("subtitle", None),
            ("url", url),
            ("Horizontalpicture1", Horizontalpicture1),
            ("Horizontalpicture2", None),
            ("Horizontalpicture3", None),
            ("singer", None),
            ("area", None),
            ("timelength", None),
            ("episode", None),
            ("updatetime", None),
            ("briefintroduction", None),
            ("daySearchvolume", None),
            ("dayRetrievalstatus", dayRetrievalstatus),
            ("sitename", None),
        ))

        data_node = self.__create_new_node("data")
        data_node.appendChild(display_node)
        url_entry.appendChild(data_node)

        # Attach the finished entry last, so a value that fails conversion
        # never leaves a half-built <url> element in the document.
        self.__root.appendChild(url_entry)

    def end_cov(self):
        """Write the document to the output path as UTF-8.

        Returns:
            True when the file has been written.

        Raises:
            Exception: If no document exists (``begin_cov`` was not called)
                or if writing fails; for write failures the underlying
                traceback is printed first.
        """
        # Check before opening so a missing document does not truncate an
        # existing output file.
        if self.__dom is None:
            raise Exception('写coverage.xml文件出错')
        try:
            # codecs.open gives a UTF-8 stream writer over a binary file;
            # the ``with`` block closes it exactly once, even on error.
            with codecs.open(self.__cov_path, 'wb', 'utf-8') as writer:
                self.__dom.writexml(writer, encoding='utf-8')
            return True
        except Exception:
            traceback.print_exc()
            raise Exception('写coverage.xml文件出错')
# Test case: write one <url> entry per link into "<result_filename>.xml".
test = CreateCovXml(result_filename + '.xml')
test.begin_cov()
priority_count = 0
# enumerate(..., 1) yields the same 1-based counter the manual increment did;
# the initial 0 above is kept for the case of an empty link list.
for priority_count, linkInfo in enumerate(result_linkinfos, 1):
    test.add_cov(
        url=linkInfo.getTarget(),
        title=linkInfo.getTitle(),
        priority=str(priority_count),
        Horizontalpicture1=linkInfo.getDetailPicUrl(),
        dayRetrievalstatus="1",
        channel="资讯",
        Largeblock="带图区",
        smallblock="焦点小图",
        blocknumber="3-16",
    )
test.end_cov()
os.chdir(basePath)