最近有一个需求,需要讲csv文件通过http接口post方法导入到数据库,于是写了一个脚本,主要字符编码这一块踩了不少坑,最后终于完成了,可适用windows 和linux 。
具体功能,cvs列没有顺序要求。直接贴下脚本!
#!/usr/bin/env python
#coding=utf-8
from itertools import izip
import urllib,urllib2,urllib2,json,csv,sys,time,chardet
csv_file=sys.argv[1]
#定义需要导入的列,无顺序
Check_Head=("name","phone","isSell","origin","type","city","brand","series","model","year","plate" ,"remark")
##values 为True的表示不可以为空,为False的可以为空
keys={"name":True,"phone":True,"isSell":True,"origin":True,"type":False,"city":False,"brand":False,"series":False,"model":False,"year":False,"plate":False ,"remark":False}
#post
def http_post(data,url):
jdata = json.dumps(data)
req = urllib2.Request(url, jdata)
response = urllib2.urlopen(req)
return response.read()
#获取需要处理的csv文件的字符编码
def Codeing(file):
try:
f = open( file, 'rb' )
fencoding=chardet.detect(f.read())
f.close()
return fencoding["encoding"]
except Exception,err:
print Exception, ":","%s" % err
sys.exit()
decode=Codeing(csv_file)
encode= sys.stdin.encoding
###导入询问
def Export_Inquiry():
while True :
YS=raw_input("确认导入请输入Y,退出请输入N :".decode('utf-8').encode(encode) ).strip()
if YS == "Y":
break
elif YS == "N":
sys.exit()
print u"已开始导入,导入过程中请勿退出!"
#导入环境配置
def Export_Env():
while True :
export_env=raw_input("请输入导入环境prod/uat ? :".decode('utf-8').encode(encode)).strip()
if export_env == "prod":
url='http://www.xxxxx.com/webapi/public/register_carneed'
print u"你要导入的是生产环境,请确认是否需要导入 ?"
return url
elif export_env == "uat":
print u"你要导入的是UAT环境,请确认是否需要导入 ?"
url='http://uat.xxxx.com/webapi/public/register_carneed'
return url
def Check_Csv(csv_file,Head,coding):
suffix=csv_file.split(".")[-1]
if suffix != "csv":
sys.exit("请输入csv文件")
try:
f = open( csv_file, 'rb' )
except Exception,err:
print Exception, ":","%s" % err
sys.exit()
reader = csv.reader( f )
loop=0
for row in reader:
if reader.line_num == 1:
if len(row) != len(Head):
print u"请检查你的csv文件列数与要求不一致;"
print u"你导入为%s 列:%s" % (len(row),row)
print u"正确应该%s 列:%s" % (len(Head),list(Head))
sys.exit()
err_head=[]
list_head=[]
for li in row:
li=li.strip().decode(coding)
if li not in Head:
err_head += [li]
else:
list_head += [li]
if err_head:
print u"列名有误:%s 请确认是否与以下匹配" % err_head
#print "正确请参考如下列名:"
print list(Head)
sys.exit()
list_head=tuple(list_head)
continue
else:
if len(row) == 0 : ##去除空行
print u"第%s行为空,请处理" % reader.line_num
sys.exit()
row = iter( row ) ##放入迭代器
out={}
for key in list_head:
out[key] = row.next().strip().decode(coding).encode("UTF-8") ##根据前面获取的字符编码解码
data = out
IsNull_Key={}
for (k,v) in keys.items(): ##检查不能为空的字段的值是否为空,如果为空,则记录下并告诉行号!
if v is True:
if data[k] == "":
IsNull_Key[k]=data[k]
if IsNull_Key:
print u"第%s行有空值:%s" % (reader.line_num , IsNull_Key)
loop += 1
result={"loop":loop,"list_head":list_head}
return result
f.close()
def Import_Csv(csv_file,list_head,url,coding):
f = open( csv_file, 'rb' )
reader = csv.reader( f )
for row in reader:
if reader.line_num == 1:
continue
else:
row = iter( row )
out={}
for key in list_head:
out[key] = row.next().strip().decode(coding).encode("UTF-8") ##根据前面获取的字符编码解码
data = out
for (k,v) in keys.items():
if v is False:
if data[k] == "":
del data[k]
try:
if reader.line_num % 2 == 0:
time.sleep(1)
resp=http_post(data=data,url=url)
result=json.loads(resp)
# result["success"]=False
if result["success"] is False:
print u"请注意:第%s行导入失败! %s " % (reader.line_num,resp)
else:
print u"成功导入第%s行:%s" % (reader.line_num,resp)
except Exception,err:
print Exception, ":","%s" % err
sys.exit()
f.close()
print u"导入已完成,共导入%s行。" % (reader.line_num - 1)
print "############################################"
print u"###正在校验csv文件格式,请稍等.........#####"
print "############################################"
result=Check_Csv(csv_file,Check_Head,decode)
loop=result["loop"]
list_head=result["list_head"]
if loop > 0:
print u"请按以上提示处理后再进行导入!"
sys.exit()
else:
print u"文件格式校验已完成,请选择导入环境:"
url=Export_Env()
Export_Inquiry()
Import_Csv(csv_file,list_head,url,decode)
print "############################################"
print u"################导入完成!##################"
print "############################################"
发这里也就当做笔记啦!