命令行下的awk很方便,但需要做稍微灵活一点的处理时,我对awk不够熟悉,深入学习又没有太大必要,所以用python写了一个简单的行处理工具;遇到复杂的处理也用python脚本实现,这样程序的一致性更好。
#!/usr/bin/python
#coding:utf-8
import os
import sys
from getopt import getopt
# Line-processing stage (generator based)
def deallines(dealer, instream, paras):
    """Run ``dealer`` on every line of ``instream`` and yield output text.

    Each input line is stripped of leading/trailing CR/LF characters and
    split on the separator to build the field list passed to the dealer.

    Args:
        dealer: callable ``dealer(line, fields)``. It may return an iterable
            (typically a generator) of results, or a falsy value such as
            ``None`` when it produces no results.
        instream: iterable of input lines.
        paras: settings dict; ``paras["sep"]`` is the field separator
            (tab when the key is missing).

    Yields:
        One string per result:
        * a ``str`` result is passed through unchanged;
        * a number (int, float, ...) becomes ``str(result) + "\\n"``;
        * any other iterable has its items converted with ``str`` and
          joined with the separator, followed by ``"\\n"``.
    """
    # Function-scope import keeps this block independent of the file header.
    import numbers

    sep = paras.get("sep", "\t")
    for line in instream:
        fields = line.strip("\r\n").split(sep)
        # A dealer that is a plain function returns None; treat it as
        # "no results" instead of trying to iterate over None.
        for result in dealer(line, fields) or ():
            if isinstance(result, str):
                yield result
            elif isinstance(result, numbers.Number):
                # Scalars are not iterable, so they must not reach the
                # join branch below (a float used to raise TypeError).
                yield str(result) + "\n"
            else:
                yield sep.join([str(field) for field in result]) + "\n"
# Build dealer functions from command-line snippets.
# Counter used to give every generated dealer function a unique name.
dealerid = 0


def definedealer(dealstr, paras):
    """Compile a one-line Python snippet into a ``dealer(line, f)`` function.

    The snippet becomes the body of ``def dealer_<n>(line,f): <snippet>``,
    so it can refer to ``line`` and ``f`` and, because the function is
    created with this module's globals, to module-level names as well.
    A snippet containing ``yield`` produces a generator function.

    Args:
        dealstr: source text of the function body (single logical line).
        paras: settings dict; accepted for interface symmetry, not used here.

    Returns:
        The newly defined function object.

    Raises:
        SyntaxError: if ``dealstr`` does not form a valid function body.
    """
    global dealerid
    dealerid += 1
    funcname = "dealer_" + str(dealerid)
    source = "def " + funcname + "(line,f): " + dealstr
    namespace = {}
    # NOTE(security): this executes arbitrary code by design; the snippet
    # must only ever come from the person invoking the tool.
    # The call form of exec is valid in both Python 2.7 and Python 3, and
    # an explicit namespace avoids relying on vars() seeing exec's effects.
    exec(source, globals(), namespace)
    return namespace[funcname]
# Output stage
def printer(instream):
    """Write every chunk produced by ``instream`` to standard output.

    Chunks are written verbatim: no separator or newline is added, so each
    chunk is expected to carry its own line terminator.

    Args:
        instream: iterable of strings.
    """
    for line in instream:
        # sys.stdout.write works on Python 2 and 3 and, unlike the Python 2
        # ``print line,`` statement, never inserts a soft space between
        # chunks that do not end with a newline.
        sys.stdout.write(line)
# Escape-sequence handling for command-line arguments.
# Two-character escape sequences and the character each one denotes.
_ESCAPE_CHARS = {
    "\\t": "\t",
    "\\r": "\r",
    "\\n": "\n",
    "\\\\": "\\",
    "\\'": "'",
    '\\"': '"',
    "\\a": "\a",
    "\\b": "\b",
    # Python has no "\e" escape, so ESC must be spelled numerically.
    "\\e": "\x1b",
    "\\v": "\v",
    "\\f": "\f",
}
_HEX_DIGITS = "0123456789abcdefABCDEF"


def convertchar(s):
    """Translate a textual escape sequence into the character it denotes.

    Recognised forms are the two-character escapes listed in
    ``_ESCAPE_CHARS`` and ``\\xHH`` with exactly two hexadecimal digits.

    Args:
        s: the text to translate, e.g. the two characters ``\\`` and ``t``.

    Returns:
        The denoted character, or ``s`` unchanged when it is not a
        recognised escape sequence (including malformed ``\\x`` forms).
    """
    simple = _ESCAPE_CHARS.get(s)
    if simple is not None:
        return simple
    if len(s) == 4 and s.startswith("\\x"):
        digits = s[2:]
        # Validate before converting: the value comes from the command
        # line, so it is parsed with int() rather than evaluated as code.
        if all(ch in _HEX_DIGITS for ch in digits):
            return chr(int(digits, 16))
    return s
# Option conversion
def convertparas(paras):
    """Turn parsed command-line options into the settings dict.

    Args:
        paras: iterable of ``(option, value)`` pairs, as produced by
            ``getopt``.

    Returns:
        A dict with the key ``"sep"``: the field separator, tab unless a
        ``-F`` option overrides it. The ``-F`` value is passed through
        ``convertchar``; when ``-F`` is given more than once the last
        occurrence wins. Other options are ignored.
    """
    result = {"sep": "\t"}
    for key, value in paras:
        if key == "-F":
            result["sep"] = convertchar(value)
    return result
# Number of input records read so far; updated by record().
FNR = 0


def record(instream):
    """Yield every item of ``instream`` unchanged while counting records.

    The module-level ``FNR`` is incremented just before each item is
    yielded, so it holds the number of the record currently in flight.

    Args:
        instream: iterable of input lines.
    """
    global FNR
    for r in instream:
        FNR += 1
        yield r


# Main entry point
def main(args):
    """Parse the command line, chain the processing stages and print.

    Args:
        args: argv-style list; ``args[0]`` is skipped. The option
            ``-F <sep>`` sets the field separator, and every remaining
            argument is a snippet that becomes one processing stage.
            Stages are applied in order, each consuming the output of the
            previous one; the first stage reads standard input.
    """
    paras, deals = getopt(args[1:], 'F:')
    paras = convertparas(paras)
    dealers = [definedealer(dealstr, paras) for dealstr in deals]
    stream = record(sys.stdin)
    for dealer in dealers:
        stream = deallines(dealer, stream, paras)
    printer(stream)


if __name__ == '__main__':
    main(sys.argv)
#输入数据,各列依次为:姓名 年龄 身高 体重
cat person.txt
zs 10 1.3 35.0
ls 12 1.4 36.0
#print直接输出
cat person.txt | dealline 'print line,'
zs 10 1.3 35.0
ls 12 1.4 36.0
#分两步,第一步提取姓名 身高 体重,第二步计算体重指数=体重/身高/身高
cat person.txt | dealline 'yield f[0],f[2],f[3]' 'yield f[0],float(f[2])/float(f[1])/float(f[1])'
zs 20.7100591716
ls 18.3673469388
注:-F分隔符缺省为\t,line为整行,f为按分隔符split后的数组,FNR为记录号