#-*-coding:utf-8-*-
import re
# Three-stage clean-up of a mail address list, one address per line:
#   1. drop every line of the source list that also appears in the bounce list,
#   2. drop duplicate lines,
#   3. drop lines matching the keyword pattern `pat`.
# Each stage writes its result to its own output file.
bounce_path = 'd:/test/mail/bounce_list.txt'
source_path = 'd:/test/mail/828-820.txt'
excluded_out_path = 'd:/test/mail/ok1'
unique_out_path = 'd:/test/mail/ok-sort'
final_out_path = 'd:/test/mail/ok-ok'


def _read_lines(path):
    """Return the lines of the text file at ``path``, each ending in a newline."""
    with open(path, 'r') as handle:
        lines = handle.readlines()
    # A final line without a terminator would not compare equal to the same
    # entry in the other file, and after sorting it could be written in the
    # middle of the output and run into the following line.
    if lines and not lines[-1].endswith('\n'):
        lines[-1] += '\n'
    return lines


# Stage 1: remove from the source list every line present in the bounce list.
# A set makes each membership test constant-time; the result is unaffected.
bounced = set(_read_lines(bounce_path))
kept_lines = [
    line for line in sorted(_read_lines(source_path)) if line not in bounced
]
with open(excluded_out_path, 'w') as out:
    for line in kept_lines:
        # Single-argument print() gives the same output on Python 2 and 3.
        print(line)
        out.write(line)

# Stage 2: filter out duplicate lines, keeping the first occurrence of each.
lines_seen = set()
unique_lines = []
for line in kept_lines:
    if line not in lines_seen:
        lines_seen.add(line)
        unique_lines.append(line)
with open(unique_out_path, 'w') as out:
    out.writelines(unique_lines)

# Stage 3: filter out lines that match the keyword pattern.
# `$` also matches just before a trailing newline, so the lines can be tested
# with their terminator still attached.
# NOTE(review): the dots in the pattern are unescaped and therefore match any
# character; kept as-is to preserve the existing filtering behaviour.
pat = '@oauth.*.com$'
keyword_re = re.compile(pat)
with open(final_out_path, 'w') as out:
    for line in unique_lines:
        if not keyword_re.search(line):
            print(line)
            out.write(line)