前言:
某天突发奇想,想要实现一个农历生日提醒。这个时候有了如下的大概思路:
获取农历 ——> 匹配 ——> 提醒
发现实现这个需求最主要的在获取农历这一块,后边两个不是什么巨大挑战。遂查阅一番资料,发现数据库可以实现阳历转阴历功能。在数据库这一块,我是在是小菜,看不懂存储过程。蛋疼!直接放弃改用其他思路。思考了许久,想到可以通过爬虫爬取现成的日历信息,存库再进行匹配。所以,现在思路如下:
爬取农历存库 ——> 匹配姓名表 ——> 提醒
一、爬取日历网站
刚开始是按照每年一个表的思路去爬,代码及注释如下:
首先是建表:
(文件名:reptile\CreateDb.py)
# -*- coding:utf-8 -*-
import MySQLdb
import os
#将敏感信息写入环境变量 通过export去设置value
MYSQLDB_HOST=os.environ.get('MYSQLDB_HOST')
MYSQLDB_USER=os.environ.get('MYSQLDB_USER')
MYSQLDB_PASSWD=os.environ.get('MYSQLDB_PASSWD')
db = MySQLdb.connect(
host=MYSQLDB_HOST,
port=3306,
user=MYSQLDB_USER,
passwd = MYSQLDB_PASSWD,
db='Calendar',
charset = "utf8",
)
cursor = db.cursor()
#数据库插入
def Insert_mysql(sql):
cursor.execute(sql)
db.commit()
#数据库查询
def Inquire_mysql(sql):
cursor.execute(sql)
request = cursor.fetchall()
return request
if __name__ == "__main__":
st_sql = "show tables;"
cursor.execute(st_sql)
request = cursor.fetchall()
for year in range(1900,2050):
if year in request:
print "[*]%d is in database!"
else:
print "[!]%s No in the Database,create now." % year
ct_sql = """CREATE TABLE `%d` (
`DAY` date NOT NULL ,
`WEEK` varchar(50) NULL ,
`CONSTELLATON` varchar(50) NULL ,
`FESTIVAL` varchar(24) NULL ,
`YEAR` varchar(24) NULL ,
`LUNARCALENDAR` varchar(60) NULL ,
`LUNNAR` varchar(24) NULL ,
`ERSHIBASU` varchar(24) NULL ,
`JIAZI` varchar(24) NULL ,
PRIMARY KEY (`DAY`)
)
ENGINE=InnoDB
DEFAULT CHARACTER SET=utf8
;""" % year
try:
cursor.execute(ct_sql)
db.commit()
print "[.] %s OK!" % year
except:
print "Error:unable to fecth data"
db.close()
效果:
接着爬取:
(文件名:reptile\Spider.py)
#coding:utf-8
import re,urllib2
from bs4 import BeautifulSoup
from urllib import urlencode
class SiteData:
def __init__(self,url):
self.Url = url
def Data(self):
#伪装头
values = {'Accept':'text/html,application/xhtml+xml,application/xml;q=0.9,p_w_picpath/w ebp,*/*;q=0.8',
'Accept-Language':'zh-CN,zh;q=0.8',
'User-Agent':'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/46.0.2490.86 Safari/537.36'
}
SiteUrl = urllib2.Request(self.Url,headers=values)
WebSite = urllib2.urlopen(SiteUrl)
HtmlData = WebSite.read()
WebSite.close()
return HtmlData
def Find(self):
#BeautifulSoup筛选出我要的内容
HtmlData = self.Data()
HtmlSoup = BeautifulSoup(HtmlData,"html5lib")
FindALL = HtmlSoup.find_all("a")
List=[]
for i in FindALL:
filter = i.encode('gb2312').replace(' ',' ').replace(';','')
if "#" in filter:
ReplaceText = filter.replace('<a href="#" onclick="return(false)" title="','')
Extract2 = re.sub(r"\<.*\>","",ReplaceText)
Extract1 = re.sub(r"[0-9]+\:[0-9]+","",Extract2)
Extract = Extract1.replace(' 0-','').replace('">','')
List.append(Extract)
else:
pass
return List
开始爬取:
(文件名:reptile\Run.py)
#coding:utf-8
import time
from CreateDb import db,cursor #将数据库操作的一些方法加进来
from Spider import SiteData #将爬虫操作的方法加进来
def ListMark(list):
LIST = []
items = list.split('\n')
List = [items[1],items[2],items[3],items[4],items[6],items[7],items[8]]
for i in List:
item,value = i.split(':')
LIST.append(value)
return LIST
def get_item(text):
item,value = text.split(' ')
return item
def get_value(text):
item,value = text.split(' ')
return value
def UTF(text):
TEXT = text.decode('gbk').encode('utf8')
return TEXT
if __name__ == '__main__':
for year in range(1900,2050):
sql = "select DAY from Calendar.%s order by DAY desc limit 1" % year
cursor.execute(sql)
request = cursor.fetchall()
print "[!]process %s" % year
try:
Month = str(request).split(',')[1]
except:
Month = str(request)
if Month == "()":
print "[!]No data in Table! Crawling now.."
for m in range(1,13):
url = "http://www.nongli.com/item3/rili_%s-%s.htm" % (year,m)
OpenSite = SiteData(url)
returnList = OpenSite.Find()
for i in returnList:
List = ListMark(i)
Day = get_item(List[0])
Week = get_value(List[0])
Constellation = List[1]
Festival = List[2]
Year = List[3][0:4]
Lunarcalendar = List[3][4:]
Lunnar = List[4]
Ershibasu = List[5]
Jiazi = List[6]
Insert_sql = "insert into Calendar.%s values('%s','%s','%s','%s','%s','%s','%s','%s','%s')" \
% (year,Day,UTF(Week),UTF(Constellation),UTF(Festival),\
UTF(Year),UTF(Lunarcalendar),UTF(Lunnar),UTF(Ershibasu),UTF(Jiazi))
try:
cursor.execute(Insert_sql)
db.commit()
print "[ ]Insert %s data ..." % Day
except:
print "[!]Insert %s Error!!!" % Day
db.rollback()
time.sleep(0.1)
try:
if Month == 12:
print "[ ] Have data in Table."
else:
NodataMoth = range(int(Month)+1,13)
for M in NodataMoth:
url = "http://www.nongli.com/item3/rili_%s-%s.htm" % (year,M)
OpenSite = SiteData(url)
returnList = OpenSite.Find()
for i in returnList:
List = ListMark(i)
Day = get_item(List[0])
Week = get_value(List[0])
Constellation = List[1]
Festival = List[2]
Year = List[3][0:4]
Lunarcalendar = List[3][4:]
Lunnar = List[4]
Ershibasu = List[5]
Jiazi = List[6]
Insert_sql = "insert into Calendar.%s values('%s','%s','%s','%s','%s','%s','%s','%s','%s')" \
% (year,Day,UTF(Week),UTF(Constellation),UTF(Festival),\
UTF(Year),UTF(Lunarcalendar),UTF(Lunnar),UTF(Ershibasu),UTF(Jiazi))
try:
cursor.execute(Insert_sql)
print "[ ]Insert %s data ..." % Day
db.commit()
except:
print "[!]Insert %s Error!!!" % Day
db.rollback()
time.sleep(0.1)
except:
print "[!]Unknown Error!"
db.close()
效果:
平均1条6条数据 从1900-2049年需要2个半小时以上时间。
爬完仔细一下,一个表才365条记录,1900-2049才150年 完全可以合成一张表。
遂,写脚本,将所有表合体,现在所有日期数据都在数据库里了:
二、将想要提醒的人加入一张表,用于匹配
姓名表比较简单,主要有姓名,性别,日期,农历日期。一些星座等其他信息可以根据需要自己添加。一个字段一个字段添加太过蛋疼,可以用数据库触发器自动填充一些,但是。。我!不!会!遂,写了个脚本用于添加:
(文件名:add_use.py)
#coding:utf-8
import re,sys
from reptile.CreateDb import Insert_mysql,Inquire_mysql
def UTF(text):
TEXT = text.decode('UTF-8').encode('UTF-8')
return TEXT
#新增用户的方法
def add(Name,Sex,Birthday):
#自动新增ID,匹配最后一个ID,新ID是最后一个ID+1
Last_id_sql = "SELECT Tb_use.USE_ID FROM Calendar.`Tb_use` \
ORDER BY Tb_use.USE_ID DESC LIMIT 1"
Last_id = Inquire_mysql(Last_id_sql)
try:
m = re.search(r"[0-9]+",str(Last_id))
n = m.group()
id = int(n)+1
except:
id = 1
#如果填入为1 则为男性
if Sex == "1":
SEX = "男"
else:
SEX = "女"
birthday_sql = "SELECT Calendar.Lunarcalendar FROM Calendar WHERE \
Calendar.TB_DAY = '%s';" % Birthday
#匹配日历表 自动填充农历信息
Lunarcalendar = Inquire_mysql(birthday_sql)
Lunar = Lunarcalendar[0][0].encode('UTF-8')
Insert_sql ="INSERT INTO `Tb_use` (`USE_ID`,`NAME`,`SEX`,`TB_DAY`,\
`LUNARCALENDAR`) VALUES ('%s','%s','%s','%s','%s');" % \
(id,UTF(Name),UTF(SEX),Birthday,Lunar)
Insert_mysql(Insert_sql)
print "[!]新增新记录,编号为%s\n姓名:%s 性别:%s \n生日:%s 农历:%s" \
%(id,Name,SEX,Birthday,Lunar)
#删除用户的方法可以根据新增用户方法去修改,这里pass
def DEL(self):
pass
#将方法映射到字典
def run(type,Name=None,Sex=None,Birthday=None):
RUN = {
'add':lambda:add(Name,Sex,Birthday),
'del':lambda:DEL()
}
return RUN[type]()
if __name__ == "__main__":
#实现类似命令行交互式效果
while True:
echo = raw_input(">>")
Split = echo.split(' ')
if echo == "exit":
print "Exit!"
break
elif echo == "":
pass
#如果输入信息正确,则运行方法
elif Split[0] == "add" or Split[0] == "del":
try:
a = run(Split[0],Split[1],Split[2],Split[3])
except:
pass
else:
print "Please Use 'add' to Add user."
三、匹配后发送提醒
(文件名:Run.py)
#coding:utf-8
import time,json,smtplib,os
from reptile.CreateDb import Insert_mysql,Inquire_mysql
from email.mime.text import MIMEText
from email.MIMEMultipart import MIMEMultipart
from email.MIMEBase import MIMEBase
#今天的日期
Today = time.strftime('%Y-%m-%d',time.localtime(time.time()))
#从数据库从获取今天的ID
Today_Sql = "SELECT Calendar.ID FROM `Calendar` WHERE TB_DAY='%s';" % Today
TodayID = Inquire_mysql(Today_Sql)[0][0]
#这个方法用于获取今天日期
def Get_today(id):
Sql = "SELECT Calendar.TB_DAY FROM `Calendar` WHERE ID='%s';" % id
Today = Inquire_mysql(Sql)[0][0]
return Today
#用于获取今天有哪些小伙伴生日
def Today_Birthday(id):
Text = []
Today_Sql = "SELECT Calendar.LUNARCALENDAR FROM `Calendar` WHERE ID='%s'" % id
TodayLunar = Inquire_mysql(Today_Sql)[0][0]
Today_Birthday_sql = "SELECT Tb_use.NAME FROM `Tb_use` WHERE LUNARCALENDAR='%s';" % TodayLunar
Today_Birthday = Inquire_mysql(Today_Birthday_sql)
if str(Today_Birthday) == '()':
Text = []
else:
for i in range(0,len(Today_Birthday)):
Text.append(Today_Birthday[i][0])
if Text == []:
Str = ""
else:
Str = '\t'.join(i.encode('utf-8') for i in Text)
return Str
#用于获取7天内有哪些小伙伴生日
def Week_Birthday():
Text = []
for item in range(TodayID,TodayID+7):
if Today_Birthday(item) == "":
pass
else:
Text.append("%s : %s" % (Get_today(item),Today_Birthday(item)))
Str = '\n'.join(i for i in Text)
return Str
#发邮件提醒
def Mail(Subject,text):
MAIL_USER=os.environ.get('MAIL_USER')
MAIL_PASSWD=os.environ.get('MAIL_PASSWD')
sender = 'cctv<你的邮箱@qq.com>'
receiver = ['发送到@139.com']
subject = Subject
smtpserver = 'smtp.mail.qq.com'
msg = MIMEMultipart('alternative')
msg['Subject'] = subject
html = text
part = MIMEText(html,'html','utf-8')
msg.attach(part)
smtp = smtplib.SMTP()
smtp.connect('smtp.mail.qq.com')
smtp.login(MAIL_USER,MAIL_PASSWD)
smtp.sendmail(sender,receiver,msg.as_string())
smtp.quit()
if __name__ == '__main__':
#获取今天星期
a = time.localtime()
Time = time.strftime("%w",a)
#今天生日的小伙伴
tb_text = Today_Birthday(TodayID)
#7天内生日的小伙伴
wb_text = Week_Birthday()
#如果今天有人生日就发邮件
if tb_text == '':
pass
else:
Mail('TodayBirthday',tb_text)
#每周一查一下,如果未来7天内有人生日就发邮件
if Time == '1':
if wb_text == '':
pass
else:
Mail('WeekBirthday',wb_text)
else:
pass
最后,加入任务计划
(文件名:Run.sh)
#!/bin/bash
export MYSQLDB_USER=****
export MYSQLDB_PASSWD=****
export MYSQLDB_HOST=****
export MAIL_PASSWD=****
export MAIL_USER=****
#我是在虚拟环境下的,所以要用虚拟环境的路径运行
`/home/ubuntu/class/env/bin/python /home/ubuntu/class/LunarBirthday/Run.py`
加入任务计划,大功告成!