这篇文章主要介绍如何利用Jupyter Notekook做初步分析,文中介绍的非常详细,具有一定的参考价值,感兴趣的小伙伴们一定要看完!
最近一段时间都是Jupyter Notebook做策略的最初版本设计,就是行情导入画图一类。
之前做个dataframe做分析容易,这个算是简化版本。
新建一个DataAnalyzer 类,这个简单很多,支持从csv和mongodb导入行情数据,和从1分钟k线整合不同分钟k线
下面是导入1分钟螺纹钢数据,整合为5分钟K线
from pymongo import MongoClient, ASCENDINGimport pandas as pdimport numpy as npfrom datetime import datetimeimport talibimport matplotlib.pyplot as pltimport scipy.stats as st%matplotlib inline%config InlineBackend.figure_format = 'retina'class DataAnalyzer(object): """ """ def __init__(self, exportpath="C:\Project\\", datformat=['datetime', 'high', 'low', 'open', 'close','volume']): self.mongohost = None self.mongoport = None self.db = None self.collection = None self.df = pd.DataFrame() self.exportpath = exportpath self.datformat = datformat self.startBar = 2 self.endBar = 12 self.step = 2 self.pValue = 0.015 def db2df(self, db, collection, start, end, mongohost="localhost", mongoport=27017, export2csv=False): """读取MongoDB数据库行情记录,输出到Dataframe中""" self.mongohost = mongohost self.mongoport = mongoport self.db = db self.collection = collection dbClient = MongoClient(self.mongohost, self.mongoport, connectTimeoutMS=500) db = dbClient[self.db] cursor = db[self.collection].find({'datetime':{'$gte':start, '$lt':end}}).sort("datetime",ASCENDING) self.df = pd.DataFrame(list(cursor)) self.df = self.df[self.datformat] self.df = self.df.reset_index(drop=True) path = self.exportpath + self.collection + ".csv" if export2csv == True: self.df.to_csv(path, index=True, header=True) return self.df def csv2df(self, csvpath, dataname="csv_data", export2csv=False): """读取csv行情数据,输入到Dataframe中""" csv_df = pd.read_csv(csvpath) self.df = csv_df[self.datformat] self.df["datetime"] = pd.to_datetime(self.df['datetime']) # self.df["high"] = self.df['high'].astype(float) # self.df["low"] = self.df['low'].astype(float) # self.df["open"] = self.df['open'].astype(float) # self.df["close"] = self.df['close'].astype(float) # self.df["volume"] = self.df['volume'].astype(int) self.df = self.df.reset_index(drop=True) path = self.exportpath + dataname + ".csv" if export2csv == True: self.df.to_csv(path, index=True, header=True) return self.df def df2Barmin(self, inputdf, barmins, crossmin=1, export2csv=False): """输入分钟k线dataframe数据,合并多多种数据,例如三分钟/5分钟等,如果开始时间是9点1分,crossmin = 0;如果是9点0分,crossmin为1""" dfbarmin = pd.DataFrame() highBarMin = 0 lowBarMin = 0 openBarMin = 0 volumeBarmin = 0 datetime = 0 for i in range(0, len(inputdf) - 1): bar = inputdf.iloc[i, :].to_dict() if openBarMin == 0: openBarmin = bar["open"] if highBarMin == 0: highBarMin = bar["high"] else: highBarMin = max(bar["high"], highBarMin) if lowBarMin == 0: lowBarMin = bar["low"] else: lowBarMin = min(bar["low"], lowBarMin) closeBarMin = bar["close"] datetime = bar["datetime"] volumeBarmin += int(bar["volume"]) # X分钟已经走完 if not (bar["datetime"].minute + crossmin) % barmins: # 可以用X整除 # 生成上一X分钟K线的时间戳 barMin = {'datetime': datetime, 'high': highBarMin, 'low': lowBarMin, 'open': openBarmin, 'close': closeBarMin, 'volume' : volumeBarmin} dfbarmin = dfbarmin.append(barMin, ignore_index=True) highBarMin = 0 lowBarMin = 0 openBarMin = 0 volumeBarmin = 0 if export2csv == True: dfbarmin.to_csv(self.exportpath + "bar" + str(barmins)+ str(self.collection) + ".csv", index=True, header=True) return dfbarminexportpath = "C:\\Project\\"DA = DataAnalyzer(exportpath)#数据库导入start = datetime.strptime("20190920", '%Y%m%d')end = datetime.now()dfrb8888 = DA.db2df(db="VnTrader_1Min_Db", collection="rb8888", start = start, end = end,export2csv=True)dfrb5min = DA.df2Barmin(dfrb8888,5,crossmin=1, export2csv=True)dfrb5min.tail()
计算5分钟K线的参照,包括标准差,rsi,5分钟均线,和40分钟均线
logdata = pd.DataFrame()logdata['close'] =(dfrb5min['close'])# logdata['tr'] = talib.ATR(np.array(dfrb8888['high']), np.array(dfrb8888['low']), np.array(dfrb8888['close']) ,1)# logdata['atr'] = talib.ATR(np.array(dfrb8888['high']), np.array(dfrb8888['low']), np.array(dfrb8888['close']) ,20)logdata['std20'] = talib.STDDEV( np.array(dfrb5min['close']) ,20)logdata['rsi30'] = talib.RSI(np.array(dfrb5min['close']) ,30)logdata['sma5'] = talib.SMA(np.array(dfrb5min['close']) ,5)logdata['sma40'] = talib.SMA(np.array(dfrb5min['close']) ,40)logdata.plot(subplots=True,figsize=(18,16))
使用快慢均线策略,显示买入卖出点
closeArray = np.array(logdata['close'])listup,listdown = [],[]for i in range(1,len(logdata['close'])): if logdata.loc[i,'sma5'] > logdata.loc[i,'sma40'] and logdata.loc[i-1,'sma5'] < logdata.loc[i-1,'sma40']: listup.append(i) elif logdata.loc[i,'sma5'] < logdata.loc[i,'sma40'] and logdata.loc[i-1,'sma5'] > logdata.loc[i-1,'sma40']: listdown.append(i)fig=plt.figure(figsize=(18,6))plt.plot(closeArray, color='y', lw=2.)plt.plot(closeArray, '^', markersize=5, color='r', label='UP signal', markevery=listup)plt.plot(closeArray, 'v', markersize=5, color='g', label='DOWN signal', markevery=listdown)plt.legend()plt.show()
以上是“如何利用Jupyter Notekook做初步分析”这篇文章的所有内容,感谢各位的阅读!希望分享的内容对大家有帮助,更多相关知识,欢迎关注编程网行业资讯频道!