import pandas as pd
import numpy as np
from sklearn.svm import SVC
import time
from sklearn.preprocessing import MinMaxScaler
import talib
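# Hourly-scale neural-network learning: use 24 hours of data to predict whether
# the 25th hour rises or falls. When rising, set the RSI overbought threshold
# higher; when falling, set the RSI oversold threshold higher.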
def get_data(path="./data/btc_minute_2021.csv"):
    """Load the minute-level CSV and return its de-duplicated data columns.

    Args:
        path: Location of the CSV file to read. Defaults to the 2021 BTC
            minute file used by this script.

    Returns:
        A DataFrame holding the columns at positions 3 through 11 of the
        CSV, with duplicate rows removed and a fresh 0..n-1 index.

    Raises:
        IndexError: If the CSV has fewer than 12 columns.

    Side effects:
        Sets the global pandas option ``display.max_columns`` to ``None``.
    """
    # Global display option: do not truncate columns when frames are printed.
    pd.set_option('display.max_columns', None)

    raw = pd.read_csv(path, encoding='utf-8')

    # Explicit position list (not a slice) so a file with too few columns
    # fails loudly instead of being silently truncated.
    selected = raw.iloc[:, list(range(3, 12))]

    # Non-inplace chain: avoids mutating a frame derived from another frame,
    # which is what triggers pandas' chained-assignment warning.
    return selected.drop_duplicates().reset_index(drop=True)
def process_data(data):
    """Aggregate minute bars into hourly bars and build up/down labels.

    Rows are grouped positionally into consecutive blocks of 60; any trailing
    rows that do not fill a complete block are discarded.

    Args:
        data: DataFrame with the columns ``open``, ``high``, ``low``,
            ``close``, ``volume``, ``quote_asset_volume``,
            ``buy_base_asset_volume`` and ``buy_quote_asset_volume``,
            one row per minute.

    Returns:
        A tuple ``(x_data, y_data)``:

        * ``x_data`` -- the hourly features (open, high, low, close and the
          four summed volume columns, in that order) after min-max scaling
          with ``MinMaxScaler``.
        * ``y_data`` -- a list of ints with one entry per hour. The first
          entry is padded with 0; entry ``k`` (k >= 1) is 1 when the close
          of hour ``k`` is relatively higher than the close of hour
          ``k - 1``, otherwise 0.

    Raises:
        ValueError: If ``data`` has fewer than 60 rows, i.e. no complete
            hour can be formed.
    """
    minutes_per_hour = 60
    n_hours = data.shape[0] // minutes_per_hour
    if n_hours == 0:
        raise ValueError(
            "process_data needs at least %d rows to form one hourly bar, got %d"
            % (minutes_per_hour, data.shape[0])
        )

    # Keep only complete hours and tag every row with its hour number.
    n_rows = n_hours * minutes_per_hour
    full_hours = data.iloc[:n_rows]
    hour_index = np.arange(n_rows) // minutes_per_hour
    grouped = full_hours.groupby(hour_index)

    df_data = pd.DataFrame({
        # Open/close are taken positionally: first and last minute of the hour.
        "hour_open": full_hours["open"].to_numpy()[::minutes_per_hour],
        "hour_high": grouped["high"].max().to_numpy(),
        "hour_low": grouped["low"].min().to_numpy(),
        "hour_close": full_hours["close"].to_numpy()[
            minutes_per_hour - 1::minutes_per_hour
        ],
        "hour_volume": grouped["volume"].sum().to_numpy(),
        "hour_quote_asset_volume": grouped["quote_asset_volume"].sum().to_numpy(),
        "hour_buy_base_asset_volume": grouped["buy_base_asset_volume"].sum().to_numpy(),
        "hour_buy_quote_asset_volume": grouped["buy_quote_asset_volume"].sum().to_numpy(),
    })

    # Relative change of each hourly close versus the previous hourly close.
    closes = df_data["hour_close"].to_numpy()
    relative_change = np.diff(closes) / closes[:-1]

    # The first hour has no predecessor, so its label is padded with 0.
    y_data = [0] + (relative_change > 0).astype(int).tolist()

    # Min-max normalisation of every feature column.
    x_data = MinMaxScaler().fit_transform(df_data)
    return x_data, y_data