import pandas as pd
import numpy as np
from sklearn.svm import SVC
import time
from sklearn.preprocessing import MinMaxScaler
import talib

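# Hourly-granularity neural-network learning: use 24 hours to predict the rise/fall of the 25th hour;
# on a rise, set the RSI overbought threshold higher; on a fall, set the RSI oversold threshold higher.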
def get_data(path="./data/btc_minute_2021.csv"):
    """Load the minute-level CSV and return its de-duplicated data columns.

    Args:
        path: CSV file to read. Defaults to the 2021 BTC minute file that
            this function originally had hard-coded, so existing
            zero-argument callers behave exactly as before.

    Returns:
        A DataFrame holding the columns found at positions 3..11 of the
        file, with duplicate rows removed and a fresh 0-based index.

    Side effects:
        Sets the process-wide pandas option ``display.max_columns`` to
        ``None`` so that every column is shown when frames are printed.
    """
    pd.set_option('display.max_columns', None)

    raw = pd.read_csv(path, encoding='utf-8')

    # Columns are picked by position, not by name.
    # NOTE(review): presumably these are the OHLC/volume columns that
    # process_data reads by name -- confirm against the CSV header.
    # An explicit position list (rather than a slice) keeps the original
    # behaviour of failing loudly when the file has fewer than 12 columns.
    selected = raw.iloc[:, list(range(3, 12))]

    # Duplicates are detected on the selected columns only, so rows that
    # differ solely in the discarded leading columns collapse into one.
    return selected.drop_duplicates().reset_index(drop=True)


def process_data(data):
    """Aggregate minute rows into hourly candles and label each hour.

    Rows are grouped positionally into consecutive blocks of 60; trailing
    rows that do not fill a complete block are discarded.

    Args:
        data: DataFrame of minute rows containing the columns ``open``,
            ``high``, ``low``, ``close``, ``volume``,
            ``quote_asset_volume``, ``buy_base_asset_volume`` and
            ``buy_quote_asset_volume``. Rows are assumed to be in
            chronological order with one row per minute -- TODO confirm,
            nothing here checks timestamps.

    Returns:
        A tuple ``(x_data, y_data)``:

        * ``x_data`` -- array with one row per complete hour and eight
          min-max scaled columns, in this order: open, high, low, close,
          volume, quote asset volume, buy base asset volume, buy quote
          asset volume.
        * ``y_data`` -- list of ints, one per hour. ``y_data[k]`` is 1 when
          hour ``k`` closed strictly above hour ``k - 1`` (positive relative
          change) and 0 otherwise; ``y_data[0]`` is a padding 0 because the
          first hour has no predecessor.

    Raises:
        ValueError: if ``data`` has fewer than 60 rows, i.e. not even one
            complete hour can be built.
    """
    minutes_per_hour = 60
    n_hours = data.shape[0] // minutes_per_hour
    if n_hours == 0:
        # Fail early with a clear message instead of handing an empty
        # frame to the scaler.
        raise ValueError(
            "process_data needs at least {} minute rows to build one hourly "
            "candle, got {}".format(minutes_per_hour, data.shape[0])
        )

    # Keep only the rows that form complete hours, then bucket them by
    # position (row 0-59 -> hour 0, row 60-119 -> hour 1, ...).
    complete = data.iloc[:n_hours * minutes_per_hour]
    by_hour = complete.groupby(np.arange(len(complete)) // minutes_per_hour)

    # Open is the first minute of each hour and close is the last one;
    # strided slicing takes them positionally without skipping NaNs.
    hour_open = complete["open"].to_numpy()[::minutes_per_hour]
    hour_close = complete["close"].to_numpy()[minutes_per_hour - 1::minutes_per_hour]

    hour_sums = by_hour[[
        "volume",
        "quote_asset_volume",
        "buy_base_asset_volume",
        "buy_quote_asset_volume",
    ]].sum()

    df_data = pd.DataFrame({
        "hour_open": hour_open,
        "hour_high": by_hour["high"].max().to_numpy(),
        "hour_low": by_hour["low"].min().to_numpy(),
        "hour_close": hour_close,
        "hour_volume": hour_sums["volume"].to_numpy(),
        "hour_quote_asset_volume": hour_sums["quote_asset_volume"].to_numpy(),
        "hour_buy_base_asset_volume": hour_sums["buy_base_asset_volume"].to_numpy(),
        "hour_buy_quote_asset_volume": hour_sums["buy_quote_asset_volume"].to_numpy(),
    })

    # Relative close-to-close change between consecutive hours; the first
    # hour has no previous close, so its label is padded with 0.
    relative_change = (hour_close[1:] - hour_close[:-1]) / hour_close[:-1]
    y_data = [0] + (relative_change > 0).astype(int).tolist()

    # Normalisation: scale every feature column with MinMaxScaler.
    # NOTE(review): the scaler is fitted on all rows at once; if callers
    # split into train/test afterwards, test rows influence the scaling.
    x_data = MinMaxScaler().fit_transform(df_data)
    return x_data, y_data

# Add a code snippet to your website: www.paste.org