keras_timeseries/lstm_zimbrao.py at master · edniemeyer/keras_timeseries

History

1

2

3

4

5

6

7

8

9

10

11

12

13

14

15

16

17

18

19

20

21

22

23

24

25

26

27

28

29

30

31

32

33

34

35

36

37

38

39

40

41

42

43

44

45

46

47

48

49

50

51

52

53

54

55

56

57

58

59

60

61

62

63

64

65

66

67

68

69

70

71

72

73

74

75

76

77

78

79

80

81

82

83

84

85

86

87

88

89

90

91

92

93

94

95

96

97

98

99

100

101

102

103

104

105

106

107

108

109

110

111

112

113

114

115

116

117

118

119

120

121

122

123

124

125

126

127

128

129

130

131

132

133

134

135

136

137

138

139

140

141

142

143

144

145

146

147

148

149

150

151

152

153

154

155

156

157

158

159

160

161

162

163

164

165

166

167

168

169

170

171

172

173

174

from __future__ import print_function

import json
import math
import sys
import time

import numpy as np
import pandas

import matplotlib.pylab as plt

#import talib

# Fixed seed for NumPy's global RNG so repeated runs draw the same random
# numbers. Kept ahead of the Keras imports below, as in the original ordering.
seed = 7
np.random.seed(seed)

from processing import *

from keras.models import Sequential

from keras.layers.core import Dense, Activation, Dropout, Flatten

from keras.layers.recurrent import LSTM

from keras.optimizers import SGD

from keras.utils import np_utils

from custom_callbacks import CriteriaStopping

from keras.callbacks import CSVLogger, EarlyStopping, ModelCheckpoint, TensorBoard

#from hyperbolic_nonlinearities import AdaptativeAssymetricBiHyperbolic, AdaptativeBiHyperbolic, AdaptativeHyperbolicReLU, AdaptativeHyperbolic, PELU

#from keras.layers.advanced_activations import ParametricSoftplus, SReLU, PReLU, ELU, LeakyReLU, ThresholdedReLU

# Wall-clock reference; __main__ subtracts it to report elapsed seconds.
start_time = time.time()

# Train / test splits are read from CSV files under minidolar/ (first row is
# the header).
train = pandas.read_csv('minidolar/train.csv', sep=',', engine='python', decimal='.', header=0)
test = pandas.read_csv('minidolar/test.csv', sep=',', engine='python', decimal='.', header=0)

# The 30 input columns: v3, v7, v11, ..., v119 (every fourth "v" column).
# Defined once so the train and test selections cannot drift apart.
_CLOSE_COLUMNS = ['v%d' % i for i in range(3, 120, 4)]

# 'shift' is added back to targets/predictions in evaluate_model;
# 'f0' is the regression target.
train_shift = train['shift']
train_target = train['f0']
train_close = train[_CLOSE_COLUMNS]

# Drop the last training sample (3781 -> 3780) so the sample count is even
# and the same batch_size can be used.
train_close = train_close[:-1]
train_target = train_target[:-1]
train_shift = train_shift[:-1]

test_shift = test['shift']
test_target = test['f0']
test_close = test[_CLOSE_COLUMNS]

# Training hyper-parameters shared by evaluate_model and __main__.
batch_size = 10
nb_epoch = 100
patience = 50
look_back = 7

def evaluate_model(model, name, n_layers, ep):
    """Compile, fit and score ``model`` on the module-level train/test data.

    The model is trained on the raw (shifted) targets. Predictions and targets
    then have the per-sample ``shift`` column added back before the RMSE is
    computed, so the scores are expressed on the shifted-back scale.

    Args:
        model: An uncompiled Keras model accepting input of shape
            ``(samples, time steps, 1)``.
        name: Run label. Currently unused here; kept so existing callers
            keep working.
        n_layers: Layer count of the run. Currently unused here; kept so
            existing callers keep working.
        ep: Number of epochs to train for.

    Returns:
        Tuple ``(trainScore, testScore, epochs, optimizer)``: train RMSE,
        test RMSE, the number of epochs requested (``ep``) and the optimizer
        name.
    """
    # Column vectors so they broadcast against (samples, 1) targets/predictions.
    train_offset = train_shift.values.reshape(train_shift.size, 1)
    test_offset = test_shift.values.reshape(test_shift.size, 1)

    X_train = np.array(train_close)
    X_test = np.array(test_close)
    Y_train = np.array(train_target.values.reshape(train_target.size, 1))
    Y_test = np.array(test_target.values.reshape(test_target.size, 1))

    # Targets with the shift added back, used only for scoring.
    Y_trainp = Y_train + train_offset
    Y_testp = Y_test + test_offset

    optimizer = "adam"
    model.compile(loss='mean_squared_error', optimizer=optimizer)

    # Reshape input to be [samples, time steps, features].
    X_train = np.reshape(X_train, (X_train.shape[0], X_train.shape[1], 1))
    X_test = np.reshape(X_test, (X_test.shape[0], X_test.shape[1], 1))

    # shuffle=False keeps the samples in file order during training.
    model.fit(X_train, Y_train, epochs=ep, batch_size=batch_size, verbose=0, shuffle=False)

    # Predictions on the training scale ...
    trainPredict = model.predict(X_train, batch_size=batch_size)
    testPredict = model.predict(X_test, batch_size=batch_size)

    # ... then shifted back to the original scale.
    new_train_predicted = trainPredict + train_offset
    new_predicted = testPredict + test_offset

    # NOTE(review): mean_squared_error is not imported explicitly in this file;
    # presumably it arrives through `from processing import *` -- confirm.
    trainScore = math.sqrt(mean_squared_error(new_train_predicted, Y_trainp))
    testScore = math.sqrt(mean_squared_error(new_predicted, Y_testp))

    # Report the epoch count actually requested for this fit rather than the
    # module-level nb_epoch, which may differ from `ep`.
    return trainScore, testScore, ep, optimizer

def __main__(argv):
    """Build a stacked-LSTM regressor, evaluate it and log the scores.

    A header line and one result line per run are appended to
    ``output/<n_layers>_layers/compare.csv``.

    Args:
        argv: Command-line arguments without the program name. ``argv[0]`` is
            parsed as the integer ``n_layers``; the network is built with
            ``n_layers + 1`` LSTM layers followed by a single linear unit.

    Raises:
        SystemExit: If ``argv`` is empty.
        ValueError: If ``argv[0]`` is not an integer.
    """
    if not argv:
        raise SystemExit("usage: python lstm_zimbrao.py N_LAYERS")

    n_layers = int(argv[0])
    print(n_layers,'layers')

    compare_path = "output/%d_layers/compare.csv" % n_layers
    with open(compare_path, "a") as fp:
        fp.write("-MINIDOLAR/LSTM NN\n")

    TRAIN_SIZE = 30   # time steps per sample
    EMB_SIZE = 1      # features per time step
    HIDDEN_RNN = 16   # units in every LSTM layer

    # One extra layer so that an argument of 0 still yields a real layer,
    # 1 yields 2, and so on. Computed once, outside the run loop, so the depth
    # does not grow from one run to the next.
    total_layers = n_layers + 1

    testScore_aux = float('inf')  # best (lowest) test RMSE seen so far
    f_aux = 0                     # run index that produced it

    for f in range(1, 2):
        # Label written to the CSV; it is not applied to the layers.
        name = 'relu'

        model = Sequential()
        for l in range(total_layers):
            # Only the last LSTM collapses the sequence; the ones before it
            # must return full sequences to feed the next recurrent layer.
            is_last = (l == total_layers - 1)
            model.add(LSTM(batch_size=batch_size,
                           input_shape=(TRAIN_SIZE, EMB_SIZE,),
                           units=HIDDEN_RNN, return_sequences=not is_last,
                           stateful=False, dropout=0.2, recurrent_dropout=0.2))

        model.add(Dense(1))
        model.add(Activation('linear'))

        trainScore, testScore, epochs, optimizer = evaluate_model(model, name, total_layers, nb_epoch)

        if testScore_aux > testScore:
            testScore_aux = testScore
            f_aux = f

        elapsed_time = (time.time() - start_time)

        with open(compare_path, "a") as fp:
            fp.write("%i,%s,%f,%f,%d,%s --%s seconds\n" % (f, name, trainScore, testScore, epochs, optimizer, elapsed_time))

        # Drop the reference so the model can be reclaimed before the next run.
        model = None

    print("melhor parametro: %i" % f_aux)

# Script entry point: forward the CLI arguments (program name stripped).
if __name__ == "__main__":
    cli_args = sys.argv[1:]
    __main__(cli_args)

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

lstm_zimbrao.py

lstm_zimbrao.py

Files

lstm_zimbrao.py

Latest commit

History

lstm_zimbrao.py

File metadata and controls