# Amir
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import pandas_highcharts
from scipy.stats import gaussian_kde

try:
    from sklearn.model_selection import train_test_split
except ImportError:  # scikit-learn < 0.18 only ships the legacy module
    from sklearn.cross_validation import train_test_split
# Column labels used by the rest of the script; they are assigned by position
# and replace whatever header khodro.csv provides.
KHODRO_COLUMNS = [
    'name', 'open', 'high', 'low', 'close',
    'Today Volume', 'Yesterday Volume', '2 Days ago Vol',
    'Individuals Ratio', 'Individuals Sell Ratio', 'Trade Volume',
    'Month', 'Day of Month', 'Day of Week',
    'Latent Variable',
]

# Load the data set from the working directory and apply the labels above.
khodro = pd.read_csv('khodro.csv')
khodro.columns = KHODRO_COLUMNS
# Row-over-row change in the closing price (row i minus row i-1).
khodro['pdif'] = khodro['close'].diff()
# Rows without a defined change (the first row, or any row adjacent to a
# missing close) cannot be labelled. Previously their NaN fell through the
# `x < 0` test and was counted as a profit, so drop them instead.
khodro = khodro.dropna(subset=['pdif']).reset_index(drop=True)
# Binary target: 1 when the close did not fall versus the previous row, else 0.
khodro['profit'] = (khodro['pdif'] >= 0).astype(int)
# Standardise each model input in place: subtract the column mean and divide
# by the column standard deviation, both computed over the whole table.
for _feature in ('close', 'Today Volume', 'Yesterday Volume', 'Individuals Ratio'):
    _values = khodro[_feature]
    khodro[_feature] = (_values - _values.mean()) / _values.std()
# Model inputs and the binary target.
feature_columns = ['close', 'Today Volume', 'Yesterday Volume', 'Individuals Ratio']
x = khodro[feature_columns]
y = khodro['profit']

# Hold out 20% of the rows for testing. No random_state is passed, so the
# partition is not pinned to a seed.
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)

# Split the training rows by class: suffix 1 = profit, suffix 0 = loss.
is_win = y_train == 1
is_loss = y_train == 0
x1, y1 = x_train[is_win], y_train[is_win]
x0, y0 = x_train[is_loss], y_train[is_loss]
# Evaluation grid for the density curves (200 evenly spaced points on [-2, 3]).
xs = np.linspace(-2, 3, 200)

##Close-Plots
# Kernel density estimates of 'close' for the profit (1) and loss (0) classes.
density1 = gaussian_kde(x1['close'].values)
density0 = gaussian_kde(x0['close'].values)
plt.plot(xs, density0(xs), c='red')
plt.plot(xs, density1(xs), c='green')

# Histograms scaled to unit area so they share a y-axis with the KDE curves.
# `density=True` replaces the `normed` keyword, which was deprecated in
# Matplotlib 2.1 and removed in 3.1.
plt.hist(x0['close'], 52, density=True, alpha=0.7, color='red', label='close-loss')
plt.hist(x1['close'], 52, density=True, alpha=0.7, color='green', label='close-win')

# Overlay the raw samples at their class label (y = 0 for loss, y = 1 for win).
plt.scatter(x0['close'], y0.values, c='red')
plt.scatter(x1['close'], y1.values, c='green')
plt.legend()
plt.show()