import numpy as np
import pandas as pd
from keras.layers.core import Dense, Activation, Dropout
from keras.layers.recurrent import LSTM
from keras.models import Sequential
import matplotlib.pyplot as plt
import time
import datetime as dt
import urllib.request, json
import os
from pandas_datareader import data
import tweepy
from textblob import TextBlob
import re
data_source = 'alphavantage'
if data_source == 'alphavantage':
# ====================== Loading Data from Alpha Vantage ==================================
api_key = '7TONQ8CM5PXZ4YEO'
# TCS stock market prices
ticker = "TCS"
# JSON file with all the stock market data for TCS
url_string = "https://www.alphavantage.co/query?function=TIME_SERIES_DAILY&symbol=%s&outputsize=full&apikey=%s"%(ticker,api_key)
# Save data to this file
file_to_save = 'stock_market_data-%s.csv'%ticker
# If you haven't already saved data,
# Go ahead and grab the data from the url
# And store date, low, high, volume, close, open values to a Pandas DataFrame
if not os.path.exists(file_to_save):
with urllib.request.urlopen(url_string) as url:
data = json.loads(url.read().decode())
# extract stock market data
data = data['Time Series (Daily)']
df = pd.DataFrame(columns=['Date','Low','High','Close','Open'])
for k,v in data.items():
date = dt.datetime.strptime(k, '%Y-%m-%d')
data_row = [date.date(),float(v['3. low']),float(v['2. high']),
float(v['4. close']),float(v['1. open'])]
df.loc[-1,:] = data_row
df.index = df.index + 1
print('Data saved to : %s'%file_to_save)
df.to_csv(file_to_save)
# If the data is already there, just load it from the CSV
else:
print('File already exists. Loading data from CSV')
df = pd.read_csv(file_to_save)
df = pd.read_csv("stock_market_data-TCS.csv")
df = df[['Date', 'Open', 'Close', 'Low', 'High']]
df = df.sort_values('Date')
df.head()
df.tail()
plt.figure(figsize = (18,9))
plt.plot(range(df.shape[0]),(df['Low']+df['High'])/2.0)
plt.xticks(range(0,df.shape[0],100),df['Date'].loc[::100],rotation=45)
plt.xlabel('Date',fontsize=18)
plt.ylabel('Mid Price',fontsize=18)
plt.show()
high_prices = df.loc[:,'High']
low_prices = df.loc[:,'Low']
df["Mid Prices"] = (high_prices+low_prices)/2.0
df.head()
df.tail()
df.drop("Date", axis = 1,inplace = True)
from sklearn.preprocessing import StandardScaler
scaler = StandardScaler()
scaler.fit(df)
df = scaler.transform(df)
def build_model(layers):
model = Sequential()
model.add(LSTM(
input_shape=(None, layers[0]),
units=50,
return_sequences=True))
model.add(Dropout(0.2))
model.add(LSTM(
100,
return_sequences=False))
model.add(Dropout(0.2))
model.add(Dense(
units=1))
model.add(Activation('linear'))
start = time.time()
model.compile(loss='mse', optimizer='rmsprop')
print ('compilation time : ', time.time() - start)
return model
def load_data(stock, seq_len):
amount_of_features = 5
data = stock
sequence_length = seq_len + 1
result = []
for index in range(len(data) - sequence_length):
result.append(data[index: index + sequence_length])
result = np.array(result)
row = round(0.75 * result.shape[0])
train = result[:int(row), :]
x_train = train[:, :-1]
y_train = train[:, -1][:,-1]
x_test = result[int(row):, :-1]
y_test = result[int(row):, -1][:,-1]
x_train = np.reshape(x_train, (x_train.shape[0], x_train.shape[1], amount_of_features))
x_test = np.reshape(x_test, (x_test.shape[0], x_test.shape[1], amount_of_features))
return [x_train, y_train, x_test, y_test]
window = 5
X_train, y_train, X_test, y_test = load_data(df, window)
print("X_train", X_train.shape)
print("y_train", y_train.shape)
print("X_test", X_test.shape)
print("y_test", y_test.shape)
model = build_model([5,window,1])
model.fit(
X_train,
y_train,
batch_size=512,
epochs=30,
validation_split=0.1,
verbose=1)
trainScore = model.evaluate(X_train, y_train, verbose=0)
print(trainScore)
accuracy = (1-trainScore)*100
p = model.predict(X_test)
from sklearn.metrics import mean_squared_error
mse = mean_squared_error(y_test, p)
accuracy_test = (1-mse)*100
import matplotlib.pyplot as plt2
plt2.figure(figsize = (18,9))
plt2.plot(p,color='red', label='prediction')
plt2.plot(y_test,color='blue', label='y_test')
plt2.legend(loc='upper left')
plt2.show()
consumer_key= 'E0pFYVai9VaOhqLiRBEC6gpGF'
consumer_secret= 'XAMh4l9XL5nwFK3MN5tAjtXA2YgDN1tw5f7L2n6dz5ib8VYlbm'
access_token='3261604734-86c7DOJP98GwNeFWzvgPQKFUTyHn1ZFwlloJP3v'
access_token_secret='eXEmlEAdxaFjueVP03jsAWeOeNMkI7ToiDQkyvLDa6eX7'
auth = tweepy.OAuthHandler(consumer_key, consumer_secret)
auth.set_access_token(access_token, access_token_secret)
api = tweepy.API(auth)
public_tweets = api.search('TCS', count = 1000)
def clean_tweet(tweet):
return ' '.join(re.sub("(@[A-Za-z0-9]+)|([^0-9A-Za-z \t])|(\w+:\/\/\S+)", " ", tweet).split())
def get_tweet_sentiment(tweet):
analysis = TextBlob(clean_tweet(tweet))
if analysis.sentiment.polarity > 0:
return 'positive'
elif analysis.sentiment.polarity == 0:
return 'neutral'
else:
return 'negative'
positive = 0
negative = 0
neutral = 0
print(len(public_tweets))
for tweet in public_tweets:
# print(tweet.text)
print(clean_tweet(tweet.text))
sentiment = get_tweet_sentiment(tweet.text)
print(sentiment)
if sentiment == 'positive':
positive += 1
elif sentiment == 'negative':
negative += 1
elif sentiment == 'neutral':
neutral += 1
print("")
print('Positive:', positive)
print('Negative:', negative)
print('Neutral:', neutral)