Book a Demo!
CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Policies | Sign Up | Sign In
Download
1041 views
Kernel: Python 3 (Anaconda)
import copy
import os

import numpy as np
import xlsxwriter

import utility


def _trim_to_shared_tail(*series):
    """Cut every series down to the trailing items they all have in common.

    The shared length is the length of the shortest series; each series keeps
    only its last ``shared_len`` items. Slicing from ``len(s) - shared_len``
    (rather than ``-shared_len``) keeps the result empty when the shortest
    series is empty.

    Returns:
        A list with one trimmed series per input, in the same order.
    """
    shared_len = min(len(s) for s in series)
    return [s[len(s) - shared_len:] for s in series]


def save_to_xlsx(work_book_name, date_vecs, rsi_vecs, mfi_vecs, ema_vecs, so_vecs,
                 macd_vecs, sl_vecs, fp_vecs, cl_vecs, avg_vecs):
    """Write the given feature series to ``<work_book_name>.xlsx``.

    The file is created in the hard-coded ``Feature_extracts`` directory. The
    first row is a header; every following row holds one item from each of the
    ten series, in the order of the parameters. Rows are produced with
    ``zip``, so writing stops at the end of the shortest series.

    Args:
        work_book_name: File name without the ``.xlsx`` extension.
        date_vecs: Values for the ``date`` column.
        rsi_vecs: Values for the ``rsi`` column.
        mfi_vecs: Values for the ``mfi`` column.
        ema_vecs: Values for the ``ema`` column.
        so_vecs: Values for the ``so`` column.
        macd_vecs: Values for the ``macd`` column.
        sl_vecs: Values for the ``sl`` column.
        fp_vecs: Values for the ``f_p`` column.
        cl_vecs: Values for the ``cl_p`` column.
        avg_vecs: Values for the ``avg_p`` column.
    """
    root = "/projects/d2e32407-baf4-4631-8512-ce12e859eb9c/Priprema DataSeta/Feature_extracts"
    header = ("date", "rsi", "mfi", "ema", "so", "macd", "sl", "f_p", "cl_p", "avg_p")
    columns = (date_vecs, rsi_vecs, mfi_vecs, ema_vecs, so_vecs,
               macd_vecs, sl_vecs, fp_vecs, cl_vecs, avg_vecs)

    # The context manager closes the workbook on every exit path, so a failing
    # write no longer leaves it open.
    with xlsxwriter.Workbook(os.path.join(root, work_book_name + '.xlsx')) as workbook:
        worksheet = workbook.add_worksheet()
        worksheet.set_column('A:A', 20)
        worksheet.write_row(0, 0, header)
        # Data rows start at row 1, directly below the header.
        for row, values in enumerate(zip(*columns), start=1):
            worksheet.write_row(row, 0, values)


def main():
    """Read the stock workbooks, compute features and save one file per stock.

    Steps:
      1. Collect the ``'Povijesni podaci'`` sheet of every ``.xlsx`` file under
         the hard-coded ``Dionice`` directory via ``utility``.
      2. Drop every record whose ``'Vrsta prometa'`` field is not ``'N'``.
      3. Extract the per-stock price/volume vectors and derive the indicator
         vectors (RSI, MFI, EMA, stochastic oscillator, MACD, signal line)
         with the ``utility`` helpers.
      4. For each stock, trim all series to their shared trailing length and
         write them to ``<stock_name>-features.xlsx``.
    """
    # get file list with .xlsx extension
    root = "/projects/d2e32407-baf4-4631-8512-ce12e859eb9c/Priprema DataSeta/Dionice"
    extension = ".xlsx"
    file_list = utility.get_file_list_from_root_dir(root, extension)

    # collect data from files
    sheet_name = 'Povijesni podaci'
    data = utility.collect_data_from_file_list(file_list, sheet_name)

    # Keep only the records whose 'Vrsta prometa' is 'N'. Iterating over a
    # snapshot of the keys allows deleting in place without deep-copying the
    # whole data set.
    for stock_name in data:
        records = data[stock_name]
        for time_stamp in list(records):
            if records[time_stamp][u'Vrsta prometa'] != 'N':
                del records[time_stamp]

    # pair every stock name with its sorted time stamps
    stock_name_time_stamps = [(stock_name, sorted(data[stock_name])) for stock_name in data]

    def extract(field):
        # One vector per stock for the given column of the source sheet.
        return utility.extract_vectors_of_specific_data(data, stock_name_time_stamps, field)

    # vectors of high, low, close, volume, average and first prices per stock
    high_prices_vecs = extract(u"Najviša")
    low_prices_vecs = extract(u"Najniža")
    close_prices_vecs = extract(u"Zadnja")
    volume_prices_vecs = extract(u"Broj transakcija")
    avg_prices_vecs = extract(u"Prosječna")
    first_prices_vecs = extract(u"Prva")

    # define features
    default_period = 14
    rsi_vecs = utility.relative_strength_index(close_prices_vecs, default_period)
    typical_prices_vecs = utility.typical_prices_vecs(high_prices_vecs, low_prices_vecs,
                                                      close_prices_vecs)
    mfi_vecs = utility.money_flow_index(typical_prices_vecs, volume_prices_vecs, default_period)
    ema_vecs = utility.exponential_moving_average(close_prices_vecs, default_period)
    so_vecs = utility.stochastic_oscillator(high_prices_vecs, low_prices_vecs,
                                            close_prices_vecs, default_period)
    macd_vecs = utility.moving_average_convergence_divergence(close_prices_vecs,
                                                              slow_period=26, fast_period=12)
    sl_vecs = utility.signal_line(macd_vecs, 9)

    for i, close_prices in enumerate(close_prices_vecs):
        stock_name, time_stamps = stock_name_time_stamps[i]
        # The series can differ in length (presumably the indicators need a
        # warm-up period -- confirm in utility), so align them on their tails.
        aligned = _trim_to_shared_tail(
            time_stamps, rsi_vecs[i], mfi_vecs[i], ema_vecs[i], so_vecs[i],
            macd_vecs[i], sl_vecs[i], first_prices_vecs[i], close_prices, avg_prices_vecs[i])
        save_to_xlsx(stock_name + "-features", *aligned)


if __name__ == '__main__':
    main()