Support Board
Date/Time: Sat, 23 Nov 2024 18:04:00 +0000
Post From: Python for Sierra Chart
[2013-06-20 23:10:17] |
Kiwi - Posts: 375 |
Updated version with new methods for converting the dataframe to a longer timeframe and for mapping longer-timeframe development onto the lower timeframe. #!/usr/bin/python3
from __future__ import print_function

import struct
import sys
from time import sleep, time

import numpy as np
import pandas as pd

# Column labels.  The lower- and upper-case names are aliases for the same
# label so either spelling can be used when indexing a frame.
o = O = 'O'
h = H = 'H'
l = L = 'L'
c = C = 'C'
v = V = 'V'
x = 'x'
y = 'y'
z = 'z'

# Run statistics shared between SierraFile.read_record and SierraRun.
time_list = []
overrun_list = []
overruns = 0

# NOTE(review): presumably long / medium / short timeframe lengths in
# minutes; nothing in this script reads them.
lt = 15
mt = 5
st = 1

# Per-column aggregation used when resampling to a longer timeframe.
ohlc = {o: 'first', h: 'max', l: 'min', c: 'last',
        v: 'sum', x: 'sum', y: 'sum', z: 'sum'}
cols = [O, H, L, C, V, x, y, z]

# Binary layout of one record: a double, four floats, four unsigned ints.
_RECORD_FORMAT = 'd4f4I'
# First element of the tuple read_record returns when nothing can be read.
_NO_DATA = -999


def _print_time_stats():
    """Print the max and mean of ``time_list``; print nothing if it is empty."""
    if time_list:
        print('TimeStats',
              "{:.6f} {:.6f}".format(max(time_list),
                                     sum(time_list) / len(time_list)),
              '\n', end=' ')


class SierraFile(object):
    """Reader/writer for a Sierra Chart ``.scid`` data file.

    Attributes:
        filename: path of the file, as a string.
        tzAdjust: ten hours expressed as a fraction of a day; added to the
            stored timestamp on read and subtracted again on write.
        excelDate: day that the stored day-count timestamps are relative to.
        sizeHeader: number of header bytes preceding the first record.
        sizeRecord: number of bytes in one record.
        pos: file offset of the next record ``read_record`` will return.
        last: file size seen by the most recent read.
    """

    def __init__(self, filename):
        self.filename = str(filename)
        self.tzAdjust = np.timedelta64(10, 'h') / np.timedelta64(1, 'D')
        self.excelDate = np.datetime64('1899-12-30')
        self.sizeHeader = 0x38
        self.sizeRecord = 0x28
        self.pos = 0
        self.last = 0

    def _decode(self, data):
        """Unpack one raw record into ``(timestamp, datarow)``.

        The timestamp is stored as fractional days; it is shifted by
        ``tzAdjust`` and rounded to whole seconds.  ``datarow`` holds the four
        floats and the first unsigned int, padded with three zeros so that it
        lines up with ``cols``.
        """
        d = struct.unpack(_RECORD_FORMAT, data)
        dt = d[0] + self.tzAdjust
        days = int(dt)
        seconds = int(round((dt - days) * 86400))
        stamp = (self.excelDate + np.timedelta64(days, 'D')
                 + np.timedelta64(seconds, 's'))
        return stamp, [d[1], d[2], d[3], d[4], d[5], 0, 0, 0]

    def _encode(self, dataframe, i, marker):
        """Pack row ``i`` of ``dataframe.df`` into one raw record.

        The last column of the row is not written; ``marker`` is stored in
        the final unsigned-int slot instead.
        """
        stamp = dataframe.df.index.values[i]
        da = ((stamp - self.excelDate) / np.timedelta64(1, 'D')
              - self.tzAdjust)
        db, dc, dd, de, vol, dg, dh, _unused = dataframe.df.iloc[i]
        return struct.pack(_RECORD_FORMAT, da, db, dc, dd, de,
                           int(vol), int(dg), int(dh), int(marker))

    def read_existing_records(self):
        """Read every complete record currently in the file.

        At most 1,000,000 records are read.  A truncated trailing record
        (the writer may be in the middle of appending) is left for
        ``read_record`` to pick up once it is complete.

        Returns:
            ``(ts, rows)``: a list of ``numpy.datetime64`` timestamps and a
            parallel list of 8-element data rows.
        """
        ts = []
        rows = []
        with open(self.filename, 'rb') as fscid:
            fscid.read(self.sizeHeader)  # discard header
            for _ in range(1000000):
                data = fscid.read(self.sizeRecord)
                if len(data) < self.sizeRecord:
                    break
                stamp, datarow = self._decode(data)
                ts.append(stamp)
                rows.append(datarow)
        # Offset just past the last complete record that was consumed.
        self.pos = self.last = self.sizeHeader + len(rows) * self.sizeRecord
        return (ts, rows)

    def read_record(self):
        """Read the next unread record, if a complete one is available.

        Returns:
            ``(-999, 0, 0)`` when no complete new record exists, otherwise
            ``(new_time, datarow, late_flag)`` where ``late_flag`` is True
            when more than one record is waiting.  Each overrun is also
            counted in the module-level ``overruns`` / ``overrun_list``.
        """
        global overruns, overrun_list
        with open(self.filename, 'rb') as fscid:
            fscid.seek(0, 2)  # go to the end of the file
            self.last = fscid.tell()
            pending = self.last - self.pos
            if pending < self.sizeRecord:
                # Nothing new, or only part of a record has been written.
                return (_NO_DATA, 0, 0)
            late_flag = pending >= 2 * self.sizeRecord
            if late_flag:
                print('Overrun', pending, pending / self.sizeRecord)
                overruns += 1
                overrun_list.append(np.datetime64('now'))
            fscid.seek(self.pos, 0)
            data = fscid.read(self.sizeRecord)
        self.pos += self.sizeRecord
        new_time, datarow = self._decode(data)
        return (new_time, datarow, late_flag)

    def write_existing_records(self, dataframe):
        """Overwrite the file with a header and rows ``0 .. dataframe.end - 1``.

        Args:
            dataframe: a ``SierraFrame``; only its filled rows are written.
        """
        # The bytes after the 'SCID' tag are 0x38 ('8') and 0x28 ('('), the
        # same values as sizeHeader and sizeRecord.
        header = b'SCID8\x00\x00\x00(\x00\x00\x00\x01\x00'
        padding = b'\x00' * (self.sizeHeader - len(header))
        with open(self.filename, 'wb') as fscid:
            fscid.write(header + padding)
            for i in range(dataframe.end):
                fscid.write(self._encode(dataframe, i, 0x11100111))

    def write_record(self, dataframe):
        """Append the most recently filled row of ``dataframe`` to the file."""
        with open(self.filename, 'ab') as fscid:
            fscid.write(self._encode(dataframe, dataframe.end - 1,
                                     0x88300388))


class SierraFrame(object):
    """
    DataFrame wrapper that is the basic object for analysis:
        init            builds the frame from timestamps and data rows
                        (5 second bars are assumed)
        extend_frame    adds empty rows in bulk because appending single
                        rows is slow
        add             fills the next pre-allocated row for real time
                        operation
        build_tf        creates a new dataframe on a longer timeframe
        build_htf_array creates an array showing higher timeframe bars as
                        they develop on the lower timeframe
        countfloats     is a test method

    Attributes:
        df: the underlying ``pandas.DataFrame`` with columns ``cols``.
        end: number of filled rows; rows from ``end`` onward are
            pre-allocated placeholders.
        pos: initialised to 0; not used by this class.
    """

    def __init__(self, time_index, data):
        self.df = pd.DataFrame(data, index=time_index, columns=cols)
        self.end = len(self.df)
        self.pos = 0

    def extend_frame(self):
        '''
        Append 4999 empty (NaN) rows at 5 second spacing, continuing from the
        last timestamp in self.df, and make every column float64.
        Timestamps falling in today's 14:00-15:00 window (the lunch break)
        are moved one hour later.
        '''
        print('Extending DataFrame Now')
        s5 = np.timedelta64(5, 's')
        h1 = np.timedelta64(1, 'h')
        today = np.datetime64('today')
        sl = today + np.timedelta64(14, 'h')
        el = today + np.timedelta64(15, 'h')
        # Continue from the real end of the frame.  Starting from row
        # ``end - 1`` would repeat timestamps whenever unfilled rows remain.
        start_time = self.df.index.values[-1]
        stamps = [start_time + i * s5 for i in range(1, 5000)]
        # NOTE(review): the shifted lunch-hour stamps coincide with the
        # unshifted stamps of the following hour, so an extension that spans
        # the break contains repeated timestamps.  Behaviour kept as-is.
        stamps = [t + h1 if sl <= t < el else t for t in stamps]
        dg = pd.DataFrame(np.nan, index=pd.DatetimeIndex(stamps),
                          columns=self.df.columns, dtype=np.float64)
        self.df = pd.concat([self.df.astype(np.float64), dg])

    def _ensure_room(self):
        """Extend the frame when fewer than five unfilled rows remain."""
        if self.end > len(self.df) - 5:
            self.extend_frame()

    def add(self, new_time, datarow):
        '''
        Store a row in the pre-allocated part of the frame:
            extend the frame if the fill position is within 5 of the end
            ignore the row if it is earlier than the next expected bar
            copy the previous bar forward over any skipped bars
            store every value as float for df speed of access
        datarow itself is not modified.
        '''
        self._ensure_room()
        np_time = np.datetime64(new_time)
        if np_time < self.df.index.values[self.end]:
            return  # new data is earlier than current
        while np_time > self.df.index.values[self.end]:
            # Fill with the prior row while the new bar is still later.
            self.df.iloc[self.end] = self.df.iloc[self.end - 1]
            self.end += 1
            self._ensure_room()  # a long gap must not run off the frame
        self.df.iloc[self.end] = [float(value) for value in datarow]
        self.end += 1

    def build_tf(self, ht):
        '''
        Create a higher timeframe df from self.df
            with ht being the high timeframe bar length in minutes
        '''
        return build_tf(self.df, ht)

    def build_htf_array(self, st, ht):
        '''
        Resample self.df to st-minute bars, then map higher timeframe
        development on to those bars, a new higher timeframe bar starting
        every ht rows.  self.df is not modified.
        '''
        return build_htf_array(build_tf(self.df, st), ht)

    def countfloats(self):
        """Return ``(floats, nonfloats)``: how many cells are / are not float."""
        length, width = self.df.shape
        floats = sum(isinstance(self.df.iloc[i, j], float)
                     for i in range(length) for j in range(width))
        return (floats, length * width - floats)


def build_htf_array(di, ht):
    '''
    Map higher timeframe development on to input df.

    Works on a copy of the first five columns of di.  Rows are grouped by
    position, ht rows per group.  Within a group the first column is held at
    the group's first value, the second is the running max and the third the
    running min; the fourth and fifth columns are left as they are.
    '''
    dih = di.iloc[:, 0:5].copy()
    for i in range(len(dih)):
        if i == 0 or i // ht > (i - 1) // ht:
            # First row of a new higher timeframe bar: start tracking.
            bO = dih.iloc[i, 0]
            bH = dih.iloc[i, 1]
            bL = dih.iloc[i, 2]
        else:
            dih.iloc[i, 0] = bO
            dih.iloc[i, 1] = bH = max(bH, dih.iloc[i, 1])
            dih.iloc[i, 2] = bL = min(bL, dih.iloc[i, 2])
    return dih


def build_tf(di, ht):
    '''
    Create higher timeframe df from the input, di
        with ht being the high timeframe bar length in minutes
    Columns are aggregated as described by the module-level ohlc mapping.
    '''
    # Resampler.agg replaces the ``how=`` keyword, which current pandas
    # releases no longer accept.
    return di.resample(str(ht) + 'min').agg(ohlc)[cols]


def SierraRun(
        filename='/home/john/zRamdisk/SierraChart/Data/HSIM13-FUT-HKFE-TD.scid',
        out_filename='/home/john/zRamdisk/SierraChart/Data/HSI-INPUT.scid',
        debug=True):
    """Load ``filename``, mirror it to ``out_filename`` and follow new records.

    Args:
        filename: source ``.scid`` file to read and then poll.
        out_filename: ``.scid`` file that is rewritten and then appended to.
        debug: when True (the default, matching the original behaviour) drop
            into ``ipdb`` at the developer breakpoints.

    This function polls forever and does not return.
    """
    # Without this declaration the resets below would create locals and the
    # status line would never show the overruns counted by read_record.
    global time_list, overruns, overrun_list
    time0 = time()
    hsi = SierraFile(filename)
    time_index, data = hsi.read_existing_records()
    da = SierraFrame(time_index, data)
    if debug:
        import ipdb
        ipdb.set_trace()  # XXX BREAKPOINT
    da.extend_frame()
    wtst = SierraFile(out_filename)
    wtst.write_existing_records(da)
    print('df ready', da.end - 1, time() - time0)
    print(da.df.iloc[da.end - 1:da.end + 1])
    print()
    df = da.df
    print('\n', np.datetime64('now'), da.end)
    print(df.iloc[da.end - 5:da.end + 5])
    if debug:
        import ipdb
        ipdb.set_trace()  # XXX BREAKPOINT

    counter = 0
    counter_flag = False
    timer_no_data = time()
    timer_no_data_flag = False
    overruns = 0
    overrun_list = []
    while True:
        time0 = time()
        new_time, data, late_flag = hsi.read_record()
        # read_record returns the integer sentinel -999 when idle; checking
        # the type avoids comparing a datetime64 with an int.
        if isinstance(new_time, np.datetime64):
            da.add(new_time, data)
            sys.stdout.flush()
            # NOTE(review): only the newest filled row is written, so bars
            # filled by add() for a gap are not mirrored to the output file.
            wtst.write_record(da)
            if counter > 3:
                time_list.append(time() - time0)
            print('.', end=' ')
            sys.stdout.flush()
            if timer_no_data_flag:
                print('Data Restored')
            timer_no_data = time()
            timer_no_data_flag = False
            counter += 1
            counter_flag = True
        if time() - timer_no_data >= 120 and not timer_no_data_flag:
            timer_no_data_flag = True
            print('Data lost for two minutes')
        if not late_flag:
            # Poll roughly every 0.1 s; skip the sleep when records are
            # backed up so the backlog is drained quickly.
            sleep_time = 0.1 - (time() - time0)
            if sleep_time > 0:
                sleep(sleep_time)
        if counter % 12 == 0 and counter_flag:
            counter_flag = False
            print(' Overruns:', overruns, overrun_list, end=' ')
            _print_time_stats()
            sys.stdout.flush()
        if debug and counter % 60 == 0 and counter != 0:
            import ipdb
            ipdb.set_trace()  # XXX BREAKPOINT


def main():
    """Run the live reader with its default file locations."""
    SierraRun()


if __name__ == '__main__':
    # Takes a SierraChart scid file and converts it to a Pandas DataFrame.
    # NOTE(review): the original notes describe a filename argument and a
    # timezone argument; neither is read from the command line by this code.
    print('start')
    sys.stdout.flush()
    main()
    print('fin')
    _print_time_stats()

# Date Time Of Last Edit: 2013-06-21 02:52:49
|