Login Page - Create Account

Support Board


Date/Time: Sat, 23 Nov 2024 18:04:00 +0000



Post From: Python for Sierra Chart

[2013-06-20 23:10:17]
Kiwi - Posts: 375
Updated version with new methods for converting the dataframe to a longer timeframe and for mapping longer-timeframe bar development onto the lower timeframe.

#!/usr/bin/python3
from __future__ import print_function
import numpy as np
import pandas as pd
import struct
import sys
from time import sleep, time

# Column labels used by every DataFrame in this module.  Each label is bound
# to both a lower- and an upper-case name so either spelling can be used.
o = O = 'O'
h = H = 'H'
l = L = 'L'
c = C = 'C'
v = V = 'V'
x = 'x'
y = 'y'
z = 'z'

# Module-level run statistics: per-record processing times collected by
# SierraRun, and the overrun count / timestamps updated by
# SierraFile.read_record.
time_list = []
overrun_list = []
overruns = 0

# NOTE(review): presumably long / medium / short timeframe lengths in
# minutes -- they are not referenced by the code visible here; confirm.
lt = 15
mt = 5
st = 1

# Per-column aggregation used when resampling bars to a longer timeframe.
ohlc = {o: 'first', h: 'max', l: 'min', c: 'last',
        v: 'sum', x: 'sum', y: 'sum', z: 'sum'}
# Column order of every frame built from a record.
cols = [O, H, L, C, V, x, y, z]



class SierraFile(object):
    """Reader/writer for a Sierra Chart ``.scid`` style data file.

    The file is treated as a 0x38-byte header followed by fixed 0x28-byte
    records packed as ``'d4f4I'``: one double (date/time as fractional days
    since 1899-12-30), four floats and four unsigned ints.

    Attributes:
        filename:   path of the file, as a string.
        tzAdjust:   offset, in fractional days (10 hours), added to stored
                    timestamps on read and subtracted again on write.
        excelDate:  day zero of the stored timestamps (1899-12-30).
        sizeHeader: header length in bytes.
        sizeRecord: record length in bytes.
        pos:        file offset of the next record ``read_record`` returns.
        last:       file size seen by the most recent read.
    """

    _RECORD_FORMAT = 'd4f4I'

    def __init__(self, filename):
        self.filename = str(filename)
        self.tzAdjust = np.timedelta64(10, 'h') / np.timedelta64(1, 'D')
        self.excelDate = np.datetime64('1899-12-30')
        self.sizeHeader = 0x38
        self.sizeRecord = 0x28
        self.pos = 0
        self.last = 0

    def _decode(self, data):
        """Unpack one raw record into ``(timestamp, datarow)``.

        The timestamp is rounded to whole seconds.  ``datarow`` holds the
        four floats and the first unsigned int of the record, followed by
        three zeros (the remaining three unsigned ints are ignored).
        """
        d = struct.unpack(self._RECORD_FORMAT, data)
        dt = d[0] + self.tzAdjust
        days = int(dt)
        seconds = int(round((dt - days) * 86400))
        stamp = (self.excelDate + np.timedelta64(days, 'D')
                 + np.timedelta64(seconds, 's'))
        return stamp, [d[1], d[2], d[3], d[4], d[5], 0, 0, 0]

    def _encode(self, stamp, row, marker):
        """Pack one frame row (8 values) into a raw record.

        The eighth value of ``row`` is discarded; ``marker`` is written in
        the last unsigned-int field instead.
        """
        da = ((stamp - self.excelDate) / np.timedelta64(1, 'D')
              - self.tzAdjust)
        db, dc, dd, de, df, dg, dh, _ = row
        return struct.pack(self._RECORD_FORMAT, da, db, dc, dd, de,
                           int(df), int(dg), int(dh), marker)

    def read_existing_records(self):
        """Read every complete record currently in the file.

        A trailing partial record (one still being written) is left for a
        later ``read_record`` call rather than being unpacked, and there is
        no upper limit on the number of records read.

        Returns:
            ``(ts, rows)``: a list of ``numpy.datetime64`` timestamps and a
            parallel list of 8-element data rows.
        """
        ts = []
        rows = []
        with open(self.filename, 'rb') as fscid:
            fscid.read(self.sizeHeader)  # discard header
            while True:
                data = fscid.read(self.sizeRecord)
                if len(data) < self.sizeRecord:  # EOF or partial record
                    break
                stamp, datarow = self._decode(data)
                ts.append(stamp)
                rows.append(datarow)
        # Point at the end of the last *complete* record so that a partial
        # tail is re-read once it has been fully written.
        self.pos = self.last = (self.sizeHeader
                                + len(rows) * self.sizeRecord)
        return (ts, rows)

    def read_record(self):
        """Read the next unread record, if a complete one is available.

        Returns:
            ``(-999, 0, 0)`` when no complete new record exists; otherwise
            ``(new_time, datarow, late_flag)`` where ``late_flag`` is True
            when more than one complete record was waiting (an overrun).
            Overruns are also counted in the module-level ``overruns`` and
            time-stamped in ``overrun_list``.
        """
        global overruns, overrun_list
        with open(self.filename, 'rb') as fscid:
            fscid.seek(0, 2)  # go to the end of the file
            self.last = fscid.tell()
            pending = (self.last - self.pos) // self.sizeRecord
            if pending < 1:  # nothing new, or only a partial record
                return (-999, 0, 0)
            late_flag = pending > 1
            if late_flag:
                print('Overrun', self.last - self.pos,
                      (self.last - self.pos) / self.sizeRecord)
                overruns += 1
                overrun_list.append(np.datetime64('now'))
            fscid.seek(self.pos, 0)
            data = fscid.read(self.sizeRecord)
        self.pos += self.sizeRecord
        new_time, datarow = self._decode(data)
        return (new_time, datarow, late_flag)

    def write_existing_records(self, dataframe):
        """Create/overwrite the file with the first ``dataframe.end`` rows.

        ``dataframe`` must expose ``df`` (a DataFrame with a datetime index
        and eight columns) and ``end`` (number of valid rows).
        """
        stamps = dataframe.df.index.values
        rows = dataframe.df.values
        # 'SCID', then little-endian 0x38, 0x28 and 1, zero-padded to the
        # full header length.
        header = b'SCID8\x00\x00\x00(\x00\x00\x00\x01\x00'
        with open(self.filename, 'wb') as fscid:
            fscid.write(header.ljust(self.sizeHeader, b'\x00'))
            for i in range(dataframe.end):
                fscid.write(self._encode(stamps[i], rows[i], 0x11100111))

    def write_record(self, dataframe):
        """Append the last valid row (``dataframe.end - 1``) to the file."""
        i = dataframe.end - 1
        record = self._encode(dataframe.df.index.values[i],
                              dataframe.df.iloc[i], 0x88300388)
        with open(self.filename, 'ab') as fscid:
            fscid.write(record)


class SierraFrame(object):
    """
    DataFrame wrapper that is the basic object for analysis:
        init            builds the frame from timestamps and data rows
                        (5 second bars assumed)
        extend_frame    adds 4999 empty rows, because appending single
                        rows is slow
        add             writes new data into the pre-allocated rows for
                        real time operation
        build_tf        creates a new dataframe resampled to a longer bar
        build_htf_array creates an array showing higher timeframe bars as
                        they develop on the lower timeframe
        countfloats     is a test method

    Attributes:
        df:  the underlying pandas DataFrame (columns O, H, L, C, V, x, y, z).
        end: number of rows that hold real data; rows from ``end`` onwards
             are pre-allocated placeholders.
        pos: initialised to 0; not used by the methods of this class.

    Requires a pandas version that provides ``Resampler.agg``.
    """

    def __init__(self, time_index, data):
        self.df = pd.DataFrame(data, index=time_index,
                               columns=[O, H, L, C, V, x, y, z])
        self.end = len(self.df)
        self.pos = 0

    def extend_frame(self):
        '''
        Append 4999 NaN rows at 5 second spacing after the last time in
        self.df, skipping today's 14:00-15:00 lunch break.

        From the first generated time that falls inside the break onwards,
        every time is pushed back by one hour, so the index stays strictly
        increasing with no duplicates.  The whole frame is float64
        afterwards.

        Raises ValueError if the frame is empty (there is no start time).
        '''
        print('Extending DataFrame Now')
        if len(self.df) == 0:
            raise ValueError('cannot extend an empty frame')
        s5 = np.timedelta64(5, 's')
        h1 = np.timedelta64(1, 'h')
        sl = np.datetime64('today') + np.timedelta64(14, 'h')
        el = np.datetime64('today') + np.timedelta64(15, 'h')
        # Continue from the very last row (not self.end - 1) so placeholder
        # rows left over from an earlier extension are not generated twice.
        start_time = self.df.index.values[-1]
        times = start_time + np.arange(1, 5000) * s5
        in_lunch = (times >= sl) & (times < el)
        if in_lunch.any():
            first = int(np.argmax(in_lunch))
            times[first:] += h1
        values = np.vstack([
            self.df.values.astype(np.float64),
            np.full((len(times), len(self.df.columns)), np.nan),
        ])
        index = self.df.index.append(pd.DatetimeIndex(times))
        self.df = pd.DataFrame(values, index=index, columns=self.df.columns)

    def add(self, new_time, datarow):
        '''
        Add a row to an existing extended df but:
            extend whenever fewer than 5 free rows remain (also while
                filling a gap, so a long gap cannot run off the end)
            ignore data that is earlier than the next free slot
            fill with the previous bar while the new bar is later than the
                next slot
            store the four integer columns as float for df speed of access
        The caller's datarow is not modified.
        '''
        if self.end > len(self.df) - 5:
            self.extend_frame()  # not needed if first fill > day length
        np_time = np.datetime64(new_time)
        if np_time < self.df.index.values[self.end]:
            return  # new data is earlier than current
        while np_time > self.df.index.values[self.end]:
            # fill with prior row if new is later
            self.df.iloc[self.end] = self.df.iloc[self.end - 1]
            self.end += 1
            if self.end > len(self.df) - 5:
                self.extend_frame()
        row = list(datarow)
        for i in (4, 5, 6, 7):
            row[i] = float(row[i])
        self.df.iloc[self.end] = row  # fill when times match
        self.end += 1

    def build_tf(self, ht):
        '''
        Create a higher timeframe df from self.df,
        with ht being the high timeframe bar length in minutes
        '''
        return self.df.resample(str(ht) + 'min').agg(ohlc)[cols]

    def build_htf_array(self, st, ht):
        '''
        Map higher timeframe development on to self.df resampled to
        st-minute bars.  Every run of ht consecutive st-minute bars forms
        one higher timeframe bar; each row of the result shows that bar's
        open and its running high / low up to that row.  Close and volume
        are the row's own values.  Returns the first five columns only.
        '''
        di = self.df.resample(str(st) + 'min').agg(ohlc)[cols]
        # Copy so the running values are written to an independent frame.
        dih = di.iloc[:, 0:5].copy()
        for i in range(len(dih)):
            if i == 0 or i // ht > (i - 1) // ht:
                # first row of a new higher timeframe bar
                bO = dih.iloc[i, 0]
                bH = dih.iloc[i, 1]
                bL = dih.iloc[i, 2]
            else:
                dih.iloc[i, 0] = bO
                dih.iloc[i, 1] = bH = max(bH, dih.iloc[i, 1])
                dih.iloc[i, 2] = bL = min(bL, dih.iloc[i, 2])
        return dih

    def countfloats(self):
        '''
        Return (floats, nonfloats): how many cells of self.df are
        instances of float and how many are not.
        '''
        length, width = self.df.shape
        floats = 0
        nonfloats = 0
        for i in range(length):
            for j in range(width):
                if isinstance(self.df.iloc[i, j], float):
                    floats += 1
                else:
                    nonfloats += 1
        return (floats, nonfloats)

def build_htf_array(di, ht):
    '''
    Show how higher timeframe bars develop, row by row, on the input df.

    Every run of ht consecutive rows of di forms one higher timeframe bar.
    A copy of the first five columns of di is returned in which, for every
    row after the first of its bar, column 0 holds the bar's opening value
    and columns 1 / 2 hold the running maximum / minimum seen so far in
    that bar.  Columns 3 and 4 keep the row's own values.  di itself is
    left untouched.
    '''
    dih = di.iloc[:, 0:5].copy()
    bar_open = bar_high = bar_low = None
    for row in range(len(dih)):
        starts_new_bar = row == 0 or row // ht > (row - 1) // ht
        if starts_new_bar:
            # Seed the running values from the first row of the bar.
            bar_open = dih.iloc[row, 0]
            bar_high = dih.iloc[row, 1]
            bar_low = dih.iloc[row, 2]
            continue
        bar_high = max(bar_high, dih.iloc[row, 1])
        bar_low = min(bar_low, dih.iloc[row, 2])
        dih.iloc[row, 0] = bar_open
        dih.iloc[row, 1] = bar_high
        dih.iloc[row, 2] = bar_low
    return dih

def build_tf(di, ht):
    '''
    Create a higher timeframe df from the input, di,
    with ht being the high timeframe bar length in minutes.

    di must have a datetime index.  Each column is aggregated as given by
    the module-level ohlc mapping and the result is returned in the
    module-level cols order.

    Uses Resampler.agg because the ``how=`` keyword of ``resample`` no
    longer exists in current pandas.
    '''
    return di.resample(str(ht) + 'min').agg(ohlc)[cols]



def SierraRun(
        filename='/home/john/zRamdisk/SierraChart/Data/HSIM13-FUT-HKFE-TD.scid',
        out_filename='/home/john/zRamdisk/SierraChart/Data/HSI-INPUT.scid'):
    """Mirror a live .scid file into a second file, one record at a time.

    Loads every existing record of ``filename`` into a SierraFrame, writes
    them to ``out_filename``, then polls ``filename`` forever (about every
    0.1 s; immediately when the reader is behind) and appends each new
    record to the frame and to ``out_filename``.  Progress, overrun and
    timing statistics are printed as it goes.

    Args:
        filename:     source .scid file to follow.
        out_filename: .scid file that is rewritten and then appended to.

    This function does not return; stop it with Ctrl-C.  Per-record
    processing times are collected in the module-level ``time_list``.
    """
    # overruns / overrun_list are updated by SierraFile.read_record through
    # the module globals, so they must be global here too for the report
    # below to show real values.
    global time_list, overruns, overrun_list
    time0 = time()
    hsi = SierraFile(filename)
    time_index, data = hsi.read_existing_records()
    da = SierraFrame(time_index, data)
    da.extend_frame()
    wtst = SierraFile(out_filename)
    wtst.write_existing_records(da)
    print('df ready', da.end - 1, time() - time0)
    print(da.df[da.end - 1:da.end + 1])
    print()
    df = da.df
    print('\n', np.datetime64('now'), da.end)
    print(df[da.end - 5:da.end + 5])

    counter = 0                 # records processed so far
    counter_flag = False        # True when a record arrived since last report
    timer_no_data = time()      # time the last record arrived
    timer_no_data_flag = False  # True while a data-loss warning is active
    overruns = 0
    overrun_list = []
    while True:
        time0 = time()
        new_time, data, late_flag = hsi.read_record()
        if new_time != -999:  # -999 is read_record's "no new data" marker
            da.add(new_time, data)
            sys.stdout.flush()
            wtst.write_record(da)
            if counter > 3:  # skip the first few records in the timings
                time_list.append(time() - time0)
            timer_no_data = time()
            print('.', end=' ')
            sys.stdout.flush()
            if timer_no_data_flag:
                print('Data Restored')
                timer_no_data_flag = False
            counter += 1
            counter_flag = True
        if time() - timer_no_data >= 120 and not timer_no_data_flag:
            timer_no_data_flag = True
            print('Data lost for two minutes')
        if not late_flag:
            # Pace the loop to roughly one poll per 0.1 s; when the reader
            # is behind (late_flag) poll again straight away to catch up.
            sleep_time = 0.1 - (time() - time0)
            if sleep_time > 0:
                sleep(sleep_time)
        if counter % 12 == 0 and counter_flag:
            counter_flag = False
            print(' Overruns:', overruns, overrun_list, end=' ')
            if time_list:
                print('TimeStats', "{:.6f} {:.6f}".format(
                    max(time_list), sum(time_list) / len(time_list)),
                    '\n', end=' ')
            sys.stdout.flush()


def main():
    """Script entry point: run SierraRun with its default settings."""
    SierraRun()

if __name__ == '__main__':
    # Follows a SierraChart scid file and mirrors it, via a Pandas
    # DataFrame, into a second scid file (see SierraRun).  No command-line
    # arguments are read; the file names are SierraRun's defaults.
    print('start')
    sys.stdout.flush()
    try:
        main()
    except KeyboardInterrupt:
        # SierraRun loops forever; Ctrl-C is the normal way to stop it, so
        # fall through and print the closing summary instead of a traceback.
        print()
    print('fin')
    if time_list:
        print('TimeStats', "{:.6f} {:.6f}".format(max(time_list),
              sum(time_list) / len(time_list)), '\n', end=' ')

Date Time Of Last Edit: 2013-06-21 02:52:49