Login Page - Create Account

Support Board


Date/Time: Sun, 24 Nov 2024 08:24:49 +0000



[User Discussion] - Python SCID to TSV / CSV and Pandas DataFrame Conversion

View Count: 7666

[2013-04-20 23:05:04]
Kiwi - Posts: 375
User Discussion (Free Code)

This post describes code to convert scid files to tsv (tab separated variable, but you could easily change to comma separated (name and delimiter)).

SCID Format

This code can be used to create a Python Pandas DateTime indexed Dataframe. Use at your pleasure and risk - If you modify it or improve it, a post of the changes would be appreciated.

def getRecords(filename, tzadj):
"""
Read in records from SierraChart .scid data filename
Output a list of DateTimes and list of rows
"""
def deserialize(date_and_time):
date_tok = int(date_and_time)
time_tok = round(86400*(date_and_time - date_tok))
d = date(1899, 12, 30) + timedelta(date_tok)
t = time(*helper(time_tok, (24, 60, 60, 1000000)))
return datetime.combine(d, t)

def helper(factor, units):
factor /= 86399.99
result = list()
for unit in units:
value, factor = divmod(factor * unit, 1)
result.append(int(value))
result[3] = int(0)
return result

sizeHeader = 0x38
sizeRecord = 0x28
if tzadj == 99999:
tzAdjust = datetime.fromtimestamp(0) - datetime.utcfromtimestamp(0)
else:
tzAdjust = timedelta(hours=tzadj)
with open(filename, 'rb') as fscid:
fscid.read(sizeHeader) # discard header
rows = []
ix = []
for i in xrange(1000000):
data = fscid.read(sizeRecord)
if data != "":
d = struct.unpack('d4f4I', data)
datarow = [d[1], d[2], d[3], d[4], d[5], d[6]]
rows.append(datarow)
ix.append(deserialize(d[0] + tzAdjust.seconds/86400.0))
else:
break
return (rows, ix)


def getDataFrame(filename, tzadj):
data, ix = getRecords(filename, tzadj)
return pd.DataFrame(data, index=ix, columns=['O', 'H', 'L', 'C', 'V', 'T'])


if __name__ == '__main__':
"""
Takes a SierraChart scid file (input argument 1) and converts
it to a tab separated file (tsv)
Timezone conversion can follow the users local timezone, or a
specified integer (input l or an integer but if the default
filename is being used, '' must be specified for the filename)
"""
filename = '/home/john/zRamdisk/SierraChart/Data/HSI-201303-HKFE-TD.scid'
tzvar = 0
if len(sys.argv) > 1:
if len(sys.argv[1]):
filename = sys.argv[1]
if len(sys.argv) > 2 and len(sys.argv[2]):
print sys.argv[2], type(sys.argv[2])
if sys.argv[2] == 'l':
tzvar = 99999
print tzvar, type(tzvar)
else:
tzvar = float(sys.argv[2])
print tzvar, type(tzvar)
tzvar = 99999
df = getDataFrame(filename, tzvar)
print df[0:10]
print 'fin'

Date Time Of Last Edit: 2013-04-21 03:51:24
[2013-04-22 00:20:17]
Kiwi - Posts: 375
This is a modified version of the TSV code that adds the option to use a different datetime format:

"""
Returns excel date time in format YYYYMMDD HHMMSS
"""


import csv
import re
import sys
import numpy as np
import struct
from time import ctime
from datetime import date, datetime, time, timedelta
# import ipdb # import ipdb; ipdb.set_trace(); ## *** ##


def deserialize(excelDateAndTime):
"""
Returns excel date time as Python datetime object
"""
date_tok = int(excelDateAndTime)
time_tok = round(86400*(excelDateAndTime - date_tok))
d = date(1899, 12, 31) + timedelta(date_tok)
t = time(*helper(time_tok, (24, 60, 60, 1000000)))
return datetime.combine(d, t)


def helper(factor, units):
factor /= 86399.99
result = list()
for unit in units:
value, factor = divmod(factor * unit, 1)
result.append(int(value))
result[3] = int(0)
return result


def datetimeTwo(excelDateAndTime, tzAdjust):
"""
Returns excel date time in format YYYYMMDD HHMMSS
"""
t = str(deserialize(excelDateAndTime + tzAdjust.seconds/86400.0))
return (re.sub(r'(:|-)', '', t)[:8], re.sub(r'(:|-)', '', t)[9:15])


def excelTimeAdjust(date_and_time, tzAdjust):
return date_and_time + tzAdjust.seconds/86400.0


def getRecords(filename, fileoutput, tzvar, dtformat):
"""
Read in records from SierraChart .scid data filename
dtformat determines the datetime representation in column 1 (and 2)
"""
sizeHeader = 0x38
sizeRecord = 0x28
if tzvar == 99999:
tzAdjust = datetime.fromtimestamp(0) - datetime.utcfromtimestamp(0)
else:
tzAdjust = timedelta(hours=tzvar)

header = ('Date', 'Time', 'O', 'H', 'L', 'C', 'V', 'T')
ftsv = open(fileoutput, 'w')
fileout = csv.writer(ftsv, delimiter='\t')
with open(filename, 'rb') as fscid:
fscid.read(sizeHeader) # discard header
fileout.writerow(header)
for i in xrange(300000):
data = fscid.read(sizeRecord)
if data != "":
dataRow = struct.unpack('d4f4I', data)
if dtformat == 0:
adjustedTime = dataRow[0] + tzAdjust.seconds/86400.0
outrow = (str(adjustedTime), str(adjustedTime),
str(dataRow[1]), str(dataRow[2]), str(dataRow[3]),
str(dataRow[4]), str(dataRow[5]), str(dataRow[6]))
fileout.writerow(outrow)
elif dtformat == 1:
date, time = datetimeTwo(dataRow[0], tzAdjust)
outrow = (date, time,
str(dataRow[1]), str(dataRow[2]), str(dataRow[3]),
str(dataRow[4]), str(dataRow[5]), str(dataRow[6]))
fileout.writerow(outrow)
else:
break
return


if __name__ == '__main__':
"""
Takes a SierraChart scid file (input argument 1) and converts
it to a tab separated file (tsv)
Timezone conversion can follow the users local timezone, or a
specified integer (input l or an integer but if the default
filename is being used, '' must be specified for the filename)
"""
filename = '/home/john/zRamdisk/SierraChart/Data/HSI-201303-HKFE-TD.scid'
tzvar = 0
if len(sys.argv) > 1:
if len(sys.argv[1]):
filename = sys.argv[1]
if len(sys.argv) > 2 and len(sys.argv[2]):
print sys.argv[2], type(sys.argv[2])
if sys.argv[2] == 'l':
tzvar = 99999
print tzvar, type(tzvar)
else:
tzvar = float(sys.argv[2])
print tzvar, type(tzvar)
fileoutput = filename[:-4] + 'tsv'
getRecords(filename, fileoutput, tzvar, 1)

[2024-06-20 16:44:27]
User569223 - Posts: 3
in getRecords:
in for loop:
dataRow = struct.unpack('d4f4I', data)
d = struct.unpack('d4f4I', data)

When unpacking row in Data, you should be using 'q4ffI' to accommodate datetime of size 64-bit integer.

dataRow = struct.unpack('q4f4I', data)
d = struct.unpack('q4f4I', data)
Date Time Of Last Edit: 2024-06-20 16:44:59

To post a message in this thread, you need to log in with your Sierra Chart account:

Login

Login Page - Create Account