"""EHT tables
"""
from __future__ import division
from __future__ import print_function
# from future import standard_library
# standard_library.install_aliases()
# from builtins import zip
# from builtins import map
from pkg_resources import parse_version
import pandas as pd
try:
assert(parse_version(pd.__version__) >= parse_version('0.15.1dev'))
except:
if type(pd).__name__ == "_MockModule":
print("processed by autodoc; pandas version comparison failed")
else:
print("pandas version too old")
import datetime
import numpy as np
import os
# reference MJD at 01/01/2000 00:00:00.000
mjd2000 = 51544.
# SMT total power table
TOTALPOWER_FIELDS = (
('mjd', float),
('state', str), # "Idle"
('n', int), # some number that counts up
('v1', float), # voltages, eventaully we want to figure out what LSB/USB/L/R these are
('v2', float),
('v3', float),
('v4', float),
('v5', float),
('v6', float),
('v7', float),
('v8', float),
)
totalpower_pandasargs = dict(
delim_whitespace=True,
comment='#',
header=None,
parse_dates={'datetime':[0]},
keep_date_col=True,
date_parser=lambda dates: [datetime.datetime(2000, 1, 1) + datetime.timedelta(days=float(x)-mjd2000) for x in dates],
names=[a[0] for a in TOTALPOWER_FIELDS],
usecols="mjd state n v3 v7".split(),
dtype=dict(TOTALPOWER_FIELDS)
)
[docs]def read_totalpower(filename):
table = pd.read_csv(filename, **totalpower_pandasargs)
return table
TAU_FIELDS = (
('mjd', float),
('tau', float),
('label_pm', str), # +/- symbol
('tau_err', float),
('label_zero', float), # always zero?
)
tau_pandasargs = dict(
delim_whitespace=True,
comment='#',
header=None,
parse_dates={'datetime':[0]},
keep_date_col=True,
date_parser=lambda dates: [datetime.datetime(2000, 1, 1) + datetime.timedelta(days=float(x)-mjd2000) for x in dates],
names=[a[0] for a in TAU_FIELDS],
usecols=[a[0] for a in TAU_FIELDS if "label" not in a[0]],
dtype=dict(TAU_FIELDS)
)
[docs]def read_tau(filename):
table = pd.read_csv(filename, **tau_pandasargs)
return table
# SMT tsys table
TSYS_FIELDS = (
('label_scan', str),
('scan', int),
('yyyymmdd', str),
('hhmm', str),
('label_channel', str),
('channel', int),
('source', str),
('control_key', str),
('label_tcal', str),
('tcal', float),
('label_tsys', str),
('tsys', float),
('label_pwv', str),
('pwv', float),
)
tsys_pandasargs = dict(
delim_whitespace=True,
comment='#',
header=None,
parse_dates={'datetime':[1,2]}, # after ignoring the label columns
keep_date_col=True,
date_parser=lambda dates,times: [datetime.datetime.strptime(x+y, '%Y-%m-%d%H:%M') for (x,y) in zip(dates,times)],
names=[a[0] for a in TSYS_FIELDS],
usecols=[a[0] for a in TSYS_FIELDS if "label" not in a[0]],
dtype=dict(TSYS_FIELDS)
)
[docs]def read_tsys(filename):
table = pd.read_csv(filename, **tsysdata_kwargs)
table = table[(table.tsys > 50) & (table.tsys < 2000)]
return table
# get scan times from unipops sdd text file
[docs]def sddscantimes(filename):
from collections import defaultdict
out = defaultdict(list)
a = open(filename, 'r')
for line in a:
if (line[0:2] != 'sc') and (line[0:2] != 'ut'):
continue
tok = line.strip().split()
if tok[0] == "scan":
tok[1] = tok[1][:-4]
(scan, chan) = list(map(int, tok[1].split('.')))
elif tok[0] == "utdate":
utdate = datetime.datetime.strptime(tok[1], "%Y.%m%d00")
elif tok[0] == "ut":
hours = float(tok[1])
out['scan'].append(scan)
out['channel'].append(chan)
out['datetime'].append(utdate + datetime.timedelta(hours=hours))
return pd.DataFrame(out)
# from unipops sdd text file, extract certain parameters from class and put them in a big table
# column def will be {Class ID}_{field}
# the index will have Class ID = 0
[docs]def sddfile(filename,
cols='c1_scan c1_object c1_obsmode c3_utdate c3_ut c5_tamb c12_tcal c12_stsys c12_rtsys c12_tauh2o'.split()):
import io
out = []
f = open(filename)
classno = 'B' # classno of bootstrap
fields = dict()
for line in f:
tok = line.strip().split()
# starting new row
if line.startswith('directory'):
if classno is not 'B':
out.append(",".join((fields[col] for col in cols))) # print out previous scan
classno = 'I' # classno of directory fields
fields = dict() # new set of fields
if line.startswith('Class'):
(ignore, classno) = line.split() # set classno
elif len(tok) == 2:
fields['c%s_%s' % (classno, tok[0])] = tok[1]
# last entry
out.append(",".join((fields[col] for col in cols))) # print out previous scan
table = pd.read_csv(io.StringIO("\n".join(out)), names=cols, header=None, dtype={'c3_utdate':str})
if 'c1_scan' in cols:
table['scan'] = [int(0.5+scan) for scan in table.c1_scan]
table['channel'] = [int(0.5+100*(scan - int(scan))) for scan in table.c1_scan]
if 'c3_utdate' in cols and 'c3_ut' in cols:
table['datetime'] = [datetime.datetime.strptime(utdate, "%Y.%m%d00") + datetime.timedelta(hours=hours) for (utdate, hours) in table[['c3_utdate', 'c3_ut']].itertuples(index=False)]
table['mjd'] = [(datetime.datetime.strptime(utdate, "%Y.%m%d00") - datetime.datetime(2000,1,1)).days + mjd2000 + hours/24. for (utdate, hours) in table[['c3_utdate', 'c3_ut']].itertuples(index=False)]
return table
[docs]def tpreduce(tp):
# possible states in tp file
# v1 2 3 4 5 6 7 8
# voltages are ch1 ch1 ch2 ch2 ch3 ch3 ch4 ch4
# and we care about the first number: e.g. v3 v7 for ch2&4
# tpreduce will isolate each "state" and run a given function (min, max, average, etc) on the voltages depending on the state
# we probably want this scan-specific, but lining up the scan info with tp is a little messy
# {'BSP',
# 'CAL',
# 'COLD',
# 'FIVE',
# 'FOC',
# 'Idle',
# 'NSFC',
# 'PCAL',
# 'PS',
# 'QK5',
# 'SEQ',
# 'STIP',
# 'VLBI'}
# tp['statechange'] = np.cumsum(np.hstack([1, np.diff(tp.state.map(hash)) != 0]))
# tp['statechange'] = np.cumsum(~(tp.state == tp.state.shift(1)))
from collections import defaultdict, OrderedDict
g = tp.groupby(np.cumsum(~(tp.state == tp.state.shift(1))))
def vagg(tpgroup):
out = OrderedDict()
out['datetime_start'] = tpgroup.datetime.iloc[0]
out['datetime_stop'] = tpgroup.datetime.iloc[-1]
out['datetime'] = out['datetime_start'] + (out['datetime_stop'] - out['datetime_start'])/2.
out['state'] = tpgroup.state.iloc[0]
out['n'] = tpgroup.n.iloc[0]
fmap = defaultdict(lambda: np.mean)
fmap.update(CAL=np.max, COLD=np.min)
for key in "v1 v3 v5 v7".split():
if key in tp.columns:
out[key] = fmap[out['state']](tpgroup[key])
return pd.Series(out)
h = g.apply(vagg)
return h
# get segments based on column based on repeated values and timestamps
[docs]def group(df, col):
from collections import defaultdict, OrderedDict
g = df.groupby(np.cumsum(~(df[col] == df[col].shift(1))))
out = OrderedDict()
def vagg(subgroup):
out = OrderedDict()
out['datetime_start'] = subgroup.datetime.iloc[0]
out['datetime_stop'] = subgroup.datetime.iloc[-1]
out['datetime'] = out['datetime_start'] + (out['datetime_stop'] - out['datetime_start'])/2.
out[col] = subgroup[col].iloc[0]
# Series does not work here because cannot prevent pandas from converting str columns to Timestamp
# gets a multiindex -- probably can reduce somehow but oh well
# but.. iterrows() will NOT preserve the dtype across rows wtf pandas
return pd.DataFrame(out, index=subgroup.index[:1])
h = g.apply(vagg)
return h
# get a date range from current x limits in matplotlib
[docs]def xlim2range(ax=None):
if ax is None:
import matplotlib.dates
import matplotlib.pyplot as plt
ax = plt.gca()
x0, x1 = ax.get_xlim()
d0 = matplotlib.dates.num2date(x0)
d1 = matplotlib.dates.num2date(x1)
return d0, d1
[docs]def subset(df):
d0, d1 = xlim2range()
return df[(df.datetime >= d0) & (df.datetime <= d1)]
# trec=100 seems to be default, 284.8 is Kazu's number for average chopper temp
[docs]def tp2tsys(tp, trec=100., thot=284.8, tcal3=284., tcal7=284.):
a = subset(tp) # get subset that overlaps with plot
(v3s, v3h) = (np.min(a.v3), np.max(a.v3))
(v7s, v7h) = (np.min(a.v7), np.max(a.v7))
tsys3 = tcal3 * (v3s / (v3h - v3s))
tsys7 = tcal7 * (v7s / (v7h - v7s))
return tsys3, tsys7