Source code for eat.io.smt

"""EHT tables
"""
from __future__ import division
from __future__ import print_function

# from future import standard_library
# standard_library.install_aliases()
# from builtins import zip
# from builtins import map
from pkg_resources import parse_version
import pandas as pd
try:
    assert(parse_version(pd.__version__) >= parse_version('0.15.1dev'))
except:
    if type(pd).__name__ == "_MockModule":
        print("processed by autodoc; pandas version comparison failed")
    else:
        print("pandas version too old")
import datetime
import numpy as np
import os

# reference MJD at 01/01/2000 00:00:00.000
mjd2000 = 51544.

# SMT total power table
TOTALPOWER_FIELDS = (
    ('mjd', float),
    ('state', str), # "Idle"
    ('n', int), # some number that counts up
    ('v1', float), # voltages, eventaully we want to figure out what LSB/USB/L/R these are
    ('v2', float),
    ('v3', float),
    ('v4', float),
    ('v5', float),
    ('v6', float),
    ('v7', float),
    ('v8', float),
)

totalpower_pandasargs = dict(
    delim_whitespace=True,
    comment='#',
    header=None,
    parse_dates={'datetime':[0]},
    keep_date_col=True,
    date_parser=lambda dates: [datetime.datetime(2000, 1, 1) + datetime.timedelta(days=float(x)-mjd2000) for x in dates],
    names=[a[0] for a in TOTALPOWER_FIELDS],
    usecols="mjd state n v3 v7".split(),
    dtype=dict(TOTALPOWER_FIELDS)
)

[docs]def read_totalpower(filename):
    table = pd.read_csv(filename, **totalpower_pandasargs)
    return table

TAU_FIELDS = (
    ('mjd', float),
    ('tau', float),
    ('label_pm', str), # +/- symbol
    ('tau_err', float),
    ('label_zero', float), # always zero?
)

tau_pandasargs = dict(
    delim_whitespace=True,
    comment='#',
    header=None,
    parse_dates={'datetime':[0]},
    keep_date_col=True,
    date_parser=lambda dates: [datetime.datetime(2000, 1, 1) + datetime.timedelta(days=float(x)-mjd2000) for x in dates],
    names=[a[0] for a in TAU_FIELDS],
    usecols=[a[0] for a in TAU_FIELDS if "label" not in a[0]],
    dtype=dict(TAU_FIELDS)
)

[docs]def read_tau(filename):
    table = pd.read_csv(filename, **tau_pandasargs)
    return table

# SMT tsys table
TSYS_FIELDS = (
    ('label_scan', str),
    ('scan', int),
    ('yyyymmdd', str),
    ('hhmm', str),
    ('label_channel', str),
    ('channel', int),
    ('source', str),
    ('control_key', str),
    ('label_tcal', str),
    ('tcal', float),
    ('label_tsys', str),
    ('tsys', float),
    ('label_pwv', str),
    ('pwv', float),
)

tsys_pandasargs = dict(
    delim_whitespace=True,
    comment='#',
    header=None,
    parse_dates={'datetime':[1,2]}, # after ignoring the label columns
    keep_date_col=True,
    date_parser=lambda dates,times: [datetime.datetime.strptime(x+y, '%Y-%m-%d%H:%M') for (x,y) in zip(dates,times)],
    names=[a[0] for a in TSYS_FIELDS],
    usecols=[a[0] for a in TSYS_FIELDS if "label" not in a[0]],
    dtype=dict(TSYS_FIELDS)
)

[docs]def read_tsys(filename):
    table = pd.read_csv(filename, **tsysdata_kwargs)
    table = table[(table.tsys > 50) & (table.tsys < 2000)]
    return table

# get scan times from unipops sdd text file
[docs]def sddscantimes(filename):
    from collections import defaultdict
    out = defaultdict(list)
    a = open(filename, 'r')
    for line in a:
        if (line[0:2] != 'sc') and (line[0:2] != 'ut'):
            continue
        tok = line.strip().split()
        if tok[0] == "scan":
            tok[1] = tok[1][:-4]
            (scan, chan) = list(map(int, tok[1].split('.')))
        elif tok[0] == "utdate":
            utdate = datetime.datetime.strptime(tok[1], "%Y.%m%d00")
        elif tok[0] == "ut":
            hours = float(tok[1])
            out['scan'].append(scan)
            out['channel'].append(chan)
            out['datetime'].append(utdate + datetime.timedelta(hours=hours))
    return pd.DataFrame(out)

# from unipops sdd text file, extract certain parameters from class and put them in a big table
# column def will be {Class ID}_{field}
# the index will have Class ID = 0
[docs]def sddfile(filename,
            cols='c1_scan c1_object c1_obsmode c3_utdate c3_ut c5_tamb c12_tcal c12_stsys c12_rtsys c12_tauh2o'.split()):
    import io
    out = []
    f = open(filename)
    classno = 'B' # classno of bootstrap
    fields = dict()
    for line in f:
        tok = line.strip().split()
        # starting new row
        if line.startswith('directory'):
            if classno is not 'B':
                out.append(",".join((fields[col] for col in cols))) # print out previous scan
            classno = 'I'   # classno of directory fields
            fields = dict() # new set of fields
        if line.startswith('Class'):
            (ignore, classno) = line.split() # set classno
        elif len(tok) == 2:
            fields['c%s_%s' % (classno, tok[0])] = tok[1]
    # last entry
    out.append(",".join((fields[col] for col in cols))) # print out previous scan
    table = pd.read_csv(io.StringIO("\n".join(out)), names=cols, header=None, dtype={'c3_utdate':str})
    if 'c1_scan' in cols:
        table['scan'] = [int(0.5+scan) for scan in table.c1_scan]
        table['channel'] = [int(0.5+100*(scan - int(scan))) for scan in table.c1_scan]
    if 'c3_utdate' in cols and 'c3_ut' in cols:
        table['datetime'] = [datetime.datetime.strptime(utdate, "%Y.%m%d00") + datetime.timedelta(hours=hours) for (utdate, hours) in table[['c3_utdate', 'c3_ut']].itertuples(index=False)]
        table['mjd'] = [(datetime.datetime.strptime(utdate, "%Y.%m%d00") - datetime.datetime(2000,1,1)).days + mjd2000 + hours/24. for (utdate, hours) in table[['c3_utdate', 'c3_ut']].itertuples(index=False)]
    return table

[docs]def tpreduce(tp):
# possible states in tp file
#              v1   2   3   4   5   6   7   8
# voltages are ch1 ch1 ch2 ch2 ch3 ch3 ch4 ch4
# and we care about the first number: e.g. v3 v7 for ch2&4
# tpreduce will isolate each "state" and run a given function (min, max, average, etc) on the voltages depending on the state
# we probably want this scan-specific, but lining up the scan info with tp is a little messy
#    {'BSP',
#     'CAL',
#     'COLD',
#     'FIVE',
#     'FOC',
#     'Idle',
#     'NSFC',
#     'PCAL',
#     'PS',
#     'QK5',
#     'SEQ',
#     'STIP',
#     'VLBI'}
    # tp['statechange'] = np.cumsum(np.hstack([1, np.diff(tp.state.map(hash)) != 0]))
    # tp['statechange'] = np.cumsum(~(tp.state == tp.state.shift(1)))
    from collections import defaultdict, OrderedDict
    g = tp.groupby(np.cumsum(~(tp.state == tp.state.shift(1))))
    def vagg(tpgroup):
        out = OrderedDict()
        out['datetime_start'] = tpgroup.datetime.iloc[0]
        out['datetime_stop'] = tpgroup.datetime.iloc[-1]
        out['datetime'] = out['datetime_start'] + (out['datetime_stop'] - out['datetime_start'])/2.
        out['state'] = tpgroup.state.iloc[0]
        out['n'] = tpgroup.n.iloc[0]
        fmap = defaultdict(lambda: np.mean)
        fmap.update(CAL=np.max, COLD=np.min)
        for key in "v1 v3 v5 v7".split():
            if key in tp.columns:
                out[key] = fmap[out['state']](tpgroup[key])
        return pd.Series(out)
    h = g.apply(vagg)
    return h

# get segments based on column based on repeated values and timestamps
[docs]def group(df, col):
    from collections import defaultdict, OrderedDict
    g = df.groupby(np.cumsum(~(df[col] == df[col].shift(1))))
    out = OrderedDict()
    def vagg(subgroup):
        out = OrderedDict()
        out['datetime_start'] = subgroup.datetime.iloc[0]
        out['datetime_stop'] = subgroup.datetime.iloc[-1]
        out['datetime'] = out['datetime_start'] + (out['datetime_stop'] - out['datetime_start'])/2.
        out[col] = subgroup[col].iloc[0]
        # Series does not work here because cannot prevent pandas from converting str columns to Timestamp
        # gets a multiindex -- probably can reduce somehow but oh well
        # but.. iterrows() will NOT preserve the dtype across rows wtf pandas
        return pd.DataFrame(out, index=subgroup.index[:1])
    h = g.apply(vagg)
    return h

# get a date range from current x limits in matplotlib
[docs]def xlim2range(ax=None):
    if ax is None:
        import matplotlib.dates
        import matplotlib.pyplot as plt
        ax = plt.gca()
    x0, x1 = ax.get_xlim()
    d0 = matplotlib.dates.num2date(x0)
    d1 = matplotlib.dates.num2date(x1)
    return d0, d1

[docs]def subset(df):
    d0, d1 = xlim2range()
    return df[(df.datetime >= d0) & (df.datetime <= d1)]

# trec=100 seems to be default, 284.8 is Kazu's number for average chopper temp
[docs]def tp2tsys(tp, trec=100., thot=284.8, tcal3=284., tcal7=284.):
    a = subset(tp) # get subset that overlaps with plot
    (v3s, v3h) = (np.min(a.v3), np.max(a.v3))
    (v7s, v7h) = (np.min(a.v7), np.max(a.v7))
    tsys3 = tcal3 * (v3s / (v3h - v3s))
    tsys7 = tcal7 * (v7s / (v7h - v7s))
    return tsys3, tsys7
Source code for eat.io.smt

EHT Analysis Toolkit

Navigation

Related Topics