Source code for eat.io.hops

# I/O routines for HOPS ASCII tables
# 2016-10-11 Lindy Blackburn

from __future__ import print_function
# import builtins
# if 'zip' in dir(builtins):
#     zip = builtins.zip
#     range = builtins.range
from pkg_resources import parse_version
import pandas as pd
# allow storing of metadata in attribute "metadata"
if 'metadata' not in pd.DataFrame._metadata:
    pd.DataFrame._metadata.append('metadata')
try:
    assert(parse_version(pd.__version__) >= parse_version('0.15.1dev'))
except:
    if type(pd).__name__ == "_MockModule":
        print("processed by autodoc; pandas version comparison failed")
    else:
        print("pandas version too old")
import datetime
import numpy as np
import os
import sys

[docs]def condense_formats(fmtlist):
    return map(lambda fmt: fmt if fmt.count('%') <= 1 else "%s", fmtlist)

# from write_fsumm.c

fformat_v5 = condense_formats("%1d %s 2 %2d %3d %3d %3d %4d %s %02d%03d-%02d%02d%02d %4d\
 %03d-%02d%02d%02d %3d %-8s %s %c%c %c%02d %2s %4d %6.2f %#5.4g %5.1f %#5.4g %2s %6.3f %8.5f\
 %6.4f %8.3f %4.1f %4.1f %5.1f %5.1f %7.4g %7.4g %06d %02d%02d %8.2f %5.1f %11.8f\
 %13.6f %5.3f %3d %3d\n".replace(' 2 ', ' %d ').strip().split())

fformat_v6 = condense_formats("%1d %s 2 %2d %3d %3d %3d %4d %8s %04d%03d-%02d%02d%02d\
 %4d %03d-%02d%02d%02d %3d %32s %2s %c%c\
 %c%02d %2s %5d\
 %#13.8g %#13.8g %11.6f %#11.6g %2s\
 %+12.9f %+12.9f %11.9f\
 %+11.6f %5.2f %5.2f %6.2f %6.2f %7.4g %7.4g %06d\
 %02d%02d %9.3f %10.6f %11.8f\
 %13.6f %+9.6f %8d %8d %+10.6f %+10.6f %+13.10f\n".replace(' 2 ', ' %d ').strip().split())

tformat_v6 = condense_formats("%1d %4d 3 %8s %4d %03d-%02d%02d%02d %3d %32s\
 %c%c %4d %3s %20s %11s %14s\
 %3d %3d  %c  %c %06d\
 %10.3f %8.3f %+8.3f %2s %7.4f\
 %8.5f %6.4f %9.5f %14s %11s\
  %02d%02d %10.3f %7d\n".replace(' 3 ', ' %d ').strip().split())

ffields_v5 = [a.strip() for a in """
version,
root_id,
two,
extent_no,
duration,
length,
offset,
expt_no,
scan_id,
procdate,
year,
timetag,
scan_offset,
source,
baseline,
quality,
freq_code,
polarization,
lags,
amp,
snr,
resid_phas,
phase_snr,
datatype,
sbdelay,
mbdelay,
ambiguity,
delay_rate,
ref_elev,
rem_elev,
ref_az,
rem_az,
u,
v,
esdesp,
epoch,
ref_freq,
total_phas,
total_rate,
total_mbdelay,
total_sbresid,
srch_cotime,
noloss_cotime
""".split(',')]

ffields_v6 = [a.strip() for a in """
version,
root_id,
two,
extent_no,
duration,
length,
offset,
expt_no,
scan_id,
procdate,
year,
timetag,
scan_offset,
source,
baseline,
quality,
freq_code,
polarization,
lags,
amp,
snr,
resid_phas,
phase_snr,
datatype,
sbdelay,
mbdelay,
ambiguity,
delay_rate,
ref_elev,
rem_elev,
ref_az,
rem_az,
u,
v,
esdesp,
epoch,
ref_freq,
total_phas,
total_rate,
total_mbdelay,
total_sbresid,
srch_cotime,
noloss_cotime,
ra_hrs,
dec_deg,
resid_delay
""".split(',')]

tfields_v6 = [a.strip() for a in """
version,
expt_no,
three,
scan_id,
year,
timetag,
scan_offset,
source,
freq_code,
lags,
triangle,
roots,
extents,
lengths,
duration,
offset,
scanqual,
dataqual,
esdesp,
bis_amp,
bis_snr,
bis_phas,
datatype,
csbdelay,
cmbdelay,
ambiguity,
cdelay_rate,
elevations,
azimuths,
epoch,
ref_freq,
cotime
""".split(',')]

fsumm_v5_pandasargs = dict(
    delim_whitespace=True,
    comment='*',
    header=None,
    # fringe quality code
    dtype={15:str},
    parse_dates={'datetime':[10,11]},
    # index_col='datetime',
    keep_date_col=True,
    # note: pandas 0.15.1 cannot use generator for date_parser (unlike 0.18), so changed to a list comprehension
    date_parser=lambda years,times: [datetime.datetime.strptime(x+y, '%y%j-%H%M%S') for (x,y) in zip(years,times)],
    names=ffields_v5,
)

fsumm_v6_pandasargs = dict(
    delim_whitespace=True,
    comment='*',
    header=None,
    dtype={15:str},
    parse_dates={'datetime':[10,11]},
    # index_col='datetime',
    keep_date_col=True,
    # note: pandas 0.15.1 cannot use generator for date_parser (unlike 0.18), so changed to a list comprehension
    date_parser=lambda years,times: [datetime.datetime.strptime(x+y, '%Y%j-%H%M%S') for (x,y) in zip(years,times)],
    names=ffields_v6,
)

tsumm_pandasargs = dict(
    delim_whitespace=True,
    comment='*',
    header=None,
    dtype={16:str, 17:str},
    parse_dates={'datetime':[4,5]},
    # index_col='datetime',
    keep_date_col=True,
    # note: pandas 0.15.1 cannot use generator for date_parser (unlike 0.18), so changed to a list comprehension
    date_parser=lambda years,times: [datetime.datetime.strptime(x+y, '%Y%j-%H%M%S') for (x,y) in zip(years,times)],
)

[docs]def read_alist_v5(filename):
    table = pd.read_csv(filename, **fsumm_v5_pandasargs)
    return table

[docs]def read_alist_v6(filename):
    table = pd.read_csv(filename, **fsumm_v6_pandasargs)
    return table

[docs]def read_tlist_v6(filename):
    table = pd.read_csv(filename, names=tfields_v6, **tsumm_pandasargs)
    return table

fformatters_v5 = {col:lambda x, fmt=fmt: fmt % x for col,fmt in zip(ffields_v5, fformat_v5)}
fformatters_v5['year'] = lambda x: '%s' % x # necessary because pandas parse_dates will keep date col as str (!?)
fformatters_v6 = {col:lambda x, fmt=fmt: fmt % x for col,fmt in zip(ffields_v6, fformat_v6)}
fformatters_v6['year'] = lambda x: '%s' % x # necessary because pandas parse_dates will keep date col as str (!?)
tformatters_v6 = {col:lambda x, fmt=fmt: fmt % x for col,fmt in zip(tfields_v6, tformat_v6)}
tformatters_v6['year'] = lambda x: '%s' % x # necessary because pandas parse_dates will keep date col as str (!?)

[docs]def write_alist_v5(df, out=sys.stdout):
    if type(out) is str:
        out = open(out, 'w')
    df.to_string(buf=out, columns=ffields_v5, formatters=fformatters_v5, header=False, index=False)

[docs]def write_alist_v6(df, out=sys.stdout):
    if type(out) is str:
        out = open(out, 'w')
    df.to_string(buf=out, columns=ffields_v6, formatters=fformatters_v6, header=False, index=False)

[docs]def write_tlist_v6(df, out=sys.stdout):
    if type(out) is str:
        out = open(out, 'w')
    df.to_string(buf=out, columns=tfields_v6, formatters=tformatters_v6, header=False, index=False)

[docs]def get_alist_version(filename):
    code = (a[0] for a in open(filename) if a[0].isdigit())
    return int(next(code))

# read_alist automatically determine version
# ALIST notes:
# mk4 correlated data will have az,el = 0
# difx correlated data will have az,el != 0, but may still have u,v = 0
[docs]def read_alist(filename):
    """Read alist file into pandas table, automatically deteremine version (v5 or v6)

    Args:
        filename (str): alist filename

    Returns:
        pandas.DataFrame with alist contents
    """
    ver = get_alist_version(filename)
    if ver == 5:
        table = read_alist_v5(filename)
    elif ver == 6:
        table = read_alist_v6(filename)
    else:
        import sys
        sys.exit('alist is not version 5 or 6')
    return table

# master calibration file from vincent
MASTERCAL_FIELDS = (
    ('dd_hhmm', str), # timetag without seconds
    ('source', str),  # radio source name
    ('len', int),     # length of observation in seconds
    ('el_ap', int),   # Ap APEX -- elevation --
    ('el_az', int),   # Az SMTL
    ('el_sm', int),   # Sm JCMTR
    ('el_cm', int),   # Cm CARMA1L
    ('el_pl', int),   # PL PICOL
    ('el_pb', int),   # PB PDBL
    ('pa_ap', int),   # Ap -- parallactic angle --
    ('pa_az', int),   # Az
    ('pa_sm', int),   # Sm
    ('pa_cm', int),   # Cm
    ('pa_pl', int),   # PL
    ('pa_pb', int),   # PB
    ('smt_tsysl', float),    # -- [S/T] SMT --
    ('smt_tsysr', float),
    ('smt_tau', float),
    ('smalow_sefd', float),  # -- [P/Q] SMA low --
    ('smalow_pheff', float),
    ('smahigh_sefd', float), # -- [P/Q] SMA high --
    ('smahigh_pheff', float),
    ('d_sefd', float),       # -- [D] CARMA Ref LCP --
    ('e_sefd', float),       # -- [E] CARMA Ref RCP --
    ('flow_sefd', float),    # -- [F] CARMA Phased LCP low --
    ('fhigh_sefd', float),   # -- [F] CARMA Phased LCP high --
    ('glow_sefd', float),    # -- [G] CARMA Phased RCP low --
    ('ghigh_sefd', float),   # -- [G] CARMA Phased RCP high --
    ('carma_tau', float),    # -- CARMA --
    ('carma_path', float),
    ('carma_phef', float),
    ('jcmt_tsys', float),    # -- [-/J] JCMT --
    ('jcmt_tau', float),
    ('jcmt_gap', float),
)

mastercal_pandasargs = dict(
    delim_whitespace=True,
    comment='#',
    header=None,
    parse_dates={'datetime':[0]},
    # index_col='datetime',
    keep_date_col=True,
    date_parser=lambda dates: [datetime.datetime.strptime('13'+y, '%y%j-%H%M') for y in dates],
    names=[a[0] for a in MASTERCAL_FIELDS]
)

[docs]def read_mastercal(filename):
    table = pd.read_csv(filename, **mastercal_pandasargs)
    return table

# calibrated data table, after calibration amplitdues from alist
CALTABLE_FIELDS = (
    ('day', str), # day
    ('hhmm', str),
    ('source', str),
    ('baseline', str),
    ('u', float),
    ('v', float),
    ('uvmag', float),
    ('sefd_1', float),
    ('sefd_2', float),
    ('calamp', float),
    ('amp', float),
    ('snr', float),
    ('el_1', float),
    ('el_2', float),
    ('expt', int),
    ('band', int),
)

caltable_pandasargs = dict(
    delim_whitespace=True,
    comment='#',
    header=None,
    parse_dates={'datetime':[0,1]},
    keep_date_col=True,
    date_parser=lambda dates,times: [x+y for (x,y) in zip(dates,times)],
    names=[a[0] for a in CALTABLE_FIELDS],
    error_bad_lines=False,
    warn_bad_lines=False,
)

# calibrated data will have u,v filled in
[docs]def read_caltable(filename):
    table = pd.read_csv(filename, **caltable_pandasargs)
    # keep missing data (u,v coords still good)
    # table.dropna(how="any", inplace=True)
    return table

# network solution calibrated data 2013 by Michael
# http://eht-wiki.haystack.mit.edu/Event_Horizon_Telescope_Home/EHT_Data/2013_March/Network_Calibration_Solution/Non-Sgr_A*_Network_Solution
# 1: Day
# 2: HourMinute
# 3: Source
# 4: Baseline (note: "p" and "q" are high-band data for the SMA. I use lowercase because the high-band gains at the SMA are not equal to the low-band gains because they have different phasing efficiencies)
# 5-7: u, v, |{u,v}|
# 8: SEFD_1
# 9: SEFD_2
# 10: a-priori visibility amplitude (from cal_2013)
# 11: visibility amplitude in correlator units
# 12: SNR of visibility amplitude, after incoherent averaging (the thermal noise)
# 13: Elevation_1
# 14: Elevation_2
# 15: Experiment Code
# 16: 0=Low-Band, 1=High-Band
# 17: Gain_1
# 18: Formal Uncertainty in Gain_1 from the network solution
# 19: Gain_2
# 20: Formal Uncertainty in Gain_2 from the network solution
# 21: chi^2 of the visibility after using the network solution (i.e., departure from self-consistent solution, in units of \sigma, squared)
# 22. Calibrated Visibility Amplitude (Jy)
# 23. Estimated systematic uncertainty, as a fraction of the Calibrated Visibility Amplitude (#2).
NETWORKSOL_FIELDS = (
    ('day', str), # day
    ('hhmm', str),
    ('source', str),
    ('baseline', str),
    ('u', float),
    ('v', float),
    ('uvmag', float),
    ('sefd_1', float),
    ('sefd_2', float),
    ('calamp_apriori', float),
    ('amp', float),
    ('snr', float),
    ('el_1', float),
    ('el_2', float),
    ('expt', int),
    ('band', int),
    ('gain_1', float),
    ('gainerr_1', float),
    ('gain_2', float),
    ('gainerr_2', float),
    ('chisq', float),
    ('calamp_network', float),
#    ('syserr_fraction', float),
)

networksol_pandasargs = dict(
    delim_whitespace=True,
    comment='#',
    header=None,
    parse_dates={'datetime':[0,1]},
    keep_date_col=True,
    # date_parser=lambda dates,times: [x+y for (x,y) in zip(dates,times)],
    date_parser=lambda day,hhmm: [datetime.datetime.strptime(
        '13' + '%03d' % int(x) + '%04d' % int(y), '%y%j%H%M') for (x,y) in zip(day,hhmm)],
    names=[a[0] for a in NETWORKSOL_FIELDS],
    error_bad_lines=True,
    warn_bad_lines=True,
)

[docs]def read_networksol(filename):
    table = pd.read_csv(filename, **networksol_pandasargs)
    table.dropna(how="any", inplace=True)
    return table

# modified networksol by Michael with model visibs and flag
NETWORKSOL2_FIELDS = (
    ('day', str), # day
    ('hhmm', str),
    ('source', str),
    ('baseline', str),
    ('u', float),
    ('v', float),
    ('uvmag', float),
    ('sefd_1', float),
    ('sefd_2', float),
    ('calamp_apriori', float),
    ('amp', float),
    ('snr', float),
    ('el_1', float),
    ('el_2', float),
    ('expt', int),
    ('band', int),
	('model', float),
    ('gain_1', float),
    ('gainerr_1', float),
    ('gain_2', float),
    ('gainerr_2', float),
    ('chisq', float),
    ('calamp_network', float),
	('flag', int),
#    ('syserr_fraction', float),
)

networksol2_pandasargs = dict(
    delim_whitespace=True,
    comment='#',
    header=None,
    parse_dates={'datetime':[0,1]},
    keep_date_col=True,
    date_parser=lambda dates,times: [x+y for (x,y) in zip(dates,times)],
    names=[a[0] for a in NETWORKSOL2_FIELDS],
    error_bad_lines=True,
    warn_bad_lines=True,
)

[docs]def read_networksol2(filename):
    table = pd.read_csv(filename, **networksol2_pandasargs)
    table.dropna(how="any", inplace=True)
    return table


# aedit output produced by Vincent with channel-specific amplitudes
# 30 channel mode
#   1. path to fringe file (including experiment number, doy-hhmmss, baseline)
#   2. source
#   3. number of channels
#   4. amplitude (times 10^{-4}) [technically, correlation coefficient]
#   5. signal-to-noise ratio
#   6-35. amplitudes in each of the channels
BANDPASS_FIELDS = [
    ('path', str),
    ('source', str),
    ('nchan', str),
    ('amp', float),
    ('snr', float),
]

bandpass_pandasargs = dict(
    delim_whitespace=True,
    comment='*',
    header=None,
)

[docs]def read_bandpass(filename):
    table = pd.read_csv(filename, **bandpass_pandasargs)
    table.columns = [a[0] for a in BANDPASS_FIELDS] + \
        ['amp_%d' % (i+1) for i in range(len(table.columns) - 5)]
    table['experiment'], table['scan'], table['filename'] = \
        list(zip(*table['path'].apply(lambda x: x.split('/'))))
    table['baseline'] = table['filename'].apply(lambda x: x[:2])
    return table
Source code for eat.io.hops

EHT Analysis Toolkit

Navigation

Related Topics