Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -13,3 +13,4 @@ build/
simple/version.py
*.zip
*.fits
*sty2054_supplemental_files/
214 changes: 214 additions & 0 deletions scripts/ingests/ingest_zhang18.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,214 @@
import logging
import os
import pandas as pd
from pathlib import Path
from astropy.io import fits
from astrodb_utils import AstroDBError, load_astrodb
from astrodb_utils.spectra import ingest_spectrum
from astrodb_utils.instruments import ingest_instrument
from astrodb_utils.sources import find_source_in_db, ingest_source
from astroquery.simbad import Simbad
from simple import REFERENCE_TABLES
from specutils import Spectrum
from datetime import datetime

# set up logging
astrodb_logger = logging.getLogger("astrodb_utils")
astrodb_logger.setLevel(logging.INFO)

# logger for ingest_zhang18
logger = logging.getLogger("ingest_zhang18")
logger.setLevel(logging.INFO)

# Load DB
SAVE_DB = False
RECREATE_DB = True
SCHEMA_PATH = "simple/schema.yaml"
db = load_astrodb(
"SIMPLE.sqlite",
recreatedb=RECREATE_DB,
reference_tables=REFERENCE_TABLES,
felis_schema=SCHEMA_PATH,
)

# Path
file_path = "scripts/spectra_convert/zhang18/processed_spectra"

# Add new mode for SDSS and ESO VLT Telescope
def add_mode():
modes = [
{
"telescope": "ESO VLT", # existed
"instrument": "Xshooter", # existed
"mode": "Echelle-Smoothed", # new
},
{
"telescope": "Magellan I Baade", # existed
"instrument": "IMACS", # new
"mode": "Missing"

},
{
"telescope": "SDSS", # new
"instrument": "BOSS", # new
"mode": "Missing"
}
]
for mode in modes:
try:
ingest_instrument(
db,
telescope=mode["telescope"],
instrument=mode["instrument"],
mode=mode["mode"]
)
print(f"Mode {mode['instrument']} added successfully!")
except AstroDBError as e:
astrodb_logger.error(f"Error adding {mode['instrument']} mode: {e}")


def modify_date(obs_date):
if not obs_date:
return None

return datetime.fromisoformat(obs_date) # example: 2004-04-17 04:40:11.761000

def add_url(filename):
modified_filename = filename.replace('+', '%2B')
return "https://bdnyc.s3.us-east-1.amazonaws.com/zhang18/" + modified_filename


# Ingest spectra --
def ingest_zhang18():
source_ingested = 0
source_failed = 0
spectrum_ingested = 0
spectrum_failed = 0
failed_file = []

fits_file = list(Path(file_path).glob("*.fits"))

for file in fits_file:
filename = file.name
print(f"Reading {filename}")

with fits.open(file) as hdul:
header = hdul[0].header

# Get all neccessary info to ingest
source_name = header.get("OBJECT")
access_url = add_url(filename)

# Date-OBS
obs_date = modify_date(header.get("DATE-OBS"))

# publication
ref = header.get("VOREF")
if ref == "10.1093/mnras/stw2438": # I
reference = "Zhan17.3040"
elif ref == "10.1093/mnras/stx350": # II
reference = "Zhan17"
elif ref == "10.1093/mnras/sty1352": # III
reference = "Zhan18.1352"
elif ref == "10.1093/mnras/sty2054": # IV
reference = "Zhan18.2054"

# regime
if "NIR" in filename:
regime = "nir"
else:
regime = "optical"

# mode
if "SMOOTHED" in filename:
mode = "Echelle-Smoothed"
elif "Xshooter" in filename and not "SMOOTHED" in filename:
mode = "Echelle"
elif "FIRE" in filename:
mode = "Prism"
else:
mode = "Missing"

# instrument
if "IMAC" in filename:
instrument = "IMACS"
telescope = "Magellan I Baade"
else:
instrument = header.get("INSTRUME")
telescope = header.get("TELESCOP")

# --- Ingest Source if not exist in DB ---
result = Simbad.query_object(source_name)
ra = result[0]["ra"]
dec = result[0]["dec"]
source = find_source_in_db(
db,
source_name,
ra_col_name = "ra",
dec_col_name = "dec",
ra = ra,
dec =dec,
use_simbad=True
)
if source is None:
logger.info(f"Source {source_name} is not found in SIMPLE database")
continue

if not source:
try:
ingest_source(
db,
source_name,
reference,
ra_col_name="ra",
dec_col_name="dec",
ra=ra,
dec=dec,
epoch_col_name="epoch",
use_simbad=True
)
print(f"Source {source_name} ingested successfully!")
source_ingested += 1
except AstroDBError as e:
logger.error(f"Error ingesting source {source_name}: {e}")
source_failed += 1
failed_file.append(filename)
continue

# iNgest Spectra
try:
print(f"Ingesting spectrum for {source_name} with access URL: {access_url}\n regime: {regime}, mode: {mode}, telescope: {telescope}, instrument: {instrument}, obs_date: {obs_date}, reference: {reference}\n")
logger.debug(f"Found db_name: {source} for source: {source_name}\n")
ingest_spectrum(
db,
source=source_name,
spectrum=access_url,
regime=regime,
mode=mode,
telescope=telescope,
instrument=instrument,
obs_date=obs_date,
reference=reference
)
print("Successfully ingested spectrum\n")
spectrum_ingested += 1
except AstroDBError as e:
logger.error(f"Error ingesting spectrum: {e}")
spectrum_failed += 1
failed_file.append(filename)


print(f"\nIngestion complete! \nTotal sources ingested: {source_ingested}")
print(f"Total sources failed to ingest: {source_failed}")

print(f"\nTotal spectra ingested: {spectrum_ingested}")
print(f"Total spectra failed to ingest: {spectrum_failed}")
print(f"Failed files: {failed_file}")


add_mode()
ingest_zhang18()

if SAVE_DB:
db.save_db(directory="data/")

170 changes: 170 additions & 0 deletions scripts/spectra_convert/zhang18/add_header.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
import os
import numpy as np
import pandas as pd
from astropy.io import fits
from astropy.time import Time
from datetime import datetime
import astropy.units as u
from specutils import Spectrum
from astrodb_utils.fits import add_missing_keywords, add_wavelength_keywords, check_header
from astrodb_utils.spectra import check_spectrum_plottable
from astroquery.simbad import Simbad
from pathlib import Path

# Path config
path = "/Users/guanying/SIMPLE db/SIMPLE-db/scripts/spectra_convert/zhang18/processed_spectra"
spreadsheet = "https://docs.google.com/spreadsheets/d/e/2PACX-1vS_VuhqnOHq9Zqu2GOcPSJks6Te161VaGJVkkN1EJYVplDqoBHgK2N1yTuD7MDPcwyI4BPUB0x2gKKo/pub?gid=220627455&single=true&output=csv"


def get_paper_metadata(filename):
if "-I_SIMPLE.fits" in filename or "-I_SMOOTHED_SIMPLE.fits" in filename:
title = "Primeval very low-mass stars and brown dwarfs - I. Six new L subdwarfs, classification and atmospheric properties"
voref = "10.1093/mnras/stw2438"
elif "-II_SIMPLE.fits" in filename or "-II_SMOOTHED_SIMPLE.fits" in filename:
title = "Primeval very low-mass stars and brown dwarfs - II. The most metal-poor substellar object"
voref = "10.1093/mnras/stx350"
elif "-III_SIMPLE.fits" in filename or "-III_SMOOTHED_SIMPLE.fits" in filename:
title = "Primeval very low-mass stars and brown dwarfs - III. The halo transitional brown dwarfs"
voref = "10.1093/mnras/sty1352"
elif "-IV_SIMPLE.fits" in filename or "IV_SMOOTHED_SIMPLE.fits" in filename:
title = "Primeval very low-mass stars and brown dwarfs - IV. New L subdwarfs, Gaia astrometry, population properties, and a blue brown dwarf binary"
voref = "10.1093/mnras/sty2054"
else:
title = "Unknown Paper"
voref = "Unknown DOI"

return title, voref

def get_regime(filename):
if "NIR" in filename:
regime = "nir"
else:
regime = "optical"

return regime


def add_header(path):
missing_telescop_instrument = []
missing_dateobs = []
file_proceed = 0


fits_files = list(Path(path).glob("*.fits"))

for fits_file in fits_files:
filename = fits_file.name
print(f"\nProcessing {filename}...")
spectrum = Spectrum.read(fits_file)

title, voref = get_paper_metadata(filename)

with fits.open(fits_file, mode="update") as hdul:
header = hdul[0].header

# add SIMPLE FITS headers
header["VOPUB"] = ("SIMPLE Archive", "Publication of the spectrum")

# object name
object_name = " ".join(filename.split("_")[:2])
header["OBJECT"] = (object_name, "Name of the object")

# Query simbad for ra/dec
try:
result = Simbad.query_object(object_name)
if result is not None and len(result) > 0:
header["RA_TARG"] = (result[0]["ra"], "[ra] Pointing position")
header["DEC_TARG"] = (result[0]["dec"], "[dec] Pointing position")
except Exception as e:
print(f" SIMBAD lookup failed: {e}")

# Remove deprecated keyword
if "RADECSYS" in header:
header.remove("RADECSYS", ignore_missing=True)
print(" Removed RADECSYS")

# Check TELESCOP + INSTRUME
if "TELESCOP" not in header or "INSTRUME" not in header:
missing_telescop_instrument.append(filename)

# Modify TELESCOPE + add OBSERVAT
if "Xshooter" in filename:
header["TELESCOP"] = ("ESO VLT")
header["OBSERVAT"] = ("European Southern Observatory")
elif "OSIRIS" in filename:
header["TELESCOP"] = ("GTC")
header["OBSERVAT"] = ("Roque de los Muchachos Observatory (ORM)")
elif "FIRE" in filename:
header["TELESCOP"] = ("Magellan I Baade")
header["OBSERVAT"] = ("Las Campanas Observatory")
elif "_SDSS_" in filename:
header["TELESCOP"] = ("SDSS")

# Get SPECBAND (regime)
regime = get_regime(filename)
header["SPECBAND"] = (regime)

# Get APERTURE (slit width)
if (header.get("SLITW")) is not None:
header["APERTURE"] = (header.get("SLITW"))
else:
if "Xshooter" in filename :
header["APERTURE"] = ("1.2", "Slit width in arcsec")

add_wavelength_keywords(header, spectrum.spectral_axis)

# Paper metadata
header["TITLE"] = (title, "Title of the paper")
header["VOREF"] = (voref, "DOI of the paper")
header["AUTHOR"] = ("Zhang, Z.H. et al.", "Original authors")
header["DATE"] = (datetime.now().strftime("%Y-%m-%dT%H:%M:%S"))
header["CONTRIB1"] = ("Guan Ying Goh")
header["TELAPSE"] = (header.get("EXPTIME"))
mjd_obs = header.get("MJD-OBS")
exptime = header.get("EXPTIME")
if mjd_obs is None or exptime is None:
print(f" WARNING: MJD-OBS or EXPTIME missing in header for {filename}. Cannot calculate TMID.")
if mjd_obs is not None and exptime is not None:
tmid = mjd_obs + (exptime / 2) / (60 * 60 * 24)
header.set("TMID", tmid, "[d] MJD of exposure mid-point")
print(" Added paper metadata")

# Fix flux units
if "BUNIT" not in header:
header["BUNIT"] = "erg / (cm2 s Angstrom)"
elif header["BUNIT"] == "erg/s/cm2/Angstrom":
header["BUNIT"] = "erg / (cm2 s Angstrom)"

# date-obs check
if "DATE-OBS" not in header or header["DATE-OBS"] in ["", "UNKNOWN", None]:
missing_dateobs.append(filename)

add_missing_keywords(header)
check_header(header)

hdul.verify('exception')
hdul.flush()
print(f" Finished {filename}")
file_proceed += 1

# Summary
print(f"\File processed: {file_proceed}/{len(fits_files)}")
if missing_telescop_instrument:
print("\nFiles missing TELESCOP or INSTRUME:")
for f in missing_telescop_instrument:
print(f" - {f}")

if missing_dateobs:
print("\nFiles missing DATE-OBS:")
for f in missing_dateobs:
print(f" - {f}")

"""
\File processed: 120/120
64 OSIRIS
52 Xshooter with smoothed and unsmoothed (ignored UVB)
2 FIRE_Magellan
1 SDSS
1 IMACS
"""
add_header(path)
Loading